In [None]:
import os
from utils import *
import pandas as pd
import mercantile, requests, os
from vt2geojson.tools import vt_bytes_to_geojson
import time


#### Load the crash data and compute its bounding box (northernmost, southernmost, easternmost and westernmost points)

In [None]:
# Resolve the data directory up front. os.getenv returns None when the
# variable is unset, and os.path.join(None, ...) would then fail with an
# opaque TypeError instead of a message naming the missing variable.
path_to_data = os.getenv('PATH_TO_DATA')
if not path_to_data:
    raise ValueError("Environment variable 'PATH_TO_DATA' is not set.")

# Load the cleaned crash records from the data directory.
crash_site_data_path = os.path.join(path_to_data, 'crash_data_cleaned.csv')
df_crash_site = pd.read_csv(crash_site_data_path)

In [None]:
# Extremes of the crash coordinates; column X is treated as longitude and
# column Y as latitude.
min_long, max_long = df_crash_site['X'].min(), df_crash_site['X'].max()
min_lat, max_lat = df_crash_site['Y'].min(), df_crash_site['Y'].max()

# Same four values under compass names, which the tile helpers below use.
west, east = min_long, max_long
south, north = min_lat, max_lat

In [None]:
## Helper functions

def process_tile(data, bounds=None):
    """Extract image records from a tile's GeoJSON, keeping those inside a bounding box.

    Parameters
    ----------
    data : dict
        Mapping with a 'features' list. Every feature must expose
        ``feature['geometry']['coordinates']`` as ``[lon, lat, ...]``.
        Features that fall inside the box must also carry the
        'sequence_id', 'id', 'is_pano' and 'compass_angle' properties.
    bounds : tuple, optional
        ``(west, south, east, north)`` limits. When omitted, the
        notebook-level ``west``, ``south``, ``east`` and ``north`` variables
        are used, which is what existing callers rely on.

    Returns
    -------
    tuple
        ``(all_data, miss_count)``. ``all_data`` maps each kept feature's
        position in ``data['features']`` to its record; ``miss_count`` is the
        number of features that fell outside the box.

    Raises
    ------
    KeyError
        If a feature inside the box lacks one of the required properties.
    """
    if bounds is None:
        bounds = (west, south, east, north)
    min_lon, min_lat, max_lon, max_lat = bounds

    all_data = {}
    miss_count = 0
    for i, feature in enumerate(data['features']):
        coordinates = feature['geometry']['coordinates']
        lon, lat = coordinates[0], coordinates[1]

        # Test the bounds before touching the properties: a feature that is
        # going to be discarded anyway should not be able to fail the whole
        # tile because one of its properties is missing.
        # The comparison is strict, so points exactly on an edge are misses.
        if not (min_lon < lon < max_lon and min_lat < lat < max_lat):
            miss_count += 1
            continue

        properties = feature['properties']
        all_data[i] = {
            "sequence_id": properties['sequence_id'],
            "image_id": properties['id'],
            # Note the order (lat, lon)
            "img_taken_coord": f"{lat}, {lon}",
            "is_pano": properties['is_pano'],
            "compass_angle": properties['compass_angle'],
        }

    return all_data, miss_count

def get_tile_data(tile, access_token, tile_layer, timeout=30):
    """Download one Mapillary vector tile and decode a layer of it to GeoJSON.

    Parameters
    ----------
    tile : object with ``x``, ``y`` and ``z`` attributes
        Tile address; the attributes are used both in the URL and for decoding.
    access_token : str
        Token appended to the request as the ``access_token`` query parameter.
    tile_layer : str
        Name of the vector-tile layer handed to ``vt_bytes_to_geojson``.
    timeout : float or tuple, optional
        Passed to ``requests.get`` so a stalled connection cannot block the
        caller indefinitely. Defaults to 30 seconds.

    Returns
    -------
    tuple
        ``(data, tile_url)``: the decoded GeoJSON and the URL that was
        requested. The URL embeds the access token, so avoid logging it.

    Raises
    ------
    requests.exceptions.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.exceptions.RequestException
        For timeouts and other transport failures.
    """
    tile_url = f'https://tiles.mapillary.com/maps/vtp/mly1_public/2/{tile.z}/{tile.x}/{tile.y}?access_token={access_token}'
    response = requests.get(tile_url, timeout=timeout)
    # Fail here on an error status rather than feeding an error body to the
    # vector-tile decoder, where the failure would be much harder to read.
    response.raise_for_status()
    data = vt_bytes_to_geojson(response.content, tile.x, tile.y, tile.z, layer=tile_layer)
    return data, tile_url

In [None]:
# Token used for the Mapillary tile requests, read from the environment.
access_token = os.environ.get('CLIENT_TOKEN')

# Stop early when the variable is missing or empty.
if access_token is None or access_token == '':
    raise ValueError("CLIENT_TOKEN environment variable not set")

In [None]:
# Folder that receives one CSV file per downloaded tile.
tile_output_folder = os.path.join(os.getenv('PATH_TO_DATA'), 'tiles')
# exist_ok=True creates the folder when needed and is a no-op when it is
# already there, without a separate (race-prone) existence check.
os.makedirs(tile_output_folder, exist_ok=True)

In [None]:
## Enumerate the tiles covering the bounding box, zoom = 14 for Mapillary
# Materialised as a list so the tiles can be iterated more than once.
tiles = [
    *mercantile.tiles(west=west, south=south, east=east, north=north, zooms=14)
]

In [None]:
# Locate the log file under the data directory.
path_to_data = os.getenv('PATH_TO_DATA', '')  # Default to empty string if not set
if not path_to_data:
    raise ValueError("Environment variable 'PATH_TO_DATA' is not set.")

logs_path = os.path.join(path_to_data, 'logs')
logs_file = os.path.join(logs_path, 'tile_logs.txt')

# Create the logs directory if it doesn't exist. exist_ok=True makes this a
# single call with no separate (race-prone) existence check.
os.makedirs(logs_path, exist_ok=True)

def record_df(df, tile, tile_data_file_path, error=None):
    """Log the outcome for one tile and, on success, save its data as CSV.

    Parameters
    ----------
    df : pandas.DataFrame
        Data collected for the tile; its shape is written to the log.
    tile : object
        Tile identifier, formatted into the log line as-is.
    tile_data_file_path : str
        Destination of the tile's CSV file.
    error : Exception or None, optional
        Failure raised while handling the tile. When given, the log line is
        still written but no CSV is produced.
    """
    log_entry = f"{tile}, data_df_shape = {df.shape}, Error = {error}\n"
    with open(logs_file, 'a') as f:
        f.write(log_entry)

    # The download loop treats an existing CSV as "tile already done". Writing
    # a file for a failed tile would therefore stop it from ever being
    # retried, so failures are only logged.
    if error is not None:
        return

    # Save the data to a csv file
    df.to_csv(tile_data_file_path, index=False)
            
    
        

In [None]:
## Walk over every tile and download the ones not yet on disk.
## Each tile is fetched, filtered and saved; any failure is recorded
## together with the tile information instead of stopping the run.
for tile in tiles:
    tile_data_file_name = f"tile_data_{tile.z}_{tile.x}_{tile.y}.csv"
    tile_data_file_path = os.path.join(tile_output_folder, tile_data_file_name)

    # A CSV for this tile was already written by an earlier run: skip it.
    if os.path.exists(tile_data_file_path):
        continue

    # Empty placeholder so the except branch always has a frame to report.
    df_tile = pd.DataFrame()
    try:
        # Pause before each request.
        time.sleep(5)
        data, url = get_tile_data(tile, access_token, "image")
        all_data, miss_count = process_tile(data)
        # all_data is keyed by feature index; transposing yields one row per image.
        df_tile = pd.DataFrame(all_data).T
        record_df(df_tile, tile, tile_data_file_path)
    except Exception as e:
        record_df(df_tile, tile, tile_data_file_path, error=e)


