# FKA get_mapillary_metadata

## For each sampled image id, this script downloads the image, then extracts and stores its key metadata.

## Dataframes of import

1) df_metadata: captures basic info about the image. image id, dimensions, etc.
2) df_detections: normalized detection coordinates, ready to plot

## Storing results in sqlite

I'm not sure which is the smarter thing to store: df_segments with the base64 encoding, or df_detection_coords.
I elected to go with df_detection_coords because the normalization code is lengthy and confusing. The downside is
that you have to remember to serialize the resulting list column before storing it and deserialize it after loading.

So either way, there is a somewhat non-standard process to go through when loading from sqlite.


In [53]:
%run set_environment.py

Folder 'D://projects_working_directories//imagery_analysis//20250225_los_angeles_3//' already exists.
Folder 'D://projects_working_directories//imagery_analysis//20250225_los_angeles_3////images' already exists.
Folder 'D://projects_working_directories//imagery_analysis//20250225_los_angeles_3////images_metadata' already exists.
Folder 'D://projects_working_directories//imagery_analysis//20250225_los_angeles_3////output' already exists.


In [54]:
#image_ids = ['109450931235292']
#image_id_input_file = f"{base_dir}//LA_roads_clipped_points-20-subset_images.xlsx"
# When True, a later cell reloads the previously saved Excel results and removes
# already-processed ids from image_ids, so an interrupted run can be resumed.
is_restart = True


# Download image and get geometries

In [55]:
import requests
from dotenv import load_dotenv
from os import getenv

from PIL import Image
from io import BytesIO
import pandas as pd
import numpy as np


import sqlite3
import json
import time

import mapillary_utils as mu
import detection_analysis_utils as dau

In [56]:
# image source - db or excel
# load sampled images 
def load_image_metadata():
    """Return the full ``sampled_images`` table from the project's sqlite database.

    The database path is built from the module-level ``image_metadata_folder``
    and ``sqlite3_dbname`` (with a ``.db`` extension appended).

    Returns
    -------
    pandas.DataFrame
        Every row and column of the ``sampled_images`` table.
    """
    conn = sqlite3.connect(f'{image_metadata_folder}//{sqlite3_dbname}.db')
    try:
        # Other tables that were read here in earlier iterations:
        # image_metadata, image_segmentations.
        return pd.read_sql('select * from sampled_images', conn)
    finally:
        # Always release the connection, even if the query fails.
        conn.close()

# Load the sampled_images table once; later cells derive image_ids from it.
df_image_metadata = load_image_metadata()


In [57]:
# Display two randomly chosen rows as a quick sanity check of the loaded table.
df_image_metadata.sample(2)

Unnamed: 0,image_lat,image_lon,residual,image_id,camera_type,is_pano,camera_focal_len,camera_k1,camera_k2,image_path,error,image_url,captured_at,captured_on
190,34.085388,-118.412936,228.932747,3696058567304307,perspective,0,0.46338888335247,-0.1586875276244,0.026124399135413,,,https://scontent.ffsd3-1.fna.fbcdn.net/m1/v/t6...,1710345504000,2024-03-13
125,34.064565,-118.413076,162.935513,1610188043077471,perspective,0,0.4807445264723,-0.13772377073186,0.019659951305129,,,https://scontent.ffsd3-1.fna.fbcdn.net/m1/v/t6...,1710427765000,2024-03-14


In [58]:
## get input ids from sqlite
# set() removes duplicate ids, so the order of the resulting list is arbitrary.
image_ids = list(set(df_image_metadata.image_id.tolist()))

In [59]:
## alt - load image ids from a file
#df_input_ids = pd.read_excel(image_id_input_file)
#image_ids = list(set(df_input_ids.image_id.tolist()))

In [60]:
# Empty accumulator for per-image metadata; one row per processed image is
# appended to it further down.
df_metadata = pd.DataFrame(
    columns=[
        'guid',
        'image_source',
        'image_id',
        'captured_at_unix',
        'lat',
        'lon',
        'original_height',
        'original_width',
        'height',
        'width',
        'camera_type',
        'sequence',
        'compass_angle',
        'computed_compass_angle',
        'is_pano',
        'camera_focal_len',
        'camera_k1',
        'camera_k2',
        'altitude',
        'image_path_on_disk',
    ]
)

# Empty accumulator for detections; rows are appended per detection geometry.
df_detections = pd.DataFrame(
    columns=[
        'image_id',
        'detection_id',
        'detection_label',
        'feature_id',
        'image_height',
        'image_width',
        'extent',
        'properties',
        'coordinates',
    ]
)

In [61]:
## skip this cell

# Resume support: either reload the results saved by a previous run and skip the
# ids that are already done, or start from empty accumulator frames.
if is_restart:

    ## load existing data
    df_metadata = pd.read_excel(f"{image_metadata_folder}//sample_image_metadata.xlsx")
    df_detections = pd.read_excel(f"{image_metadata_folder}//sample_image_detections.xlsx")

    ## get already completed items
    completed_ids = df_metadata.image_id.astype(str).tolist()

    print(len(image_ids))

    # Compare ids as strings on both sides: completed_ids are stringified above,
    # so comparing raw image_ids would silently remove nothing if they are ints.
    # The original type of each remaining id is kept.
    completed_lookup = set(completed_ids)
    image_ids = [pending_id for pending_id in image_ids if str(pending_id) not in completed_lookup]

    print(len(image_ids))
    print(len(completed_ids))

else:
    ## initialize the necessary dfs
    df_metadata = pd.DataFrame(columns=['guid', 'image_source', 'image_id', 'captured_at_unix', 'lat', 'lon',
       'original_height', 'original_width', 'height', 'width', 'camera_type',
       'sequence', 'compass_angle', 'computed_compass_angle', 'is_pano',
       'camera_focal_len', 'camera_k1', 'camera_k2', 'altitude',
       'image_path_on_disk'])
    df_detections = pd.DataFrame(columns=['image_id', 'detection_id','detection_label','feature_id','image_height','image_width','extent','properties','coordinates'])

769
62
714


In [62]:
# Load variables from a .env file into the process environment, then read the
# Mapillary client token. API_KEY is None if MAPILLARY_CLIENT_TOKEN is not set.
load_dotenv()
API_KEY = getenv("MAPILLARY_CLIENT_TOKEN")

## vars established by set_environment.py
# base_dir - root directory for working files
# image_folder - fully qualified folder where images are stored
# image_metadata_folder - fully qualified folder where image_metadata is stored
# sqlite3_dbname - database file name without folder or '.db' extension (both are added where the path is built)
# output_folder - default location to write output files



In [63]:
def get_and_process_detections(image_id, API_KEY, image_size_indicator='thumb_original_url', image_dir=image_folder, step_down=.01):
    """Fetch one image's metadata and detections and compute each detection's share of the image.

    Parameters
    ----------
    image_id
        Id of the image to fetch.
    API_KEY
        Mapillary client token handed to the ``mapillary_utils`` helpers.
    image_size_indicator
        Forwarded to ``mu.get_mapillary_image``.
    image_dir
        Forwarded to ``mu.get_mapillary_image``; defaults to ``image_folder``.
    step_down
        Scale factor forwarded to ``dau.detect_relative_pixel_count`` and applied
        to the image height and width when sizing the whole image.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(df_metadata, df_detections)``. If the metadata is missing or flagged
        with ``error == 'error'``, both frames hold a single row containing only
        ``image_id``. Otherwise ``df_metadata`` has one row built from the
        metadata and ``df_detections`` has one row per decoded detection
        geometry plus the derived columns ``relative_pixel_count``,
        ``relative_image_pixel_count`` and ``percent_of_image``.
    """
    detection_columns = ['image_id', 'detection_id','detection_label','feature_id','image_height','image_width','extent','properties','coordinates']

    # get the image and metadata (the image object itself is not used here)
    _image, metadata = mu.get_mapillary_image(image_id, API_KEY, image_size_indicator=image_size_indicator, image_dir=image_dir)

    try:
        fetch_failed = metadata.get('error') == 'error'
    except AttributeError:
        # metadata is None (or otherwise not dict-like)
        fetch_failed = True
    if fetch_failed:
        # placeholder rows so the caller still records that this id was attempted
        return pd.DataFrame([{'image_id': image_id}]), pd.DataFrame([{'image_id': image_id}])

    df_metadata = pd.DataFrame([metadata])

    # get the detections and extract them
    detections = mu.get_mapillary_detections(image_id, API_KEY)
    df_segments = mu.extract_detections(detections)

    if df_segments.empty:
        decoded = []
    else:
        # merge w/ metadata so the decoder can access height/width
        df_segments = pd.merge(df_segments, df_metadata, left_on='image_id', right_on='image_id')
        # apply(axis=1) on an empty frame returns a DataFrame rather than
        # per-row results, so only decode when there is something to decode
        decoded = [] if df_segments.empty else df_segments.apply(
            lambda seg: mu.decode_base64_geometry_fromdf(seg, normalize=True, image_height=seg.height, image_width=seg.width),
            axis=1,
        )

    df_detections = pd.DataFrame(columns=detection_columns)

    # must iterate like this because any given detection can yield multiple arrays
    n_fields = len(detection_columns)
    for geometry_rows in decoded:
        for row in geometry_rows:
            try:
                df_detections.loc[len(df_detections)] = [row[k] for k in range(n_fields)]
            except Exception:
                # best effort: skip a malformed row, but do not swallow KeyboardInterrupt
                print(f"could not add to df_detections: {image_id}")

    ## get relative pixel count per detection
    df_detections['relative_pixel_count'] = df_detections.coordinates.apply(dau.detect_relative_pixel_count, step_down=step_down)

    # The denominator is the whole (stepped-down) image rather than the sum of
    # the detections. Built with comprehensions so an empty frame is handled.
    df_detections['relative_image_pixel_count'] = [
        (height * step_down) * (width * step_down)
        for height, width in zip(df_detections.image_height, df_detections.image_width)
    ]
    df_detections['percent_of_image'] = [
        (count / total) * 100 if total != 0 else 0
        for count, total in zip(df_detections.relative_pixel_count, df_detections.relative_image_pixel_count)
    ]

    return df_metadata, df_detections

# Process every pending image id and append its results to the running frames.
i = 0  # keeps the counter defined when image_ids is empty
for i, image_id in enumerate(image_ids, start=1):
    # progress heartbeat every 20 images
    if not i % 20:
        print(i)

    temp_meta, temp_detect = get_and_process_detections(
        image_id,
        API_KEY,
        image_size_indicator='thumb_original_url',
        image_dir=image_folder,
    )
    df_metadata, df_detections = (
        pd.concat([df_metadata, temp_meta]),
        pd.concat([df_detections, temp_detect]),
    )

    # pause one second between images
    time.sleep(1)
    
    #df_detections = pd.concat([df_detections, get_and_process_detections(image_id, API_KEY, image_size_indicator='thumb_original_url', image_dir=image_folder)])
    



  df_metadata = pd.concat([df_metadata, temp_meta])
  df_metadata = pd.concat([df_metadata, temp_meta])
  df_metadata = pd.concat([df_metadata, temp_meta])


20


  df_metadata = pd.concat([df_metadata, temp_meta])
  df_metadata = pd.concat([df_metadata, temp_meta])
  df_metadata = pd.concat([df_metadata, temp_meta])
  df_metadata = pd.concat([df_metadata, temp_meta])
  df_metadata = pd.concat([df_metadata, temp_meta])
  df_metadata = pd.concat([df_metadata, temp_meta])
  df_metadata = pd.concat([df_metadata, temp_meta])


40


  df_metadata = pd.concat([df_metadata, temp_meta])
  df_metadata = pd.concat([df_metadata, temp_meta])
  df_metadata = pd.concat([df_metadata, temp_meta])
  df_metadata = pd.concat([df_metadata, temp_meta])
  df_metadata = pd.concat([df_metadata, temp_meta])
  df_metadata = pd.concat([df_metadata, temp_meta])
  df_metadata = pd.concat([df_metadata, temp_meta])
  df_metadata = pd.concat([df_metadata, temp_meta])
  df_metadata = pd.concat([df_metadata, temp_meta])
  df_metadata = pd.concat([df_metadata, temp_meta])


60


  df_metadata = pd.concat([df_metadata, temp_meta])


In [64]:
## backup to excel before dropping columns
# The .xlsx files written here are the ones the restart cell reads back in.
df_metadata.to_excel(f"{image_metadata_folder}//sample_image_metadata.xlsx", index=False)
df_metadata.to_csv(f"{image_metadata_folder}//sample_image_metadata.csv", index=False)

# Sort descending by image_id and then percent_of_image before saving.
df_detections = df_detections.sort_values(by=['image_id','percent_of_image'], ascending=False)
df_detections.to_excel(f"{image_metadata_folder}//sample_image_detections.xlsx", index=False)
df_detections.to_csv(f"{image_metadata_folder}//sample_image_detections.csv", index=False)

In [65]:
# Drop 'properties' and 'coordinates' before the sqlite write below.
# NOTE(review): presumably because they hold non-scalar values that would need
# serializing first -- confirm.
df_detections = df_detections.drop(columns=['properties','coordinates'])

In [66]:

db_file = f'{image_metadata_folder}//{sqlite3_dbname}.db'

conn = sqlite3.connect(db_file)
try:
    # if_exists='replace' rewrites the image_detections table with the current frame
    df_detections.to_sql('image_detections', con=conn, if_exists='replace', index=False)
finally:
    # Close the connection even if the write fails
    conn.close()