## Process

1) sample images
2) obtain detections (mapillary_metadata)
3) normalize detections (mapillary_metadata)
4) for each detection, calculate number of and percent of image consumed by each type (load_and_process)

In [1]:
import math
import folium
from dotenv import load_dotenv
from os import getenv

import requests
import json

import numpy as np
import pandas as pd
from geopy.geocoders import Nominatim
from geopy.distance import ELLIPSOIDS, distance

from datetime import datetime
import sqlite3
import random

import base64
import mapbox_vector_tile

import sys
sys.path.append("D:\\projects\\geo_image_experiments\\src")
import importlib
import mapillary_utils as mu
importlib.reload(mu)


load_dotenv()
API_KEY = getenv("MAPILLARY_CLIENT_TOKEN")

## Sample Images:
### run iterate_over_locations

In [2]:
print("run iterate_over_locations, then assign the output sqlite file below")
test_dir = "d://projects_working_directories//202408_pano_images"
file = "360_images_2024-09-08.db"

images_dir = f"{test_dir}//images"

db_file = f"{test_dir}//{file}"

# load sampled images 
conn = sqlite3.connect(f"{test_dir}//{file}")
df_sampled_images = pd.read_sql('select * from sampled_images', conn)
df_image_detections = pd.read_sql('select * from image_detections', conn)
conn.close()






run iterate_over_locations, then assign the output sqlite file below


## Obtain detections

In [3]:
#df_sampled_images = df_sampled_images.sample(10).copy()

In [4]:
def scale_down_90prct(row):
    h = row.height
    w = row.width
    coords = row.coordinates

    h = int(h / 100)
    w = int(w / 100)

    coords_small=[]
    for c in coords:
        c0 = int(c[0] / 100)
        c1 = int(c[1] / 100)
        coords_small.append([c0,c1])

    return pd.Series({'h_small':h, 'w_small':w, 'coords_small':coords_small})




In [5]:
#df_sampled_images['detection_status'] = 'tbd'
df_sampled_images['detection_status'] = df_sampled_images['image_id'].apply(lambda x: 'downloaded' if len(df_image_detections[df_image_detections.image_id == x]) >0 else 'tbd')

In [6]:
len(df_sampled_images[df_sampled_images.detection_status == 'tbd'])


12746

In [233]:
df_image_detection_results = pd.DataFrame(columns=['image_id','detection_label','feature_count','detection_prct_of_image'])
iteration=0
start_time = time.time()
for i, row in df_sampled_images.iterrows():
    if row.detection_status != 'tbd': # if we already have a status other than tbd
        continue
        

    if iteration % 100 == 0:
        print(f"iteration {iteration} took {time.time() - start_time}")
        df_image_detection_results.to_excel("c://temp//backup_results.xlsx")
        start_time = time.time()
    iteration += 1+

    detections = mu.get_mapillary_detections(row.image_id, API_KEY) # download the detections
    df_segments = mu.extract_detections(detections) # process the json-formated detections
    df_segments = pd.merge(df_segments,df_sampled_images, left_on='image_id', right_on='image_id') # merge metadata back. Important for height/width atts

    arrays = df_segments.apply(lambda x: mu.decode_base64_geometry_fromdf(x, normalize=True, image_height=x.height, image_width=x.width), axis=1) 
    if len(arrays) == 0:
        df_sampled_images.at[i, 'detection_status'] = 'none_found'
        continue
        
        
    temp_df = [pd.DataFrame(arr, columns=['image_id', 'detection_id','detection_label','feature_id','height','width','extent','properties','coordinates']) for arr in arrays]
    df_detection_coords = pd.concat(temp_df, ignore_index=True)

    #scale down matrices for faster performance
    df_detection_coords[['h_small','w_small','coords_small']] = df_detection_coords.apply(scale_down_90prct, axis=1)
    
    
    df_detection_coords['image_mask_small'] = df_detection_coords.apply(lambda x: mu.create_image_mask(x.h_small, x.w_small, x.coords_small), axis=1)
    
    #get pixel count
    df_detection_coords['pixel_cnt_small'] = df_detection_coords['image_mask_small'].apply(lambda x: x.sum())
    #print(f"process took : {time.time() - start_time}")

    ## perform other calcs
    df_image_detections = df_detection_coords[['image_id','detection_label','h_small','w_small']].drop_duplicates().copy().reset_index()
    df_image_detections['px_denominator'] = df_image_detections.apply(lambda x: int(x.h_small * x.w_small), axis=1)
    df_image_detections['feature_count'] = df_image_detections.apply(lambda x: len(df_detection_coords[(df_detection_coords['image_id'] == x.image_id) &
                                                                                  (df_detection_coords['detection_label'] == x.detection_label)]), axis=1)
    
    df_image_detections['px_numerator'] = df_image_detections.apply(lambda x: df_detection_coords['pixel_cnt_small'][(df_detection_coords['image_id'] == x.image_id) &
                                                                                  (df_detection_coords['detection_label'] == x.detection_label)].sum(), axis=1)
    df_image_detections['detection_prct_of_image'] = df_image_detections.apply(lambda x: int((x.px_numerator / x.px_denominator) * 100), axis=1)

    df_image_detections[['image_id','detection_label','feature_count','detection_prct_of_image']].sort_values(by='detection_prct_of_image', ascending=False)
    if len(df_image_detection_results) == 0:
        df_image_detection_results = df_image_detections.copy()
    else:
        df_image_detection_results = pd.concat([df_image_detection_results,df_image_detections])


    
    

iteration 0 took 0.0560910701751709
iteration 100 took 99.23392748832703
iteration 200 took 146.576247215271
iteration 300 took 147.25661516189575
iteration 400 took 110.6176130771637
iteration 500 took 73.97959613800049
iteration 600 took 121.52457690238953
iteration 700 took 158.94547295570374
iteration 800 took 130.37492680549622
iteration 900 took 127.14806604385376
iteration 1000 took 100.07942652702332
iteration 1100 took 82.87362909317017
iteration 1200 took 89.16278386116028
iteration 1300 took 97.5653784275055
iteration 1400 took 130.91983914375305
iteration 1500 took 98.03778409957886
iteration 1600 took 90.7951889038086
iteration 1700 took 98.26988673210144
iteration 1800 took 96.65073370933533
iteration 1900 took 106.37265157699585
iteration 2000 took 73.55244660377502
iteration 2100 took 95.66613245010376
iteration 2200 took 95.38386082649231
iteration 2300 took 92.82564234733582
iteration 2400 took 79.53934597969055
iteration 2500 took 86.10284304618835
iteration 2600 too

ConnectionError: ('Connection aborted.', ConnectionAbortedError(10053, 'An established connection was aborted by the software in your host machine', None, 10053, None))

In [223]:
len(arrays)

0

In [234]:
df_image_detection_results.groupby('image_id').count()

Unnamed: 0_level_0,index,detection_label,h_small,w_small,px_denominator,feature_count,px_numerator,detection_prct_of_image
image_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1000138537907833,26,26,26,26,26,26,26,26
1000146154569642,26,26,26,26,26,26,26,26
1000255234893447,24,24,24,24,24,24,24,24
1000320658379856,29,29,29,29,29,29,29,29
1000407131205067,24,24,24,24,24,24,24,24
...,...,...,...,...,...,...,...,...
999617694644292,21,21,21,21,21,21,21,21
999626310736555,24,24,24,24,24,24,24,24
999667991668183,20,20,20,20,20,20,20,20
999714971332360,28,28,28,28,28,28,28,28


In [235]:
#df_image_detection_results[['image_id','detection_label','feature_count','detection_prct_of_image']].sort_values(by='detection_prct_of_image', ascending=False)

timestamp = datetime.now().strftime('%Y-%m-%d')

conn = sqlite3.connect(f'{test_dir}//360_images_{timestamp}.db')


df_image_detection_results[['image_id','detection_label','feature_count','detection_prct_of_image']].to_sql('image_detections', con=conn, if_exists='replace', index=False)



# Close the connection
conn.close()

## END

In [139]:
df_sampled_images['image_url'][df_sampled_images.image_id == '576811607122341'].tolist()


['https://scontent.ffsd3-1.fna.fbcdn.net/m1/v/t6/An-J4RAzQZCTRsKeM_kWjwDrgh1Niw_jQ3oB8C4bnfoWQmFaGcr5xHVzEh9wWbar0x3uEK8nMVrHK0oqq881UrG8wo3kxhd3Eaa5ny245Q7WXaZi7xt2MmJH0Q81YrpbzUYWRQpu6cJ3mHAHiqlKbA?ccb=10-5&oh=00_AYBCp-YWeceVVdOUD_2zAmF1amTGrKeLexSISQN27K0DhQ&oe=67051EEF&_nc_sid=201bca']

In [141]:
df_detection_coords[['h_small','w_small','coords_small']] = df_detection_coords.apply(scale_down_90prct, axis=1)

start_time = time.time()
df_detection_coords['image_mask_small'] = df_detection_coords.apply(lambda x: mu.create_image_mask(x.h_small, x.w_small, x.coords_small), axis=1)

#get pixel count
df_detection_coords['pixel_cnt_small'] = df_detection_coords['image_mask_small'].apply(lambda x: x.sum())
print(f"process took : {time.time() - start_time}")

process took : 0.22455668449401855


In [114]:
import time


<module 'time' (built-in)>

In [119]:
df_detection_coords = df_detection_coords.sample(5).copy()

In [120]:
start_time = time.time()
df_detection_coords['image_mask'] = df_detection_coords.apply(lambda x: mu.create_image_mask(x.height, x.width, x.coordinates), axis=1)

#get pixel count
df_detection_coords['pixel_cnt'] = df_detection_coords['image_mask'].apply(lambda x: x.sum())
print(f"process took : {time.time() - start_time}")

process took : 14.711581945419312


In [121]:
df_detection_coords.coordinates.sample()

416    [[161, 1218], [164, 1209], [161, 1209], [161, ...
Name: coordinates, dtype: object

In [130]:
df_detection_coords[['h_small','w_small','coords_small']] = df_detection_coords.apply(scale_down_90prct, axis=1)

start_time = time.time()
df_detection_coords['image_mask_small'] = df_detection_coords.apply(lambda x: mu.create_image_mask(x.h_small, x.w_small, x.coords_small), axis=1)

#get pixel count
df_detection_coords['pixel_cnt_small'] = df_detection_coords['image_mask_small'].apply(lambda x: x.sum())
print(f"process took : {time.time() - start_time}")

process took : 0.13829588890075684


In [145]:
df_detection_coords

Unnamed: 0,image_id,detection_id,detection_label,feature_id,height,width,extent,properties,coordinates,h_small,w_small,coords_small,image_mask_small,pixel_cnt_small
0,576811607122341,576966167106885,construction--flat--road,1,2880,5760,4096,{},"[[4795, 596], [4820, 610], [4836, 613], [4847,...",28,57,"[[47, 5], [48, 6], [48, 6], [48, 6], [48, 6], ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
1,576811607122341,576966170440218,construction--flat--road,1,2880,5760,4096,{},"[[5114, 933], [5117, 938], [5166, 933], [5177,...",28,57,"[[51, 9], [51, 9], [51, 9], [51, 9], [52, 9], ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",0
2,576811607122341,576966173773551,construction--flat--road,1,2880,5760,4096,{},"[[1837, 578], [1835, 578], [1835, 581], [1837,...",28,57,"[[18, 5], [18, 5], [18, 5], [18, 5], [18, 5]]","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",0
3,576811607122341,576966177106884,construction--flat--road,1,2880,5760,4096,{},"[[1503, 1166], [1508, 1172], [1525, 1174], [15...",28,57,"[[15, 11], [15, 11], [15, 11], [15, 11], [15, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",0
4,576811607122341,576966180440217,construction--flat--road,1,2880,5760,4096,{},"[[4253, 530], [4256, 535], [4294, 541], [4308,...",28,57,"[[42, 5], [42, 5], [42, 5], [43, 5], [43, 5], ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
675,576811607122341,576968017106700,construction--flat--driveway,1,2880,5760,4096,{},"[[5212, 1016], [5158, 1013], [5169, 999], [517...",28,57,"[[52, 10], [51, 10], [51, 9], [51, 9], [52, 9]...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",2
676,576811607122341,576968020440033,object--sign--advertisement,1,2880,5760,4096,{},"[[4965, 1405], [4968, 1440], [4985, 1454], [50...",28,57,"[[49, 14], [49, 14], [49, 14], [50, 14], [50, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",0
677,576811607122341,576968020440033,object--sign--advertisement,2,2880,5760,4096,{},"[[4872, 1552], [4875, 1569], [4875, 1607], [48...",28,57,"[[48, 15], [48, 15], [48, 16], [48, 16], [48, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",2
678,576811607122341,576968027106699,object--support--pole,1,2880,5760,4096,{},"[[3069, 1333], [3072, 1333], [3072, 1330], [30...",28,57,"[[30, 13], [30, 13], [30, 13], [30, 13], [30, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",0


In [187]:
df_image_detections[['image_id','detection_label','feature_count','detection_prct_of_image']].sort_values(by='detection_prct_of_image', ascending=False)

Unnamed: 0,image_id,detection_label,feature_count,detection_prct_of_image
3,576811607122341,nature--sky,56,21.365915
0,576811607122341,construction--flat--road,45,17.54386
7,576811607122341,void--ego-vehicle,15,14.912281
5,576811607122341,object--wire-group,54,14.097744
1,576811607122341,construction--structure--building,59,10.025063
9,576811607122341,void--unlabeled,267,6.203008
13,576811607122341,object--banner,5,2.192982
8,576811607122341,void--ground,39,2.192982
18,576811607122341,object--support--utility-pole,16,1.691729
4,576811607122341,nature--vegetation,30,1.566416


In [None]:
importlib.reload(mu)

In [105]:
df_detection_coords2 = df_detection_coords.sample(1).copy()

In [106]:
df_detection_coords2['image_mask'] = df_detection_coords2.apply(lambda x: mu.create_image_mask(x.height, x.width, x.coordinates), axis=1)

In [111]:
df_detection_coords2.image_mask.tolist()[0].sum()

289