Script para extraer metadatos de un dataset de imágenes de señales de tráfico capturadas en la ciudad de Cochabamba.
Author: Alvaro Zambrana Sejas

In [1]:
import os
import pandas as pd
import numpy as np

In [2]:
# Load the dataset

# Path to the dataset
path = '../dataset/processed/yolo_signals_cbba'

train_path = os.path.join(path, 'train', 'labels')
test_path = os.path.join(path, 'test', 'labels')
val_path = os.path.join(path, 'val', 'labels')

# traverse all files in the dataset
train_files = os.listdir(train_path)
test_files = os.listdir(test_path)
val_files = os.listdir(val_path)

In [3]:
print(train_files[:5])

['2024_08_18_17_37_25_601_-0400_1280x1280.left-region.txt', '2024_08_18_17_40_54_602_-0400_1280x1280.left-region.txt', '2024_08_18_17_40_55_571_-0400_1280x1280.left-region.txt', '2024_08_18_17_40_57_532_-0400_1280x1280.left-region.txt', '2024_08_18_17_42_10_543_-0400_1280x1280.right-region.txt']


In [4]:
# read data.yaml file
data_yaml = os.path.join(path, 'data.yaml')
data_yaml

'../dataset/yolo_signals_cbba\\data.yaml'

In [5]:
# parse yaml file
import yaml
with open(data_yaml, 'r') as f:
    data = yaml.load(f, Loader=yaml.FullLoader)
    
class_names = data['names']
class_names

{0: 'zona-escolar',
 1: 'pare',
 2: 'paso-peatonal',
 3: 'ceda-el-paso',
 4: 'limite-velocidad-10',
 5: 'limite-velocidad-20',
 6: 'limite-velocidad-30',
 7: 'limite-velocidad-40',
 8: 'limite-velocidad-35',
 9: 'prohibido-girar-izquierda',
 10: 'semaforo-rojo',
 11: 'prohibido-girar-u',
 12: 'semaforo-inteligente-rojo',
 13: 'prohibido-girar-derecha',
 14: 'pare-horizontal'}

In [6]:
# extract file name without extension
def extract_file_name(file):
    return os.path.splitext(os.path.basename(file))[0]

# Extract labels from the yolo dataset
def extract_labels(file):
    # Read the file
    with open(file, 'r') as f:
        lines = f.readlines()
    # Extract the labels
    labels = []
    for line in lines:
        label = line.split(' ')[0]
        labels.append(class_names[int(label)])
        
    # convert labels to string and remove square brackets
    labels = str(labels)
    labels = labels.replace('[', '')
    labels = labels.replace(']', '')
    labels = labels.replace('\'', '')
    # remove leading and trailing whitespaces
    labels = labels.strip()
    # remove whitespaces between commas
    labels = labels.replace(', ', ',')

    return labels

# Extract the labels from the dataset
train_labels = {}
for file in train_files:
    file = os.path.join(train_path, file)
    labels = extract_labels(file)
    train_labels[extract_file_name(file)] = labels
    
test_labels = {}
for file in test_files:
    file = os.path.join(test_path, file)
    labels = extract_labels(file)
    test_labels[extract_file_name(file)] = labels
    
val_labels = {}
for file in val_files:
    file = os.path.join(val_path, file)
    labels = extract_labels(file)
    val_labels[extract_file_name(file)] = labels

In [7]:
train_labels

{'2024_08_18_17_37_25_601_-0400_1280x1280.left-region': ['zona-escolar'],
 '2024_08_18_17_40_54_602_-0400_1280x1280.left-region': ['zona-escolar',
  'prohibido-girar-izquierda'],
 '2024_08_18_17_40_55_571_-0400_1280x1280.left-region': ['prohibido-girar-izquierda',
  'zona-escolar'],
 '2024_08_18_17_40_57_532_-0400_1280x1280.left-region': ['prohibido-girar-izquierda'],
 '2024_08_18_17_42_10_543_-0400_1280x1280.right-region': ['limite-velocidad-40'],
 '2024_08_18_17_42_27_608_-0400 (20 Km)_1280x1280.left-region': ['limite-velocidad-20'],
 '2024_08_18_17_42_27_608_-0400_1280x1280.left-region': ['limite-velocidad-20'],
 '2024_08_18_19_03_56_772_-0400_1280x1280.left-region': ['zona-escolar'],
 '2024_09_08_17_08_57_158_-0400_1280x1280.left-region': ['zona-escolar',
  'prohibido-girar-izquierda'],
 '2024_09_08_17_08_59_088_-0400_1280x1280.left-region': ['zona-escolar',
  'prohibido-girar-izquierda'],
 '2024_09_08_17_09_00_186_-0400_1280x1280.left-region': ['prohibido-girar-izquierda'],
 '2024

In [9]:
# extract EXIF and GPS metadata from the images
from PIL import Image
from PIL.ExifTags import TAGS, GPSTAGS

def get_exif_data(image):
    img = Image.open(image)
    exif_data = img._getexif()
    if exif_data:
        exif_data = {TAGS.get(tag): value for tag, value in exif_data.items()}
    return exif_data

def get_gps_data(image):
    img = Image.open(image)
    exif_data = img._getexif()
    if exif_data:
        exif_data = {TAGS.get(tag): value for tag, value in exif_data.items()}
        if 'GPSInfo' in exif_data:
            gps_data = {GPSTAGS.get(tag): value for tag, value in exif_data['GPSInfo'].items()}
        else:
            gps_data = None
    return gps_data

# Extract the EXIF and GPS data from the images
train_exif_data = []
train_gps_data = []
for file in train_files:
    file = os.path.join(train_path, file)
    exif_data = get_exif_data(file)
    gps_data = get_gps_data(file)
    train_exif_data.append(exif_data)
    train_gps_data.append(gps_data)
    
test_exif_data = []
test_gps_data = []
for file in test_files:
    file = os.path.join(test_path, file)
    exif_data = get_exif_data(file)
    gps_data = get_gps_data(file)
    test_exif_data.append(exif_data)
    test_gps_data.append(gps_data)
    
val_exif_data = []
val_gps_data = []
for file in val_files:
    file = os.path.join(val_path, file)
    exif_data = get_exif_data(file)
    gps_data = get_gps_data(file)
    val_exif_data.append(exif_data)
    val_gps_data.append(gps_data)
    

In [10]:
train_exif_data

[{'ImageWidth': 4624,
  'ImageLength': 3468,
  'GPSInfo': {1: 'S',
   2: (17.0, 25.0, 44.35176),
   3: 'W',
   4: (66.0, 9.0, 34.43112),
   5: b'\x00',
   6: 2596.5,
   7: (21.0, 37.0, 25.0),
   12: 'K',
   13: 27.8943,
   17: 255.0,
   27: b'fused',
   29: '2024:08:18'},
  'ResolutionUnit': 2,
  'ExifOffset': 237,
  'Make': 'samsung',
  'Model': 'SM-A715F',
  'Software': 'A715FXXS3AUA2',
  'Orientation': 1,
  'DateTime': '2024:08:18 17:37:25',
  'YCbCrPositioning': 1,
  'XResolution': 72.0,
  'YResolution': 72.0,
  'ExifVersion': b'022',
  'ComponentsConfiguration': b'\x01\x02\x03\x00',
  'ShutterSpeedValue': 7.339,
  'DateTimeOriginal': '2024:08:18 17:37:25',
  'DateTimeDigitized': '2024:08:18 17:37:25',
  'ApertureValue': 1.69,
  'BrightnessValue': 0.0,
  'ExposureBiasValue': 0.0,
  'MaxApertureValue': 1.69,
  'MeteringMode': 1,
  'LightSource': 0,
  'Flash': 0,
  'FocalLength': 5.23,
  'ColorSpace': 1,
  'ExifImageWidth': 1280,
  'SceneCaptureType': 0,
  'SubsecTime': '751434',
  '

In [11]:
train_gps_data

[{'GPSLatitudeRef': 'S',
  'GPSLatitude': (17.0, 25.0, 44.35176),
  'GPSLongitudeRef': 'W',
  'GPSLongitude': (66.0, 9.0, 34.43112),
  'GPSAltitudeRef': b'\x00',
  'GPSAltitude': 2596.5,
  'GPSTimeStamp': (21.0, 37.0, 25.0),
  'GPSSpeedRef': 'K',
  'GPSSpeed': 27.8943,
  'GPSImgDirection': 255.0,
  'GPSProcessingMethod': b'fused',
  'GPSDateStamp': '2024:08:18'},
 {'GPSLatitudeRef': 'S',
  'GPSLatitude': (17.0, 25.0, 1.54668),
  'GPSLongitudeRef': 'W',
  'GPSLongitude': (66.0, 9.0, 26.01324),
  'GPSAltitudeRef': b'\x00',
  'GPSAltitude': 2600.3,
  'GPSTimeStamp': (21.0, 40.0, 54.0),
  'GPSSpeedRef': 'K',
  'GPSSpeed': 34.4967,
  'GPSImgDirection': 244.0,
  'GPSProcessingMethod': b'fused',
  'GPSDateStamp': '2024:08:18'},
 {'GPSLatitudeRef': 'S',
  'GPSLatitude': (17.0, 25.0, 1.26228),
  'GPSLongitudeRef': 'W',
  'GPSLongitude': (66.0, 9.0, 25.96896),
  'GPSAltitudeRef': b'\x00',
  'GPSAltitude': 2600.3,
  'GPSTimeStamp': (21.0, 40.0, 55.0),
  'GPSSpeedRef': 'K',
  'GPSSpeed': 27.9167,


In [12]:
# Convert the extracted metadata to a dataframe
train_df = pd.DataFrame({'exif': train_exif_data, 'gps': train_gps_data})
test_df = pd.DataFrame({'exif': test_exif_data, 'gps': test_gps_data})
val_df = pd.DataFrame({'exif': val_exif_data, 'gps': val_gps_data})

In [13]:
train_df.head()

Unnamed: 0,exif,gps
0,"{'ImageWidth': 4624, 'ImageLength': 3468, 'GPS...","{'GPSLatitudeRef': 'S', 'GPSLatitude': (17.0, ..."
1,"{'ImageWidth': 4624, 'ImageLength': 3468, 'GPS...","{'GPSLatitudeRef': 'S', 'GPSLatitude': (17.0, ..."
2,"{'ImageWidth': 4624, 'ImageLength': 3468, 'GPS...","{'GPSLatitudeRef': 'S', 'GPSLatitude': (17.0, ..."
3,"{'ImageWidth': 4624, 'ImageLength': 3468, 'GPS...","{'GPSLatitudeRef': 'S', 'GPSLatitude': (17.0, ..."
4,"{'ImageWidth': 4624, 'ImageLength': 3468, 'GPS...","{'GPSLatitudeRef': 'S', 'GPSLatitude': (17.0, ..."


In [8]:
# extract EXIF metadata from the dataset, images folders

# Path to the dataset
path = '../dataset/processed/yolo_signals_cbba'

train_path = os.path.join(path, 'train', 'images')
test_path = os.path.join(path, 'test', 'images')
val_path = os.path.join(path, 'val', 'images')

# traverse all files in the dataset
train_files = os.listdir(train_path)
test_files = os.listdir(test_path)
val_files = os.listdir(val_path)


In [14]:
# genera un archivo .csv de los metadatos extraídos con las columnas: timestamp, image_name, exif_image_width, 
# exif_image_height, labels, latitude, latitude_ref, longitude, longitude_ref, altitude, altitude_ref, speed
def extract_metadata(df, files, labels):
    metadata = []
    
    for i in range(len(df)):
        exif = df['exif'][i]
        gps = df['gps'][i]
        
        if exif:
            timestamp = exif.get('DateTimeOriginal')
            image_name = files[i]
            exif_image_width = exif.get('ExifImageWidth')
            exif_image_height = exif.get('ExifImageHeight')
            classes = labels[extract_file_name(image_name)]
            
            if gps:
                latitude = gps.get('GPSLatitude')
                latitude_ref = gps.get('GPSLatitudeRef')
                longitude = gps.get('GPSLongitude')
                longitude_ref = gps.get('GPSLongitudeRef')
                altitude = gps.get('GPSAltitude')
                altitude_ref = gps.get('GPSAltitudeRef')
                speed = gps.get('GPSSpeed')
            else:
                latitude = None
                longitude = None
                altitude = None
                speed = None

            metadata.append([timestamp, image_name, exif_image_width, exif_image_height, classes, latitude, 
                             latitude_ref, longitude, longitude_ref, altitude, altitude_ref, speed])
    return metadata

train_metadata = extract_metadata(train_df, train_files, train_labels)
test_metadata = extract_metadata(test_df, test_files, test_labels)
val_metadata = extract_metadata(val_df, val_files, val_labels)

In [15]:
columns = ['timestamp', 'image_name', 'exif_image_width', 'exif_image_height', 'labels', 'latitude', 'latitude_ref', 'longitude', 'longitude_ref', 'altitude', 'altitude_ref', 'speed']
train_metadata_df = pd.DataFrame(train_metadata, columns=columns)
test_metadata_df = pd.DataFrame(test_metadata, columns=columns)
val_metadata_df = pd.DataFrame(val_metadata, columns=columns)

In [16]:
train_metadata_df.head()

Unnamed: 0,timestamp,image_name,exif_image_width,exif_image_height,labels,latitude,latitude_ref,longitude,longitude_ref,altitude,altitude_ref,speed
0,2024:08:18 17:37:25,2024_08_18_17_37_25_601_-0400_1280x1280.left-r...,1280,1280,[zona-escolar],"(17.0, 25.0, 44.35176)",S,"(66.0, 9.0, 34.43112)",W,2596.5,b'\x00',27.8943
1,2024:08:18 17:40:54,2024_08_18_17_40_54_602_-0400_1280x1280.left-r...,1280,1280,"[zona-escolar, prohibido-girar-izquierda]","(17.0, 25.0, 1.54668)",S,"(66.0, 9.0, 26.01324)",W,2600.3,b'\x00',34.4967
2,2024:08:18 17:40:55,2024_08_18_17_40_55_571_-0400_1280x1280.left-r...,1280,1280,"[prohibido-girar-izquierda, zona-escolar]","(17.0, 25.0, 1.26228)",S,"(66.0, 9.0, 25.96896)",W,2600.3,b'\x00',27.9167
3,2024:08:18 17:40:57,2024_08_18_17_40_57_532_-0400_1280x1280.left-r...,1280,1280,[prohibido-girar-izquierda],"(17.0, 25.0, 0.84144)",S,"(66.0, 9.0, 25.88796)",W,2600.3,b'\x00',16.9381
4,2024:08:18 17:42:10,2024_08_18_17_42_10_543_-0400_1280x1280.right-...,1280,1280,[limite-velocidad-40],"(17.0, 24.0, 44.23716)",S,"(66.0, 9.0, 22.89816)",W,2599.3,b'\x00',31.8807


In [17]:
train_metadata_df.dtypes

timestamp            object
image_name           object
exif_image_width      int64
exif_image_height     int64
labels               object
latitude             object
latitude_ref         object
longitude            object
longitude_ref        object
altitude             object
altitude_ref         object
speed                object
dtype: object

In [18]:
def dms_to_decimal(dms):
    """
    Convert a coordinate in DMS (degrees, minutes, seconds) format to decimal degrees.    
    :return: Decimal degrees
    """
    degrees = float(dms[0])
    minutes = float(dms[1])
    seconds = float(dms[2])
    decimal_degrees = degrees + (minutes / 60) + (seconds / 3600)
    return decimal_degrees

# fix the data types
def fix_data_types(df):
    df['timestamp'] = pd.to_datetime(df['timestamp'], format='%Y:%m:%d %H:%M:%S')    
    df['speed'] = df['speed'].astype(float)
    return df

train_metadata_df = fix_data_types(train_metadata_df)
test_metadata_df = fix_data_types(test_metadata_df)
val_metadata_df = fix_data_types(val_metadata_df)

In [19]:
def convert_latitude_and_longitude(df):
    for (i, row) in df.iterrows():
        latitude = row['latitude']
        longitude = row['longitude']
        if latitude:
            latitude = dms_to_decimal(latitude)
            if row['latitude_ref'] == 'S':
                latitude = -latitude
            df.at[i, 'latitude'] = latitude
        if longitude:
            longitude = dms_to_decimal(longitude)
            if row['longitude_ref'] == 'W':
                longitude = -longitude
            df.at[i, 'longitude'] = longitude
    return df
    
train_metadata_df = convert_latitude_and_longitude(train_metadata_df)
test_metadata_df = convert_latitude_and_longitude(test_metadata_df)
val_metadata_df = convert_latitude_and_longitude(val_metadata_df)
# df['altitude'] = df['altitude'].astype(float)

In [20]:
train_metadata_df.dtypes

timestamp            datetime64[ns]
image_name                   object
exif_image_width              int64
exif_image_height             int64
labels                       object
latitude                     object
latitude_ref                 object
longitude                    object
longitude_ref                object
altitude                     object
altitude_ref                 object
speed                       float64
dtype: object

In [21]:
train_metadata_df['speed'].describe()

count    816.000000
mean      23.031026
std       10.628836
min        0.935700
25%       14.764800
50%       22.819400
75%       31.126625
max       66.852300
Name: speed, dtype: float64

In [22]:
# check for missing values
train_metadata_df.isnull().sum()

timestamp            0
image_name           0
exif_image_width     0
exif_image_height    0
labels               0
latitude             0
latitude_ref         0
longitude            0
longitude_ref        0
altitude             0
altitude_ref         0
speed                0
dtype: int64

In [23]:
# merge the 3 datasets, train, test and val
metadata_df = pd.concat([train_metadata_df, test_metadata_df, val_metadata_df])


In [24]:
metadata_df.head()

Unnamed: 0,timestamp,image_name,exif_image_width,exif_image_height,labels,latitude,latitude_ref,longitude,longitude_ref,altitude,altitude_ref,speed
0,2024-08-18 17:37:25,2024_08_18_17_37_25_601_-0400_1280x1280.left-r...,1280,1280,[zona-escolar],-17.428987,S,-66.159564,W,2596.5,b'\x00',27.8943
1,2024-08-18 17:40:54,2024_08_18_17_40_54_602_-0400_1280x1280.left-r...,1280,1280,"[zona-escolar, prohibido-girar-izquierda]",-17.417096,S,-66.157226,W,2600.3,b'\x00',34.4967
2,2024-08-18 17:40:55,2024_08_18_17_40_55_571_-0400_1280x1280.left-r...,1280,1280,"[prohibido-girar-izquierda, zona-escolar]",-17.417017,S,-66.157214,W,2600.3,b'\x00',27.9167
3,2024-08-18 17:40:57,2024_08_18_17_40_57_532_-0400_1280x1280.left-r...,1280,1280,[prohibido-girar-izquierda],-17.4169,S,-66.157191,W,2600.3,b'\x00',16.9381
4,2024-08-18 17:42:10,2024_08_18_17_42_10_543_-0400_1280x1280.right-...,1280,1280,[limite-velocidad-40],-17.412288,S,-66.156361,W,2599.3,b'\x00',31.8807


In [25]:
metadata_df.shape

(1167, 12)

In [26]:
# save the metadata to a csv file
metadata_df.to_csv('../dataset/processed/yolo_signals_cbba/raw_metadata.csv', index=False)

In [27]:
# one hot encode the labels using MultiLabelBinarizer
from sklearn.preprocessing import MultiLabelBinarizer

mlb = MultiLabelBinarizer()
labels = mlb.fit_transform(metadata_df['labels'])
labels_df = pd.DataFrame(labels, columns=mlb.classes_)

In [28]:
labels_df.head()

Unnamed: 0,ceda-el-paso,limite-velocidad-10,limite-velocidad-20,limite-velocidad-30,limite-velocidad-35,limite-velocidad-40,pare,pare-horizontal,paso-peatonal,prohibido-girar-derecha,prohibido-girar-izquierda,prohibido-girar-u,zona-escolar
0,0,0,0,0,0,0,0,0,0,0,0,0,1
1,0,0,0,0,0,0,0,0,0,0,1,0,1
2,0,0,0,0,0,0,0,0,0,0,1,0,1
3,0,0,0,0,0,0,0,0,0,0,1,0,0
4,0,0,0,0,0,1,0,0,0,0,0,0,0


In [29]:
# fix InvalidIndexError: Reindexing only valid with uniquely valued Index objects
metadata_df = metadata_df.reset_index(drop=True)
metadata_df.head()

Unnamed: 0,timestamp,image_name,exif_image_width,exif_image_height,labels,latitude,latitude_ref,longitude,longitude_ref,altitude,altitude_ref,speed
0,2024-08-18 17:37:25,2024_08_18_17_37_25_601_-0400_1280x1280.left-r...,1280,1280,[zona-escolar],-17.428987,S,-66.159564,W,2596.5,b'\x00',27.8943
1,2024-08-18 17:40:54,2024_08_18_17_40_54_602_-0400_1280x1280.left-r...,1280,1280,"[zona-escolar, prohibido-girar-izquierda]",-17.417096,S,-66.157226,W,2600.3,b'\x00',34.4967
2,2024-08-18 17:40:55,2024_08_18_17_40_55_571_-0400_1280x1280.left-r...,1280,1280,"[prohibido-girar-izquierda, zona-escolar]",-17.417017,S,-66.157214,W,2600.3,b'\x00',27.9167
3,2024-08-18 17:40:57,2024_08_18_17_40_57_532_-0400_1280x1280.left-r...,1280,1280,[prohibido-girar-izquierda],-17.4169,S,-66.157191,W,2600.3,b'\x00',16.9381
4,2024-08-18 17:42:10,2024_08_18_17_42_10_543_-0400_1280x1280.right-...,1280,1280,[limite-velocidad-40],-17.412288,S,-66.156361,W,2599.3,b'\x00',31.8807


In [30]:
# merge the labels with the metadata
metadata_df = pd.concat([metadata_df, labels_df], axis=1)
metadata_df.head()

Unnamed: 0,timestamp,image_name,exif_image_width,exif_image_height,labels,latitude,latitude_ref,longitude,longitude_ref,altitude,...,limite-velocidad-30,limite-velocidad-35,limite-velocidad-40,pare,pare-horizontal,paso-peatonal,prohibido-girar-derecha,prohibido-girar-izquierda,prohibido-girar-u,zona-escolar
0,2024-08-18 17:37:25,2024_08_18_17_37_25_601_-0400_1280x1280.left-r...,1280,1280,[zona-escolar],-17.428987,S,-66.159564,W,2596.5,...,0,0,0,0,0,0,0,0,0,1
1,2024-08-18 17:40:54,2024_08_18_17_40_54_602_-0400_1280x1280.left-r...,1280,1280,"[zona-escolar, prohibido-girar-izquierda]",-17.417096,S,-66.157226,W,2600.3,...,0,0,0,0,0,0,0,1,0,1
2,2024-08-18 17:40:55,2024_08_18_17_40_55_571_-0400_1280x1280.left-r...,1280,1280,"[prohibido-girar-izquierda, zona-escolar]",-17.417017,S,-66.157214,W,2600.3,...,0,0,0,0,0,0,0,1,0,1
3,2024-08-18 17:40:57,2024_08_18_17_40_57_532_-0400_1280x1280.left-r...,1280,1280,[prohibido-girar-izquierda],-17.4169,S,-66.157191,W,2600.3,...,0,0,0,0,0,0,0,1,0,0
4,2024-08-18 17:42:10,2024_08_18_17_42_10_543_-0400_1280x1280.right-...,1280,1280,[limite-velocidad-40],-17.412288,S,-66.156361,W,2599.3,...,0,0,1,0,0,0,0,0,0,0


In [31]:
metadata_df.shape

(1167, 25)

In [32]:
metadata_df.dtypes

timestamp                    datetime64[ns]
image_name                           object
exif_image_width                      int64
exif_image_height                     int64
labels                               object
latitude                             object
latitude_ref                         object
longitude                            object
longitude_ref                        object
altitude                             object
altitude_ref                         object
speed                               float64
ceda-el-paso                          int64
limite-velocidad-10                   int64
limite-velocidad-20                   int64
limite-velocidad-30                   int64
limite-velocidad-35                   int64
limite-velocidad-40                   int64
pare                                  int64
pare-horizontal                       int64
paso-peatonal                         int64
prohibido-girar-derecha               int64
prohibido-girar-izquierda       

In [82]:
# consume OpenStreetMap API using lat, lon to get the streets, avenues, city, country
import requests
import json
import time

def get_address(lat, lon):
    # https://operations.osmfoundation.org/policies/nominatim/
    url = f'https://nominatim.openstreetmap.org/reverse?format=json&lat={lat}&lon={lon}'
    print(url)
    response = requests.get(url)
    print(response)
    data = response.json()
    return data

# get the address for the first 5 rows
addresses = []
for i in range(5):
    print(time.time())
    # convert "(1,2,3)" to series of integers    
    lat = metadata_df['latitude'][i]
    lon = metadata_df['longitude'][i]
    # sleep by 1 second    
    address = get_address(lat, lon)
    addresses.append(address)
    time.sleep(60)

1729811336.5524888
https://nominatim.openstreetmap.org/reverse?format=json&lat=-17.4289866&lon=-66.1595642
<Response [403]>


JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [40]:
metadata_df.head()

Unnamed: 0,timestamp,image_name,exif_image_width,exif_image_height,labels,latitude,latitude_ref,longitude,longitude_ref,altitude,...,limite-velocidad-35,limite-velocidad-40,pare,pare-horizontal,paso-peatonal,prohibido-girar-derecha,prohibido-girar-izquierda,prohibido-girar-u,zona-escolar,address
0,2024-08-18 17:37:25,2024_08_18_17_37_25_601_-0400_1280x1280.left-r...,1280,1280,[zona-escolar],-17.428987,S,-66.159564,W,2596.5,...,0,0,0,0,0,0,0,0,1,
1,2024-08-18 17:40:54,2024_08_18_17_40_54_602_-0400_1280x1280.left-r...,1280,1280,"[zona-escolar, prohibido-girar-izquierda]",-17.417096,S,-66.157226,W,2600.3,...,0,0,0,0,0,0,1,0,1,
2,2024-08-18 17:40:55,2024_08_18_17_40_55_571_-0400_1280x1280.left-r...,1280,1280,"[prohibido-girar-izquierda, zona-escolar]",-17.417017,S,-66.157214,W,2600.3,...,0,0,0,0,0,0,1,0,1,
3,2024-08-18 17:40:57,2024_08_18_17_40_57_532_-0400_1280x1280.left-r...,1280,1280,[prohibido-girar-izquierda],-17.4169,S,-66.157191,W,2600.3,...,0,0,0,0,0,0,1,0,0,
4,2024-08-18 17:42:10,2024_08_18_17_42_10_543_-0400_1280x1280.right-...,1280,1280,[limite-velocidad-40],-17.412288,S,-66.156361,W,2599.3,...,0,1,0,0,0,0,0,0,0,


In [43]:
addresses
# get the address for all the rows
addresses_dict = {}
for (i, row) in metadata_df.iterrows():
    lat = row['latitude']
    lon = row['longitude']
    print(lat, lon)
    address = get_address(lat, lon)
    print(address)
    # addresses.append(address)

    # add address to metadata_df
    metadata_df.at[i, 'address'] = address    

metadata_df.head()

-17.4289866 -66.1595642
https://nominatim.openstreetmap.org/reverse?format=json&lat=-17.4289866&lon=-66.1595642


JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [None]:
# dibujar un mapa con las coordenadas de las imágenes
import folium

def draw_map(df):
    m = folium.Map(location=[df['latitude'].mean(), df['longitude'].mean()], zoom_start=12)
    for (i, row) in df.iterrows():
        if row['latitude'] and row['longitude']:
            folium.Marker([row['latitude'], row['longitude']]).add_to(m)
    return m

draw_map(metadata_df)

In [83]:
!pip install geopy

Collecting geopy
  Downloading geopy-2.4.1-py3-none-any.whl.metadata (6.8 kB)
Collecting geographiclib<3,>=1.52 (from geopy)
  Downloading geographiclib-2.0-py3-none-any.whl.metadata (1.4 kB)
Downloading geopy-2.4.1-py3-none-any.whl (125 kB)
Downloading geographiclib-2.0-py3-none-any.whl (40 kB)
Installing collected packages: geographiclib, geopy
Successfully installed geographiclib-2.0 geopy-2.4.1


In [33]:
from geopy.geocoders import Nominatim

def get_address_from_lat_lon(lat, lon):
    geolocator = Nominatim(user_agent="my-app-gps")
    location = geolocator.reverse((lat, lon), language='en')
    return location.address if location else "No address found"

In [34]:
# create a new dataframe with the addresses with   latitud, longitud, address columns
import pandas as pd 

# get the address for all the rows
addresses_df = pd.DataFrame(columns=['latitud', 'longitud', 'address'])
addresses_df.columns = ['latitud', 'longitud', 'address']

for (i, row) in metadata_df.iterrows():
    lat = row['latitude']
    lon = row['longitude']
    print(lat, lon)
    address = get_address_from_lat_lon(lat, lon)
    print(address)
    addresses_df.loc[i] = [lat, lon, address]

# save the addresses to a csv file
addresses_df.to_csv('../dataset/processed/yolo_signals_cbba/addresses.csv', index=False)

-17.4289866 -66.1595642
Avenida Panamericana, Lindo, Cochabamba, Cercado, Cochabamba, Bolivia
-17.4170963 -66.1572259
Avenida Panamericana, Independencia, Cochabamba, Cercado, Cochabamba, Bolivia
-17.4170173 -66.1572136
Avenida Panamericana, Independencia, Cochabamba, Cercado, Cochabamba, Bolivia
-17.416900400000003 -66.1571911
Avenida Panamericana, Independencia, Cochabamba, Cercado, Cochabamba, Bolivia
-17.412288099999998 -66.1563606
Avenida Panamericana, Canata, Cochabamba, Cercado, Cochabamba, Bolivia
-17.411066899999998 -66.1560297
Avenida Independencia, San Carlos, Cercado, Cochabamba, Bolivia
-17.411066899999998 -66.1560297
Avenida Independencia, San Carlos, Cercado, Cochabamba, Bolivia
-17.3858383 -66.1590355
Caxia Tours, 707, Calle La Paz, Barba de Padilla, Cochabamba, Cercado, Cochabamba, Bolivia
-17.4173392 -66.15727360000001
Avenida Panamericana, Independencia, Cochabamba, Cercado, Cochabamba, Bolivia
-17.4171072 -66.1572302
Avenida Panamericana, Independencia, Cochabamba, 