In [90]:
import pandas as pd
from geopy import Nominatim
from geopy.extra.rate_limiter import RateLimiter


def add_geographic_attributes(df: pd.DataFrame) -> pd.DataFrame:
    geolocator = Nominatim(user_agent="geoapiExercises")
    geocode = RateLimiter(geolocator.reverse, min_delay_seconds=1)

    df['lat_lon'] = df['latitude'].astype('string') + ", " + df['longitude'].astype('string')
    df['location'] = df['lat_lon'].apply(geocode)
    df['neighbourhood'] = df['location'].transform(lambda location: location.raw.get('address').get('neighbourhood'))
    df['suburb'] = df['location'].transform(lambda location: location.raw.get('address').get('suburb'))
    df['city'] = df['location'].transform(lambda location: location.raw.get('address').get('city'))
    df['postcode'] = df['location'].transform(lambda location: location.raw.get('address').get('postcode'))
    df.drop(['lat_lon', 'location'], axis='columns', inplace=True)

    return df

In [1]:
catalog.list()
df = catalog.load('enriched_df')
df

Unnamed: 0,trip,step,time_x,latitude_x,longitude_x,elevation_x,time_y,latitude_y,longitude_y,elevation_y,neighbourhood,suburb,city,postcode
0,data/01_raw/recovery.01-Mar-2022-1533.gpx,0,2022-03-01 20:33:48+00:00,6.297476,-75.578192,1668.879307,2022-03-01 20:33:48+00:00,6.297476,-75.578192,1668.879307,Doce de Octubre No. 2,Comuna 6 - Doce de Octubre,Medellín,50041
1,data/01_raw/recovery.01-Mar-2022-1533.gpx,1,2022-03-01 20:33:49+00:00,6.297476,-75.578146,1668.386143,2022-03-01 20:33:48+00:00,6.297476,-75.578192,1668.879307,Doce de Octubre No. 2,Comuna 6 - Doce de Octubre,Medellín,50041
2,data/01_raw/recovery.01-Mar-2022-1533.gpx,2,2022-03-01 20:33:57+00:00,6.297460,-75.578169,1667.904453,2022-03-01 20:33:57+00:00,6.297460,-75.578169,1667.904453,Doce de Octubre No. 2,Comuna 6 - Doce de Octubre,Medellín,50041
3,data/01_raw/recovery.05-Mar-2022.1025.gpx,0,2022-03-04 00:04:34+00:00,6.180893,-75.577296,1589.639492,2022-03-04 00:04:34+00:00,6.180893,-75.577296,1589.639492,Las Flores,Comuna 6 - Doce de Octubre,Medellín,55422
4,data/01_raw/recovery.05-Mar-2022.1025.gpx,1,2022-03-04 00:04:35+00:00,6.180912,-75.577295,1588.165859,2022-03-04 00:04:34+00:00,6.180893,-75.577296,1589.639492,Las Flores,Comuna 6 - Doce de Octubre,Medellín,55422
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21151,data/01_raw/18-Mar-2023-1213.gpx,50,2023-03-18 17:13:31+00:00,6.274343,-75.591691,1546.499603,2023-03-18 17:11:31+00:00,6.274443,-75.592179,1541.127842,Robledo,Comuna 7 - Robledo,Medellín,50034
21152,data/01_raw/18-Mar-2023-1213.gpx,51,2023-03-18 17:13:33+00:00,6.274324,-75.591693,1545.774729,2023-03-18 17:11:31+00:00,6.274443,-75.592179,1541.127842,Robledo,Comuna 7 - Robledo,Medellín,50034
21153,data/01_raw/18-Mar-2023-1213.gpx,52,2023-03-18 17:13:35+00:00,6.274314,-75.591676,1545.817768,2023-03-18 17:11:31+00:00,6.274443,-75.592179,1541.127842,Robledo,Comuna 7 - Robledo,Medellín,50034
21154,data/01_raw/18-Mar-2023-1213.gpx,53,2023-03-18 17:13:38+00:00,6.274321,-75.591653,1545.318932,2023-03-18 17:11:31+00:00,6.274443,-75.592179,1541.127842,Robledo,Comuna 7 - Robledo,Medellín,50034


In [92]:
first_last_df = pd.concat([df.groupby('trip', as_index=False).first(), df.groupby('trip', as_index=False).last()])

In [93]:
geo_df = add_geographic_attributes(first_last_df)

                                                trip   step  \
0                   data/01_raw/03-Mar-2023-1714.gpx      0   
1                   data/01_raw/18-Mar-2023-1213.gpx      0   
2                data/01_raw/18-Mar-2023-1809Scy.gpx      0   
3  data/01_raw/2023-03-18_Mar_18,_2023_5_54_26_PM...      0   
4          data/01_raw/recovery.01-Mar-2022-1533.gpx      0   
5          data/01_raw/recovery.05-Mar-2022.1025.gpx      0   
6          data/01_raw/recovery.25-May-2022-0907.gpx      0   
0                   data/01_raw/03-Mar-2023-1714.gpx    487   
1                   data/01_raw/18-Mar-2023-1213.gpx     54   
2                data/01_raw/18-Mar-2023-1809Scy.gpx    357   
3  data/01_raw/2023-03-18_Mar_18,_2023_5_54_26_PM...   1000   
4          data/01_raw/recovery.01-Mar-2022-1533.gpx      2   
5          data/01_raw/recovery.05-Mar-2022.1025.gpx  18592   
6          data/01_raw/recovery.25-May-2022-0907.gpx    656   

                               time  latitude  longitu

In [94]:
geo_df

Unnamed: 0,trip,step,time,latitude,longitude,elevation,neighbourhood,suburb,city,postcode
0,data/01_raw/03-Mar-2023-1714.gpx,0,2023-03-03 21:56:07+00:00,6.222098,-75.595565,1513.948643,Las Playas,Comuna 16 - Belén,Medellín,50024
1,data/01_raw/18-Mar-2023-1213.gpx,0,2023-03-18 17:11:31+00:00,6.274443,-75.592179,1541.127842,Robledo,Comuna 7 - Robledo,Medellín,50034
2,data/01_raw/18-Mar-2023-1809Scy.gpx,0,2023-03-18 22:59:20+00:00,6.26511,-75.587935,1496.458668,Cuarta Brigada,Comuna 11 - Laureles-Estadio,Medellín,50034
3,"data/01_raw/2023-03-18_Mar_18,_2023_5_54_26_PM...",0,2023-03-18 22:54:27+00:00,6.275533,-75.592247,1600.0,Robledo,Comuna 7 - Robledo,Medellín,50034
4,data/01_raw/recovery.01-Mar-2022-1533.gpx,0,2022-03-01 20:33:48+00:00,6.297476,-75.578192,1668.879307,Doce de Octubre No. 2,Comuna 6 - Doce de Octubre,Medellín,50041
5,data/01_raw/recovery.05-Mar-2022.1025.gpx,0,2022-03-04 00:04:34+00:00,6.180893,-75.577296,1589.639492,Las Flores,,,55422
6,data/01_raw/recovery.25-May-2022-0907.gpx,0,2022-05-25 21:24:46.433000+00:00,6.208559,-75.57163,1558.78,Astorga,Comuna 14 - El Poblado,Medellín,50021
0,data/01_raw/03-Mar-2023-1714.gpx,487,2023-03-03 22:14:00+00:00,6.211884,-75.59471,1526.831767,La Mota,Comuna 16 - Belén,Medellín,50024
1,data/01_raw/18-Mar-2023-1213.gpx,54,2023-03-18 17:13:40+00:00,6.274318,-75.591673,1545.248981,Robledo,Comuna 7 - Robledo,Medellín,50034
2,data/01_raw/18-Mar-2023-1809Scy.gpx,357,2023-03-18 23:09:22+00:00,6.247514,-75.586948,1481.290026,San Joaquín,Comuna 11 - Laureles-Estadio,Medellín,50031


In [103]:
jeje = df.merge(geo_df, on=['trip', 'step'], how='left').ffill()

In [104]:
jeje[jeje.trip == 'data/01_raw/recovery.05-Mar-2022.1025.gpx']

Unnamed: 0,trip,step,time_x,latitude_x,longitude_x,elevation_x,time_y,latitude_y,longitude_y,elevation_y,neighbourhood,suburb,city,postcode
3,data/01_raw/recovery.05-Mar-2022.1025.gpx,0,2022-03-04 00:04:34+00:00,6.180893,-75.577296,1589.639492,2022-03-04 00:04:34+00:00,6.180893,-75.577296,1589.639492,Las Flores,Comuna 6 - Doce de Octubre,Medellín,055422
4,data/01_raw/recovery.05-Mar-2022.1025.gpx,1,2022-03-04 00:04:35+00:00,6.180912,-75.577295,1588.165859,2022-03-04 00:04:34+00:00,6.180893,-75.577296,1589.639492,Las Flores,Comuna 6 - Doce de Octubre,Medellín,055422
5,data/01_raw/recovery.05-Mar-2022.1025.gpx,2,2022-03-04 00:04:43+00:00,6.180921,-75.577312,1591.435879,2022-03-04 00:04:34+00:00,6.180893,-75.577296,1589.639492,Las Flores,Comuna 6 - Doce de Octubre,Medellín,055422
6,data/01_raw/recovery.05-Mar-2022.1025.gpx,3,2022-03-04 00:04:46+00:00,6.180897,-75.577270,1587.116055,2022-03-04 00:04:34+00:00,6.180893,-75.577296,1589.639492,Las Flores,Comuna 6 - Doce de Octubre,Medellín,055422
7,data/01_raw/recovery.05-Mar-2022.1025.gpx,4,2022-03-04 00:04:48+00:00,6.180874,-75.577276,1586.772305,2022-03-04 00:04:34+00:00,6.180893,-75.577296,1589.639492,Las Flores,Comuna 6 - Doce de Octubre,Medellín,055422
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18591,data/01_raw/recovery.05-Mar-2022.1025.gpx,18588,2022-03-05 15:25:40+00:00,6.255059,-75.590578,1481.257603,2022-03-04 00:04:34+00:00,6.180893,-75.577296,1589.639492,Las Flores,Comuna 6 - Doce de Octubre,Medellín,055422
18592,data/01_raw/recovery.05-Mar-2022.1025.gpx,18589,2022-03-05 15:25:42+00:00,6.255073,-75.590597,1480.845982,2022-03-04 00:04:34+00:00,6.180893,-75.577296,1589.639492,Las Flores,Comuna 6 - Doce de Octubre,Medellín,055422
18593,data/01_raw/recovery.05-Mar-2022.1025.gpx,18590,2022-03-05 15:25:44+00:00,6.255086,-75.590614,1480.847569,2022-03-04 00:04:34+00:00,6.180893,-75.577296,1589.639492,Las Flores,Comuna 6 - Doce de Octubre,Medellín,055422
18594,data/01_raw/recovery.05-Mar-2022.1025.gpx,18591,2022-03-05 15:25:46+00:00,6.255103,-75.590619,1480.186069,2022-03-04 00:04:34+00:00,6.180893,-75.577296,1589.639492,Las Flores,Comuna 6 - Doce de Octubre,Medellín,055422
