In [94]:
import numpy as np
import pandas as pd
import requests

In [74]:
# Read the tab-separated stations file and split the 'geometry.coordinates' string
# (stripped of its square brackets) into two float columns: the first value goes to
# 'longitud', the second to 'latitud'.
df = pd.read_csv('../data/bicimad_stations.csv', sep="\t")
df[['longitud', 'latitud']] = df['geometry.coordinates'].str.strip('[]').str.split(', ', expand=True).astype(float)

# Keep only the text that follows the first " - " separator in 'name', then select
# and rename the columns of interest.
# NOTE(review): a name without a " - " separator does not match the pattern and becomes NaN.
df['name'] = df['name'].str.extract(r'\s-\s(.*)')
df_bicimad_stations = df[['name', 'address', 'latitud', 'longitud', 'dock_bikes']].rename(columns={
    'name': 'BiciMAD station', 
    'address': 'Station location', 
    'latitud': 'station_lat',
    'longitud': 'station_long',
    'dock_bikes': 'Bikes_available' 
})

#df_bicimad_stations

In [75]:
def acquisition_method_csv(file_path, sep="\t"):
    """Load a delimited text file into a DataFrame.

    Parameters
    ----------
    file_path : str or path-like
        Location of the file to read.
    sep : str, default "\\t"
        Field delimiter. The default keeps the original tab-separated
        behaviour; pass another delimiter to read other exports.

    Returns
    -------
    pandas.DataFrame
        The file contents exactly as parsed by ``pd.read_csv``.
    """
    df_acquisition_csv = pd.read_csv(file_path, sep=sep)
    return df_acquisition_csv

In [76]:
# Relative path of the BiciMAD stations file that is passed to acquisition_method_csv.
file_path = '../data/bicimad_stations.csv'

In [77]:
# Load the raw stations table from file_path.
df_acquisition_csv = acquisition_method_csv(file_path)

In [78]:
def clean_df_stations(df_acquisition_csv):
    """Build the tidy stations table from the raw stations frame.

    The raw frame is left untouched: all work happens on a copy, so calling
    this function again on the same input yields the same result (the
    previous in-place version overwrote 'name' and produced NaN names on a
    second call).

    Parameters
    ----------
    df_acquisition_csv : pandas.DataFrame
        Raw frame with at least the columns 'geometry.coordinates'
        (a string such as "[lon, lat]"), 'name', 'address' and 'dock_bikes'.

    Returns
    -------
    pandas.DataFrame
        Columns 'BiciMAD station', 'Station location', 'station_lat',
        'station_long' and 'Bikes_available', one row per input row.
    """
    stations = df_acquisition_csv.copy()

    # "[lon, lat]" -> two float columns; the first value is the longitude.
    stations[['longitud', 'latitud']] = (
        stations['geometry.coordinates']
        .str.strip('[]')
        .str.split(', ', expand=True)
        .astype(float)
    )

    # Keep only the text after the first " - " separator; names without the
    # separator do not match and become NaN.
    stations['name'] = stations['name'].str.extract(r'\s-\s(.*)', expand=False)

    df_bicimad_stations = stations[['name', 'address', 'latitud', 'longitud', 'dock_bikes']].rename(columns={
        'name': 'BiciMAD station',
        'address': 'Station location',
        'latitud': 'station_lat',
        'longitud': 'station_long',
        'dock_bikes': 'Bikes_available',
    })
    return df_bicimad_stations

In [79]:
# Build the cleaned stations table from the raw frame loaded by acquisition_method_csv.
df_bicimad_stations = clean_df_stations(df_acquisition_csv)

In [80]:
# Query the API
url = 'https://datos.madrid.es/egob/catalogo/300614-0-centros-educativos.json'
response = requests.get(url)

# Parse the response body as JSON
json_data = response.json()

# Build a DataFrame from the records stored under the '@graph' key.
# NOTE: this rebinds `df`, which held the stations frame in the earlier exploratory cell.
df = pd.DataFrame(json_data['@graph'])

# Pull the street address (title-cased), latitude and longitude out of the nested
# 'address' and 'location' values; entries that are missing yield None.
df['street_address'] = df['address'].apply(lambda x: x.get('street-address') if pd.notnull(x) else None).str.title()
df['latitude'] = df['location'].apply(lambda x: x['latitude'] if isinstance(x, dict) else None)
df['longitude'] = df['location'].apply(lambda x: x['longitude'] if isinstance(x, dict) else None)

# Build the final DataFrame with the relevant columns, renamed
df_places = df[['title', 'street_address', 'latitude', 'longitude']].rename(columns={
    'title': 'Place of interest', 'street_address': 'Place address', 'latitude': 'place_lat', 'longitude': 'place_long'})

# Drop rows containing any missing value and reset the index
df_places = df_places.dropna().reset_index(drop=True)


#df_places

In [81]:
def acquisition_method_api(url, timeout=30):
    """Download a JSON document and load its '@graph' records into a DataFrame.

    Parameters
    ----------
    url : str
        Address of the JSON resource.
    timeout : float or tuple, default 30
        Seconds to wait for the server, forwarded to ``requests.get``.
        Without a timeout the request can block indefinitely.

    Returns
    -------
    pandas.DataFrame
        One row per element of the '@graph' list in the response.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    KeyError
        If the JSON payload has no '@graph' key.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of trying to parse an error page.
    response.raise_for_status()
    json_data = response.json()
    df_acquisition_api = pd.DataFrame(json_data['@graph'])
    return df_acquisition_api

In [82]:
# JSON endpoint that is passed to acquisition_method_api.
url = 'https://datos.madrid.es/egob/catalogo/300614-0-centros-educativos.json'

In [83]:
# Download the raw places table from url.
df_acquisition_api = acquisition_method_api(url)

In [84]:
def clean_df_places(df_acquisition_api):
    """Build the tidy places table from the raw API frame.

    The input frame is not modified. Nested values that are not dictionaries
    (missing entries, unexpected scalars) are treated as missing, and rows
    with any missing field are dropped from the result.

    Parameters
    ----------
    df_acquisition_api : pandas.DataFrame
        Raw frame with at least the columns 'title', 'address' (dict holding
        'street-address') and 'location' (dict holding 'latitude' and
        'longitude').

    Returns
    -------
    pandas.DataFrame
        Columns 'Place of interest', 'Place address', 'place_lat' and
        'place_long', with a fresh 0..n-1 index.
    """
    def _nested(value, key):
        # Same guard for every nested column: only dictionaries are read.
        return value.get(key) if isinstance(value, dict) else None

    street_address = (
        df_acquisition_api['address']
        .apply(lambda value: _nested(value, 'street-address'))
        .str.title()
    )
    latitude = df_acquisition_api['location'].apply(lambda value: _nested(value, 'latitude'))
    longitude = df_acquisition_api['location'].apply(lambda value: _nested(value, 'longitude'))

    df_places = pd.DataFrame({
        'Place of interest': df_acquisition_api['title'],
        'Place address': street_address,
        'place_lat': latitude,
        'place_long': longitude,
    })
    df_places = df_places.dropna().reset_index(drop=True)
    return df_places

In [85]:
# Build the cleaned places table from the raw frame returned by acquisition_method_api.
df_places = clean_df_places(df_acquisition_api)

In [58]:
# Final DataFrame built from trial samples (first 50 rows of each cleaned frame).
# The samples are created in this cell so it does not depend on cells further down.
stations_prueba = df_bicimad_stations.head(50)
places_prueba = df_places.head(50)
df_combined = pd.merge(stations_prueba, places_prueba, how='cross')

# Straight-line distance in coordinate degrees, computed column-wise.
# It is only used to rank stations per place, not as a physical distance.
df_combined['distance'] = (
    (df_combined['place_lat'] - df_combined['station_lat'])**2
    + (df_combined['place_long'] - df_combined['station_long'])**2
)**0.5
nearest_stations = df_combined.loc[df_combined.groupby('Place of interest')['distance'].idxmin()]

final_result = nearest_stations[['Place of interest', 'Place address', 'BiciMAD station', 'Station location', 'Bikes_available']]

In [45]:
# Trial DataFrames: the first 50 stations and the first 50 places.
# Each name now holds the frame it describes (they were previously swapped).
stations_prueba = df_bicimad_stations.head(50)
places_prueba = df_places.head(50)

In [89]:
def combine_dataframes_cross (df1, df2):
    """Return the cross join of two frames: every row of df1 paired with every row of df2."""
    return df1.merge(df2, how='cross')

In [90]:
# Pair every station with every place (cross join); the bare name on the last line displays the frame.
df_combined = combine_dataframes_cross(df_bicimad_stations, df_places)
df_combined

Unnamed: 0,BiciMAD station,Station location,station_lat,station_long,Bikes_available,Place of interest,Place address,place_lat,place_long
0,Puerta del Sol A,Puerta del Sol nº 1,40.417214,-3.701834,0,ABC Schule (Alemán),Calle Leizaran 27,40.447932,-3.681382
1,Puerta del Sol A,Puerta del Sol nº 1,40.417214,-3.701834,0,Academia Darío Estudio,Calle Gran Via 63,40.422491,-3.709818
2,Puerta del Sol A,Puerta del Sol nº 1,40.417214,-3.701834,0,AFI Escuela de Finanzas,Calle Marques De Villamejor 5,40.431247,-3.687559
3,Puerta del Sol A,Puerta del Sol nº 1,40.417214,-3.701834,0,Agencia Estatal Consejo Superior de Investigac...,Calle Serrano 117,40.440884,-3.687026
4,Puerta del Sol A,Puerta del Sol nº 1,40.417214,-3.701834,0,"Altair, Colegio Internacional",Calle Joaquin Bau 4,40.463788,-3.685848
...,...,...,...,...,...,...,...,...,...
496315,Facultad Derecho,Avenida Complutense nº 23,40.451090,-3.729370,8,Universidad Pontificia Comillas. Facultad de D...,Calle Alberto Aguilera 23,40.429980,-3.711258
496316,Facultad Derecho,Avenida Complutense nº 23,40.451090,-3.729370,8,Universidad Pontificia Comillas. Facultad de T...,Calle Universidad De Comillas 3,40.552126,-3.682802
496317,Facultad Derecho,Avenida Complutense nº 23,40.451090,-3.729370,8,Universidad Pontificia de Comillas. Facultad d...,Calle Alberto Aguilera 23,40.429980,-3.711258
496318,Facultad Derecho,Avenida Complutense nº 23,40.451090,-3.729370,8,Universidad Pontificia de Comillas. Facultad d...,Calle Alberto Aguilera 23,40.429980,-3.711258


In [91]:
def ditance_meters_pitagoras(lat1, lon1, lat2, lon2):
    """Approximate distance in meters between two latitude/longitude points.

    The coordinates (decimal degrees) are projected onto a local plane
    (equirectangular approximation) and the Pythagorean theorem is applied.
    The east-west offset is scaled by the cosine of the mean latitude,
    because a degree of longitude covers less ground away from the equator;
    the previous version returned an unscaled distance in raw degrees.

    Parameters
    ----------
    lat1, lon1 : float or array-like
        Latitude and longitude of the first point(s), in decimal degrees.
    lat2, lon2 : float or array-like
        Latitude and longitude of the second point(s), in decimal degrees.

    Returns
    -------
    float or array-like
        Distance in meters, with the same shape as the inputs. Intended for
        short (city-scale) distances.
    """
    earth_radius_m = 6_371_000.0  # mean Earth radius

    lat1_rad = np.radians(lat1)
    lat2_rad = np.radians(lat2)
    delta_north = lat2_rad - lat1_rad
    # Shrink the longitude difference to the ground distance it covers at this latitude.
    delta_east = (np.radians(lon2) - np.radians(lon1)) * np.cos((lat1_rad + lat2_rad) / 2.0)

    distance = earth_radius_m * (delta_north**2 + delta_east**2)**0.5
    return distance

In [92]:
def find_nearest_stations (df_combined):
    """Pick, for every place of interest, the station at the smallest distance.

    The distance is computed once over whole columns instead of row by row,
    and the input frame is not modified (no 'distance' column is added to it).

    Parameters
    ----------
    df_combined : pandas.DataFrame
        Station/place pairs with the columns 'station_lat', 'station_long',
        'place_lat', 'place_long', 'Place of interest', 'Place address',
        'BiciMAD station', 'Station location' and 'Bikes_available'.

    Returns
    -------
    pandas.DataFrame
        One row per distinct 'Place of interest' (in sorted group order) with
        the columns 'Place of interest', 'Place address', 'BiciMAD station',
        'Station location' and 'Bikes_available'. Row labels are those of the
        selected rows in df_combined.
    """
    distance = ditance_meters_pitagoras(
        df_combined['station_lat'],
        df_combined['station_long'],
        df_combined['place_lat'],
        df_combined['place_long'],
    )
    # Row label of the minimum distance within each place.
    nearest_row_labels = distance.groupby(df_combined['Place of interest']).idxmin()
    nearest_stations = df_combined.loc[nearest_row_labels]
    final_result = nearest_stations[['Place of interest', 'Place address', 'BiciMAD station', 'Station location', 'Bikes_available']]
    return final_result

In [93]:
# Select the nearest station for each place; the bare name on the last line displays the result.
df_final_result = find_nearest_stations(df_combined)
df_final_result

Unnamed: 0,Place of interest,Place address,BiciMAD station,Station location,Bikes_available
280120,ABC Schule (Alemán),Calle Leizaran 27,Doctor Arce 45,Avenida del Doctor Arce nº 45,1
191762,AFI Escuela de Finanzas,Calle Marques De Villamejor 5,Ortega y Gasset,Calle José Ortega y Gasset nº 4,0
221841,Academia Darío Estudio,Calle Gran Via 63,Plaza de España B,Calle Princesa nº 5,0
473763,Agencia Estatal Consejo Superior de Investigac...,Calle Serrano 117,Serrano 113,Calle Serrano nº 113B,8
483164,"Altair, Colegio Internacional",Calle Joaquin Bau 4,López Pozas,Calle General López Pozas nº 2,5
...,...,...,...,...,...
109035,Universidad Pontificia Comillas. Facultad de D...,Calle Alberto Aguilera 23,Santa Cruz del Marcenado,Calle Santa Cruz del Marcenado nº 24,17
413596,Universidad Pontificia Comillas. Facultad de T...,Calle Universidad De Comillas 3,Pedro Rico,Calle Pedro Rico nº 4,0
109037,Universidad Pontificia de Comillas. Facultad d...,Calle Alberto Aguilera 23,Santa Cruz del Marcenado,Calle Santa Cruz del Marcenado nº 24,17
109038,Universidad Pontificia de Comillas. Facultad d...,Calle Alberto Aguilera 23,Santa Cruz del Marcenado,Calle Santa Cruz del Marcenado nº 24,17


In [29]:
# Save the trial result as CSV (without the row index).
# The path literal previously contained a stray quote that made the cell a syntax error.
final_result.to_csv("../data/nearest_bicimad_stations.csv", index=False)

In [95]:
def save_to_csv (dataframe, file_path='../data/nearest_bicimad_stations.csv'):
    """Write a DataFrame to a CSV file and return a confirmation message.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table to save.
    file_path : str or path-like, default '../data/nearest_bicimad_stations.csv'
        Destination file. The default is the location used originally.

    Returns
    -------
    str
        A fixed confirmation message.
    """
    # The row index is not written: for the nearest-station result it is only the
    # row label left over from the cross join, not data.
    dataframe.to_csv(file_path, index=False)
    return '---your file is located in the data folder---'

In [96]:
# Call the function under the name it is defined with (save_to_csv);
# `save_csv` is not defined anywhere in this notebook.
save_to_csv(df_final_result)

'---your file is located in the data folder---'