In [1]:
import pandas as pd
import requests
from shapely.geometry import Point
import geopandas as gpd

In [2]:
# Load the raw BiciMAD station export (tab-separated file).
df = pd.read_csv('../data/bicimad_stations.csv', sep="\t")

# 'geometry.coordinates' is text of the form "[<longitud>, <latitud>]":
# drop the brackets, split on the comma and convert both parts to floats.
coordinate_parts = df['geometry.coordinates'].str.strip('[]').str.split(', ', expand=True)
df[['longitud', 'latitud']] = coordinate_parts.astype(float)

# Keep only the text that follows the " - " separator in the station name.
df['name'] = df['name'].str.extract(r'\s-\s(.*)')

# Columns to keep (in this order) and the final name each one receives.
station_columns = {
    'name': 'BiciMAD station',
    'address': 'Station location',
    'latitud': 'station_lat',
    'longitud': 'station_long',
    'dock_bikes': 'Bikes_available',
}
df_bicimad_stations = df[list(station_columns)].rename(columns=station_columns)

In [3]:
def acquisition_method_csv(file_path):
    """Read the tab-separated file at `file_path` and return it as a DataFrame."""
    return pd.read_csv(file_path, sep="\t")

In [4]:
# Relative path of the BiciMAD stations file that is loaded by acquisition_method_csv.
file_path = '../data/bicimad_stations.csv'

In [5]:
# Raw stations table exactly as read from the file (no cleaning applied yet).
df_acquisition_csv = acquisition_method_csv(file_path)

In [6]:
def clean_df_stations(df_acquisition_csv):
    """Build the cleaned BiciMAD stations table from the raw CSV frame.

    The input frame is not modified, so the function can be called repeatedly
    on the same raw frame (e.g. when a notebook cell is re-run) and always
    returns the same result.

    Parameters
    ----------
    df_acquisition_csv : pandas.DataFrame
        Raw frame with at least the columns 'name', 'address',
        'geometry.coordinates' and 'dock_bikes'. 'geometry.coordinates' must
        hold text of the form "[<longitude>, <latitude>]".

    Returns
    -------
    pandas.DataFrame
        Columns 'BiciMAD station', 'Station location', 'station_lat',
        'station_long' and 'Bikes_available'.
    """
    # Work on a copy: overwriting 'name' on the caller's frame would make a
    # second call extract from already-extracted names and produce NaN.
    stations = df_acquisition_csv.copy()

    # "[lon, lat]" text -> two float columns.
    stations[['longitud', 'latitud']] = (
        stations['geometry.coordinates']
        .str.strip('[]')
        .str.split(', ', expand=True)
        .astype(float)
    )

    # Keep only the text that follows the " - " separator in the station name.
    stations['name'] = stations['name'].str.extract(r'\s-\s(.*)')

    df_bicimad_stations = stations[['name', 'address', 'latitud', 'longitud', 'dock_bikes']].rename(columns={
        'name': 'BiciMAD station',
        'address': 'Station location',
        'latitud': 'station_lat',
        'longitud': 'station_long',
        'dock_bikes': 'Bikes_available'
    })
    return df_bicimad_stations

In [7]:
# Cleaned stations table (renamed columns, numeric coordinates) derived from the raw CSV frame.
df_bicimad_stations = clean_df_stations(df_acquisition_csv)

In [8]:
# Query the API.
url = 'https://datos.madrid.es/egob/catalogo/300614-0-centros-educativos.json'
response = requests.get(url)

# Decode the response as JSON and load the records stored under '@graph'.
json_data = response.json()
df = pd.DataFrame(json_data['@graph'])

# Street address: taken from the nested 'address' field and title-cased.
df['street_address'] = df['address'].apply(
    lambda x: x.get('street-address') if pd.notnull(x) else None
).str.title()

# Latitude / longitude: taken from the nested 'location' dict when there is one.
for coordinate in ('latitude', 'longitude'):
    df[coordinate] = df['location'].apply(
        lambda x, key=coordinate: x[key] if isinstance(x, dict) else None
    )

# Columns to keep (in this order) and the final name each one receives.
place_columns = {
    'title': 'Place of interest',
    'street_address': 'Place address',
    'latitude': 'place_lat',
    'longitude': 'place_long',
}

# Final frame: selected columns, renamed, rows with nulls removed, index reset.
df_places = (
    df[list(place_columns)]
    .rename(columns=place_columns)
    .dropna()
    .reset_index(drop=True)
)

In [9]:
def acquisition_method_api(url, timeout=30):
    """Download a JSON document and return its '@graph' records as a DataFrame.

    Parameters
    ----------
    url : str
        Endpoint that returns a JSON object containing an '@graph' list.
    timeout : float or tuple, optional
        Seconds to wait for the server, passed to `requests.get`. Without a
        timeout the request can block indefinitely.

    Returns
    -------
    pandas.DataFrame
        One row per element of the '@graph' list.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.Timeout
        If the server does not answer within `timeout`.
    KeyError
        If the JSON payload has no '@graph' key.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here with a clear HTTP error instead of later with a JSON/KeyError.
    response.raise_for_status()
    json_data = response.json()
    df_acquisition_api = pd.DataFrame(json_data['@graph'])
    return df_acquisition_api

In [10]:
# datos.madrid.es JSON endpoint (centros educativos catalogue) passed to acquisition_method_api.
url = 'https://datos.madrid.es/egob/catalogo/300614-0-centros-educativos.json'

In [11]:
# Raw places table built from the '@graph' records of the API response (no cleaning applied yet).
df_acquisition_api = acquisition_method_api(url)

In [12]:
def clean_df_places(df_acquisition_api):
    """Build the cleaned places-of-interest table from the raw API frame.

    The input frame is left untouched (no helper columns are added to it).

    Parameters
    ----------
    df_acquisition_api : pandas.DataFrame
        Raw frame with at least the columns 'title', 'address' and 'location'.
        'address' is expected to hold dicts with a 'street-address' key and
        'location' dicts with 'latitude' and 'longitude' keys; any value that
        is not a dict is treated as missing.

    Returns
    -------
    pandas.DataFrame
        Columns 'Place of interest', 'Place address' (title-cased),
        'place_lat' and 'place_long'. Rows with any missing value are dropped
        and the index is reset.
    """
    def _field(column, key):
        # Pull `key` out of every dict in `column`; non-dict values become None.
        return df_acquisition_api[column].apply(
            lambda value: value.get(key) if isinstance(value, dict) else None
        )

    df_places = pd.DataFrame({
        'Place of interest': df_acquisition_api['title'],
        'Place address': _field('address', 'street-address').str.title(),
        'place_lat': _field('location', 'latitude'),
        'place_long': _field('location', 'longitude'),
    })
    df_places = df_places.dropna().reset_index(drop=True)
    return df_places

In [25]:
# Cleaned places table (name, address, coordinates) derived from the raw API frame.
df_places = clean_df_places(df_acquisition_api)

In [26]:
def to_mercator(lat, long):
    """Project a latitude/longitude pair in degrees to pseudo-Mercator metres.

    Parameters
    ----------
    lat : float
        Latitude in degrees (EPSG:4326).
    long : float
        Longitude in degrees (EPSG:4326).

    Returns
    -------
    geopandas.GeoSeries
        One-element series holding the point in EPSG:3857.
    """
    # shapely points are (x, y) and geopandas reads EPSG:4326 as
    # x = longitude, y = latitude, so the longitude must come first.
    c = gpd.GeoSeries([Point(long, lat)], crs=4326)
    c = c.to_crs(3857)
    return c

def distance_meters(lat_start, long_start, lat_finish, long_finish):
    """Distance in metres between two latitude/longitude points given in degrees.

    Example input: start 40.4400607 / -3.6425358, finish 40.4234825 / -3.6292625.

    Both points are projected with `to_mercator` and the distance is measured
    between the projected points. The value comes back as the one-element
    result of `GeoSeries.distance`, so callers extract the number themselves
    (e.g. with `.item()`).
    """
    origin = to_mercator(lat_start, long_start)
    destination = to_mercator(lat_finish, long_finish)
    return origin.distance(destination)

In [27]:
# Small test samples (first 50 rows of each table) so the cross join stays cheap.
# Each name now matches its content: stations_prueba holds stations and
# places_prueba holds places.
stations_prueba = df_bicimad_stations.head(50)
places_prueba = df_places.head(50)

In [28]:
# Final DataFrame built from the test samples: every row of one sample paired
# with every row of the other.
df_combined = stations_prueba.merge(places_prueba, how='cross')

# Distance for each station/place pair, computed row by row.
df_combined['distance'] = df_combined.apply(
    lambda pair: distance_meters(
        pair['station_lat'], pair['station_long'], pair['place_lat'], pair['place_long']
    ).item(),
    axis=1,
)

# For every place keep the row whose distance is the smallest.
closest_rows = df_combined.groupby('Place of interest')['distance'].idxmin()
nearest_stations = df_combined.loc[closest_rows]

# Columns shown in the final report.
output_columns = [
    'Place of interest',
    'Place address',
    'BiciMAD station',
    'Station location',
    'Bikes_available',
]
final_result = nearest_stations[output_columns]

In [17]:
def combine_dataframes_cross (df1, df2):
    """Return the cross join of `df1` and `df2`: each row of `df1` paired with each row of `df2`."""
    return df1.merge(df2, how='cross')

In [18]:
# Pair every row of one test sample with every row of the other (cross join).
df_combined = combine_dataframes_cross(stations_prueba, places_prueba)


In [23]:
def find_nearest_stations(df_combined):
    """Pick, for every place of interest, the station at the smallest distance.

    The input frame is not modified; the distance column is added to an
    internal copy only.

    Parameters
    ----------
    df_combined : pandas.DataFrame
        Cross join of places and stations. Must contain 'station_lat',
        'station_long', 'place_lat', 'place_long', 'Place of interest',
        'Place address', 'BiciMAD station', 'Station location' and
        'Bikes_available'.

    Returns
    -------
    pandas.DataFrame
        One row per distinct 'Place of interest' with the columns
        'Place of interest', 'Place address', 'BiciMAD station',
        'Station location' and 'Bikes_available'.
    """
    output_columns = [
        'Place of interest', 'Place address', 'BiciMAD station', 'Station location', 'Bikes_available'
    ]

    # Nothing to rank: a row-wise apply on an empty frame does not yield a
    # usable distance column, so return the empty selection directly.
    if df_combined.empty:
        return df_combined[output_columns].copy()

    distances = df_combined.apply(
        lambda row: distance_meters(
            row['station_lat'], row['station_long'], row['place_lat'], row['place_long']
        ).item(),
        axis=1,
    )
    # assign() returns a new frame, so the caller's df_combined stays untouched.
    with_distance = df_combined.assign(distance=distances)

    nearest_stations = with_distance.loc[with_distance.groupby('Place of interest')['distance'].idxmin()]
    final_result = nearest_stations[output_columns]
    return final_result

In [30]:
# Nearest station for each place of interest in the combined test frame.
df_final_result = find_nearest_stations(df_combined)

In [29]:
# Save the result as CSV (index column omitted).
final_result.to_csv("nearest_bicimad_stations.csv", index=False)

In [37]:
def save_csv (dataframe, file_name):
    """Write `dataframe` to `file_name` as CSV without the index and return a confirmation message."""
    dataframe.to_csv(path_or_buf=file_name, index=False)
    confirmation = f"File successfully saved as {file_name}"
    return confirmation

In [38]:
# Save the output of find_nearest_stations rather than the scratch variable
# from the exploratory cell, so this step depends only on the function pipeline.
save_csv(df_final_result, 'nearest_bicimad_stations.csv')

'File successfully saved as nearest_bicimad_stations.csv'