In [None]:
# Carga de datasets
import pandas as pd
import folium
from folium import plugins
from IPython.display import display, HTML

# Load the raw trips dataset.
# NOTE(review): this is an absolute, machine-specific path; the notebook will only
# run on a machine with this exact layout. Consider a configurable data directory.
dfbike = pd.read_csv("C:/Users/Cat/Desktop/MASTER/PREPROCESO/Trabajo Grupal/Bike Sharing Madrid (5)/bicimad_trips.csv", low_memory=False)

# Keep only the trips where every station id and address is both non-null and
# different from the empty string.
columnas_requeridas = ['station_unlock', 'station_lock', 'address_unlock', 'address_lock']
valores_requeridos = dfbike[columnas_requeridas]
filas_completas = (valores_requeridos.notnull() & (valores_requeridos != "")).all(axis=1)
dfbike_filtered = dfbike[filas_completas]

# Take a reproducible sample of up to 1000 filtered trips. The size is capped at the
# number of available rows because DataFrame.sample raises ValueError when asked for
# more rows than exist (sampling without replacement).
tamano_muestra = min(1000, len(dfbike_filtered))
dfbike_sample = dfbike_filtered.sample(n=tamano_muestra, random_state=42)

def extraer_coordenadas(geolocation):
    """Parse a geolocation string into a ``(lat, lon)`` pair.

    The coordinates are read from the first ``[...]`` group found in the text.
    The first number inside the brackets is taken as the longitude and the
    second as the latitude; any further values are ignored.

    Args:
        geolocation: Text containing a bracketed ``[lon, lat]`` pair. Null
            values and non-string values are accepted and treated as missing.

    Returns:
        tuple: ``(lat, lon)`` as floats, or ``(None, None)`` when the input is
        missing, is not a string, has no ``[...]`` group, or the group does not
        hold at least two parseable numbers.
    """
    # Null values (None / NaN) and any other non-text value cannot be parsed;
    # report them as missing instead of failing on ``.find``.
    if not isinstance(geolocation, str):
        return None, None

    start = geolocation.find('[')
    end = geolocation.find(']', start + 1)
    # ``find`` returns -1 when a bracket is absent; slicing with -1 would
    # silently chop the last character and could yield wrong coordinates.
    if start == -1 or end == -1:
        return None, None

    coords = geolocation[start + 1:end].split(',')
    try:
        lon = float(coords[0].strip())
        lat = float(coords[1].strip())
    except (ValueError, IndexError):
        # Non-numeric content or fewer than two values inside the brackets.
        return None, None
    return lat, lon


# Split each geolocation column into separate latitude / longitude columns,
# once for the unlock side of the trip and once for the lock side.
for sufijo in ('unlock', 'lock'):
    coordenadas = dfbike_sample[f'geolocation_{sufijo}'].apply(
        lambda geo: pd.Series(extraer_coordenadas(geo))
    )
    dfbike_sample[[f'lat_{sufijo}', f'lon_{sufijo}']] = coordenadas

In [60]:
# Build one row per station with its coordinates and its unlock / lock counts.

# Cast the station ids to int BEFORE deriving the station tables, so that the
# 'station_id' values and the groupby keys used for the counts share one dtype.
# NOTE(review): the raw dtype of these columns is not visible here; if they are
# loaded as strings, a string station_id would never match the int count index
# in the join below and every count would end up as 0.
dfbike_sample['station_lock'] = dfbike_sample['station_lock'].astype(float).astype(int)
dfbike_sample['station_unlock'] = dfbike_sample['station_unlock'].astype(float).astype(int)

# Station tables for each side of the trip, renamed to a common schema
estaciones_unlock = dfbike_sample[['station_unlock', 'unlock_station_name', 'lat_unlock', 'lon_unlock']].rename(
    columns={'station_unlock': 'station_id', 'unlock_station_name': 'station_name', 'lat_unlock': 'lat', 'lon_unlock': 'lon'}
)
estaciones_lock = dfbike_sample[['station_lock', 'lock_station_name', 'lat_lock', 'lon_lock']].rename(
    columns={'station_lock': 'station_id', 'lock_station_name': 'station_name', 'lat_lock': 'lat', 'lon_lock': 'lon'}
)

# Combine both sides. Incomplete rows are dropped first and duplicates second:
# drop_duplicates keeps the first row per station, so deduplicating first would
# lose a station whenever its first row happens to have a missing value.
estaciones = (
    pd.concat([estaciones_unlock, estaciones_lock])
    .dropna()
    .drop_duplicates(subset=['station_id'])
)

# Number of trips started (unlock) and ended (lock) at each station in the sample
conteo_desbloqueos = dfbike_sample.groupby('station_unlock').size().rename("desbloqueos")
conteo_bloqueos = dfbike_sample.groupby('station_lock').size().rename("bloqueos")

# Attach the counts by station id; a station absent from one side gets 0 there
estaciones = estaciones.set_index('station_id')
estaciones = estaciones.join(conteo_desbloqueos).join(conteo_bloqueos).fillna(0).reset_index()

In [61]:
# Base map centred on Madrid
mapa = folium.Map(location=[40.4168, -3.7038], zoom_start=12)

# One circle marker per station, with a popup showing its name and trip counts
for estacion in estaciones.itertuples(index=False):
    n_desbloqueos = int(estacion.desbloqueos)
    n_bloqueos = int(estacion.bloqueos)
    contenido_popup = f"""
    <b>Estación:</b> {estacion.station_name}<br>
    <b>Desbloqueos:</b> {n_desbloqueos}<br>
    <b>Bloqueos:</b> {n_bloqueos}
    """
    marcador = folium.CircleMarker(
        location=[estacion.lat, estacion.lon],
        radius=5,
        color='blue',
        fill=True,
        fill_opacity=0.7,
        popup=folium.Popup(contenido_popup, max_width=300),
    )
    marcador.add_to(mapa)

# Render the map in the notebook output
display(mapa)