In [None]:
# libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import folium 
#import geopandas as gpd
#from shapely.geometry import Point
#from geopy.distance import distance
import os
import sys
from collections import defaultdict
sys.path.append(os.path.abspath("abstract_flows"))

In [2]:
import arrow
import grid

In [3]:
# Base directory prepended to every data path loaded in this notebook (machine-specific absolute path).
# NOTE(review): the name `dir` shadows Python's built-in dir(); other cells read it, so a rename must touch them too.
dir = '/home/user/Desktop/Datos/'
#dir = '/Users/antoniomendez/Desktop/Tesis/Datos/datos_limpios/'

In [4]:
# Grid object for the 'ecobici' system; 36 and 20 are presumably the grid dimensions — TODO confirm against grid.Grid.
grid_eco = grid.Grid(36, 20, 'ecobici')

In [5]:
# Base map produced by the grid object; it is later passed to plot_abstracted_graph and saved as HTML.
mapa_eco = grid_eco.map_around()

In [7]:
# Ecobici station matrix for 2024; find_station reads column 0 as the station id and columns 1-2 as its coordinates.
estaciones_eco = np.load(dir + 'Adj_eco/matrices_estaciones/est_2024.npy')

In [4]:
def find_location_cell(data_grid, station_array):
    """Return the (i, j) labels of the first grid cell whose box contains a station.

    Parameters
    ----------
    data_grid : table indexable with ``.iloc``
        Each row must expose 'lat1', 'lat2', 'lon1', 'lon2', 'i' and 'j'.
    station_array : sequence
        Only positions 0 and 1 are read, as [lat, lon]. When position 0 is
        negative the two values are swapped before searching.

    Returns
    -------
    tuple or None
        ``(row['i'], row['j'])`` of the first matching row, or None when no
        row contains the point. Box edges count as inside.
    """
    lat, lon = station_array[0], station_array[1]
    if lat < 0:
        # A negative first coordinate is treated as the longitude.
        lat, lon = lon, lat

    for position in range(len(data_grid)):
        row = data_grid.iloc[position]
        # The two corners may come in either order, hence min/max.
        if not (np.min([row['lat1'], row['lat2']]) <= lat <= np.max([row['lat1'], row['lat2']])):
            continue
        if not (np.min([row['lon1'], row['lon2']]) <= lon <= np.max([row['lon1'], row['lon2']])):
            continue
        return row['i'], row['j']
    return None

In [5]:
def find_station(station_id, station_matrix):
    """Look up a station's coordinates by id.

    ``station_matrix`` is indexed as ``station_matrix[row, col]``: column 0 is
    compared against ``station_id`` and columns 1 and 2 are returned as
    ``(lat, lon)``. The first matching row wins; None is returned when no row
    matches.
    """
    matches = (
        (station_matrix[row, 1], station_matrix[row, 2])
        for row in range(len(station_matrix))
        if station_matrix[row, 0] == station_id
    )
    # The generator is lazy, so scanning stops at the first hit.
    return next(matches, None)

In [6]:
def stations_and_cells(data_grid, stations_ids, stations_matrix):
    """Map each station id to the grid cell that contains it.

    Parameters
    ----------
    data_grid : grid table forwarded to ``find_location_cell``.
    stations_ids : iterable of station ids to locate.
    stations_matrix : station table forwarded to ``find_station``.

    Returns
    -------
    dict
        ``{station_id: cell}`` where ``cell`` is whatever
        ``find_location_cell`` returns. Ids that are missing from
        ``stations_matrix`` or that fall outside every cell are omitted.
    """
    stations_cells = {}
    for station_id in stations_ids:
        station = find_station(station_id, stations_matrix)
        if station is None:
            # Unknown id: find_location_cell would fail subscripting None, so skip it.
            continue
        cell = find_location_cell(data_grid, station)
        if cell is not None:
            stations_cells[station_id] = cell
    return stations_cells

In [7]:
def _count_trips(data_user, origin_col, dest_col, threshold, complement):
    """Count trips per unordered station pair taken from two id columns."""
    endpoints = data_user[[origin_col, dest_col]]
    # min/max of the two ids makes A->B and B->A fall into the same pair.
    viajes_user = (
        data_user
        .groupby([endpoints.min(axis=1), endpoints.max(axis=1)])
        .size()
        .reset_index(name='counts')
    )
    viajes_user.columns = ['Est_A', 'Est_B', 'counts']

    if complement:
        keep = viajes_user['counts'] < threshold
    else:
        keep = viajes_user['counts'] >= threshold
    viajes_user = viajes_user[keep]
    if viajes_user.empty:
        return None

    total = viajes_user['counts'].sum()
    # assign() builds a new frame, so no column is set on a filtered slice
    # (which is what raises SettingWithCopyWarning).
    return (
        viajes_user
        .assign(prob=viajes_user['counts'] / total)
        .sort_values(by='prob', ascending=False)
        .reset_index(drop=True)
    )


def count_trips_ecobici(data_user, threshold = 5, complement = False):
    """Count Ecobici trips per unordered station pair.

    Parameters
    ----------
    data_user : DataFrame with 'Ciclo_Estacion_Retiro' and 'Ciclo_Estacion_Arribo'.
    threshold : pairs with ``counts >= threshold`` are kept.
    complement : when True, keep the pairs with ``counts < threshold`` instead.

    Returns
    -------
    DataFrame or None
        Columns 'Est_A' (smaller id), 'Est_B' (larger id), 'counts' and
        'prob' (share of the kept trips), sorted by 'prob' descending.
        None when no pair survives the filter.
    """
    return _count_trips(
        data_user, 'Ciclo_Estacion_Retiro', 'Ciclo_Estacion_Arribo', threshold, complement
    )


def count_trips_mibici(data_user, threshold = 5, complement = False):
    """Count MiBici trips per unordered station pair.

    Same contract as ``count_trips_ecobici`` but reads the 'Origen_Id' and
    'Destino_Id' columns.
    """
    return _count_trips(data_user, 'Origen_Id', 'Destino_Id', threshold, complement)

In [8]:
def abstract_flows(trips_counted, station_cells, station_matrix):
    '''
    Aggregate station-pair trip counts into cell-to-cell flows.

    trips_counted: dataframe with columns Est_A, Est_B, counts, prob
    station_cells: dictionary with station_id as key and cell as value
    station_matrix: numpy array with station_id, lat, lon

    Returns a nested dict
        {cell_A: {cell_B: {'flow_count', 'mass_center_A', 'mass_center_B'}}}
    where cell_A is the cell of Est_A and cell_B the cell of Est_B.

    A cell's mass center is the average of its stations' coordinates weighted
    by the trips that have each station as Est_A. Cells that never appear as
    cell_A get (None, None).
    NOTE(review): such cells only ever show up as 'mass_center_B'; confirm that
    leaving them without a center is intended.

    Raises KeyError when a station of trips_counted is missing from
    station_cells, or when a station of station_cells is missing from
    station_matrix.
    '''
    # Step 1: add up the counts per (cell of Est_A, cell of Est_B).
    cell_flows = defaultdict(lambda: defaultdict(int))
    for _, row in trips_counted.iterrows():
        cell_A = station_cells[row['Est_A']]
        cell_B = station_cells[row['Est_B']]
        cell_flows[cell_A][cell_B] += row['counts']

    # Lookups built once, instead of re-filtering the trips and re-scanning
    # the station table for every station of every cell.
    departures = trips_counted.groupby('Est_A')['counts'].sum().to_dict()
    station_coords = {}
    for entry in station_matrix:
        # setdefault keeps the first row of a repeated id, like find_station.
        station_coords.setdefault(entry[0], (entry[1], entry[2]))
    stations_by_cell = defaultdict(list)
    for station, cell in station_cells.items():
        stations_by_cell[cell].append(station)

    # Step 2: mass center of every cell that has outgoing flows.
    cell_mass_centers = {}
    for cell_A, outgoing in cell_flows.items():
        total_trips = sum(outgoing.values())
        members = stations_by_cell.get(cell_A, [])
        if not members or not total_trips > 0:
            # No stations or no trips: nothing to average.
            cell_mass_centers[cell_A] = (None, None)
            continue

        lat_sum, lon_sum = 0, 0
        for station in members:
            lat, lon = station_coords[station]
            weight = departures.get(station, 0)
            lat_sum += lat * weight
            lon_sum += lon * weight
        cell_mass_centers[cell_A] = (lat_sum / total_trips, lon_sum / total_trips)

    # Step 3: attach both mass centers to every flow.
    abstracted_flows = {}
    for cell_A, outgoing in cell_flows.items():
        center_A = cell_mass_centers.get(cell_A, (None, None))
        abstracted_flows[cell_A] = {
            cell_B: {
                'flow_count': flow,
                'mass_center_A': center_A,
                'mass_center_B': cell_mass_centers.get(cell_B, (None, None)),
            }
            for cell_B, flow in outgoing.items()
        }
    return abstracted_flows

In [9]:
def plot_abstracted_graph(abstracted_graph, folium_map):
    """Draw the abstracted cell-to-cell flows on a map.

    NOTE: later cells of this notebook define plot_abstracted_graph again;
    whichever definition was executed last is the one in effect.

    Parameters
    ----------
    abstracted_graph : dict
        ``{cell_A: {cell_B: {'flow_count', 'mass_center_A', 'mass_center_B'}}}``.
    folium_map : map object handed to ``arrow.draw_arrow`` and to
        ``folium.CircleMarker(...).add_to``.

    Returns
    -------
    The same ``folium_map`` object.

    A flow is drawn only when both mass centers are set and differ. Stroke
    weight (1 to 6) and viridis color both follow the rank of the flow count
    among all flow counts.
    """
    flow_counts = [
        flow_data['flow_count']
        for cell_A in abstracted_graph
        for flow_data in abstracted_graph[cell_A].values()
    ]
    if not flow_counts:
        # Nothing to draw.
        return folium_map

    min_weight = 1
    max_weight = 6
    sorted_flows = sorted(flow_counts)
    weights = np.linspace(min_weight, max_weight, num=len(sorted_flows))
    colors = plt.cm.viridis(np.linspace(0, 1, num=len(sorted_flows)))

    # Rank-based lookups. The color used to be read as colors[flow_count],
    # i.e. indexed by the flow value itself, which fails for float or large counts.
    flow_to_weight = dict(zip(sorted_flows, weights))
    flow_to_color = dict(zip(sorted_flows, colors))

    for cell_A in abstracted_graph:
        for cell_B, flow_data in abstracted_graph[cell_A].items():
            mass_center_A = flow_data['mass_center_A']
            mass_center_B = flow_data['mass_center_B']
            flow_count = flow_data['flow_count']

            # Only when both mass centers are valid and different.
            if mass_center_A != (None, None) and mass_center_B != (None, None) and mass_center_A != mass_center_B:
                lat1, lon1 = mass_center_A
                lat2, lon2 = mass_center_B

                weight = flow_to_weight.get(flow_count, min_weight)
                color = flow_to_color[flow_count]
                # RGBA floats in [0, 1] -> '#rrggbb'.
                color_hex = '#{:02x}{:02x}{:02x}'.format(
                    int(color[0] * 255), int(color[1] * 255), int(color[2] * 255)
                )

                # The arc.
                arrow.draw_arrow(
                    folium_map,
                    lat1, lon1, lat2, lon2,
                    color=color_hex,
                    weight=weight,
                    tip=6,
                    text=f'Flujos: {flow_count}',
                    radius_fac=1.0
                )

                # Mass centers.
                folium.CircleMarker(
                    location=[lat1, lon1],
                    radius=5,
                    color='red',
                    fill=True,
                    fill_color='red',
                    fill_opacity=1.0,
                    popup=f'Centro de masa: {cell_A}'
                ).add_to(folium_map)

                folium.CircleMarker(
                    location=[lat2, lon2],
                    radius=5,
                    color='red',
                    fill=True,
                    fill_color='red',
                    fill_opacity=1.0,
                    popup=f'Centro de masa: {cell_B}'
                ).add_to(folium_map)

    return folium_map

In [None]:
## ESTA 
def plot_abstracted_graph(abstracted_graph, folium_map, range_weights = [1,8]):
    """Draw the abstracted cell-to-cell flows with binned inferno colors.

    NOTE: this name is defined more than once in the notebook; whichever
    definition was executed last is the one in effect.

    Parameters
    ----------
    abstracted_graph : dict
        ``{cell_A: {cell_B: {'flow_count', 'mass_center_A', 'mass_center_B'}}}``.
    folium_map : map object handed to ``arrow.draw_arrow`` and to
        ``folium.CircleMarker(...).add_to``.
    range_weights : [min, max]
        Stroke weights given to the smallest and largest distinct flow count.
        ``range_weights[1]`` is also the number of color bins.

    Returns
    -------
    The same ``folium_map`` object.

    A flow is drawn only when both mass centers are set and differ.
    """
    # Step 1: distinct flow counts, ascending.
    sorted_flows = sorted({
        flow_data['flow_count']
        for cell_A in abstracted_graph
        for flow_data in abstracted_graph[cell_A].values()
    })
    if not sorted_flows:
        # Nothing to draw.
        return folium_map

    # Step 2: stroke weight grows linearly with the rank of the flow count.
    min_weight = range_weights[0]
    max_weight = range_weights[1]
    n_flows = len(sorted_flows)
    weights = np.linspace(min_weight, max_weight, num=n_flows)
    flow_to_weight = dict(zip(sorted_flows, weights))

    # Step 3: n_bins inferno colors; the exponent shifts the samples toward
    # the upper end of the colormap.
    n_bins = range_weights[1]
    exp = 0.5
    palette = plt.cm.inferno(np.linspace(1e-2, 1, num=n_bins)**exp)
    # Rank r falls in bin r * n_bins // n_flows. This equals the former
    # r // (n_flows // n_bins) whenever n_flows is a multiple of n_bins, and
    # still colors every flow when it is not (or when n_flows < n_bins).
    flow_to_color = {
        flow: palette[rank * n_bins // n_flows].tolist()
        for rank, flow in enumerate(sorted_flows)
    }

    # Step 4: draw the arcs and the mass centers.
    for cell_A in abstracted_graph:
        for cell_B, flow_data in abstracted_graph[cell_A].items():
            mass_center_A = flow_data['mass_center_A']
            mass_center_B = flow_data['mass_center_B']
            flow_count = flow_data['flow_count']

            # Only draw when both mass centers are valid and different.
            if mass_center_A != (None, None) and mass_center_B != (None, None) and mass_center_A != mass_center_B:
                lat1, lon1 = mass_center_A
                lat2, lon2 = mass_center_B

                weight = flow_to_weight[flow_count]
                color = flow_to_color[flow_count]

                # RGBA floats in [0, 1] -> '#rrggbb' string.
                color_hex = '#{:02x}{:02x}{:02x}'.format(
                    int(color[0] * 255),  # red
                    int(color[1] * 255),  # green
                    int(color[2] * 255)   # blue
                )

                # The arc.
                arrow.draw_arrow(
                    folium_map,
                    lat1, lon1, lat2, lon2,
                    color=color_hex,
                    weight=weight,
                    tip=6,
                    text=f'Flujos: {flow_count}',
                    radius_fac=1.0
                )

                # Mass centers as black markers.
                folium.CircleMarker(
                    location=[lat1, lon1],
                    radius=5,
                    color='black',
                    fill=True,
                    fill_color='black',
                    fill_opacity=1.0,
                    popup=f'Centro de masa: {cell_A}'
                ).add_to(folium_map)

                folium.CircleMarker(
                    location=[lat2, lon2],
                    radius=5,
                    color='black',
                    fill=True,
                    fill_color='black',
                    fill_opacity=1.0,
                    popup=f'Centro de masa: {cell_B}'
                ).add_to(folium_map)

    return folium_map

In [65]:
import numpy as np
import folium
import matplotlib.pyplot as plt
import base64
import io

def plot_abstracted_graph(abstracted_graph, folium_map, range_weights=[1, 6], title=None):
    """Draw the abstracted flows, highlight the busiest cell, add title and color bar.

    Parameters
    ----------
    abstracted_graph : dict
        ``{cell_A: {cell_B: {'flow_count', 'mass_center_A', 'mass_center_B'}}}``.
    folium_map : map object handed to ``arrow.draw_arrow`` and to
        ``folium.CircleMarker(...).add_to``; HTML is appended through
        ``folium_map.get_root().html``.
    range_weights : [min, max]
        Stroke weights given to the smallest and largest distinct flow count.
    title : str or None
        When truthy, inserted as-is into an ``<h3>`` element on the page.

    Returns
    -------
    The same ``folium_map`` object.

    A flow is drawn only when both mass centers are set and differ. The
    outgoing cell with the highest (sum of flow counts + number of
    connections) is drawn red and larger; all other nodes are black.

    NOTE(review): arrow colors follow the rank of the flow count (with a 0.5
    exponent) while the color bar is linear between the min and max flow
    count, so the legend does not map values to the exact arrow colors.
    """
    # Step 1: distinct flow counts, ascending.
    sorted_flows = sorted({
        flow_data['flow_count']
        for flows in abstracted_graph.values()
        for flow_data in flows.values()
    })
    max_flow = sorted_flows[-1] if sorted_flows else 1
    min_flow = sorted_flows[0] if sorted_flows else 0

    # Step 2: weight and color by rank of the flow count.
    min_weight = range_weights[0]
    max_weight = range_weights[1]
    if sorted_flows:
        weights = np.linspace(min_weight, max_weight, num=len(sorted_flows))
        colors = plt.cm.inferno(np.linspace(0, 1, num=len(sorted_flows))**0.5)
    else:
        weights = [min_weight]
        colors = [plt.cm.viridis(0)]
    flow_to_weight = dict(zip(sorted_flows, weights))
    flow_to_color = dict(zip(sorted_flows, colors))

    # Step 3: most relevant node = highest (total outgoing flow + number of connections).
    node_relevance = {
        cell_A: sum(flow_data['flow_count'] for flow_data in flows.values()) + len(flows)
        for cell_A, flows in abstracted_graph.items()
    }
    most_relevant_node = max(node_relevance, key=node_relevance.get, default=None)

    # Step 4a: draw the arcs and remember which nodes take part in one.
    node_locations = {}
    for cell_A, flows in abstracted_graph.items():
        for cell_B, flow_data in flows.items():
            mass_center_A = flow_data['mass_center_A']
            mass_center_B = flow_data['mass_center_B']
            flow_count = flow_data['flow_count']

            if mass_center_A != (None, None) and mass_center_B != (None, None) and mass_center_A != mass_center_B:
                lat1, lon1 = mass_center_A
                lat2, lon2 = mass_center_B

                weight = flow_to_weight.get(flow_count, min_weight)
                color = flow_to_color.get(flow_count, plt.cm.viridis(0))
                # RGBA floats in [0, 1] -> '#rrggbb' string.
                color_hex = '#{:02x}{:02x}{:02x}'.format(
                    int(color[0] * 255), int(color[1] * 255), int(color[2] * 255)
                )

                arrow.draw_arrow(
                    folium_map,
                    lat1, lon1, lat2, lon2,
                    color=color_hex,
                    weight=weight,
                    tip=6,
                    text=f'Flujos: {int(flow_count)}',
                    radius_fac=1.0
                )

                node_locations.setdefault(cell_A, (lat1, lon1))
                node_locations.setdefault(cell_B, (lat2, lon2))

    # Step 4b: one marker per node, added after the arcs. Previously a marker
    # was added for every incident arc, stacking identical markers on the map.
    for cell, (lat, lon) in node_locations.items():
        is_most_relevant = cell == most_relevant_node
        node_color = 'red' if is_most_relevant else 'black'
        folium.CircleMarker(
            location=[lat, lon],
            radius=7 if is_most_relevant else 5,
            color=node_color,
            fill=True,
            fill_color=node_color,
            fill_opacity=1.0,
            popup=f'Centro de masa: {cell}'
        ).add_to(folium_map)

    # Step 5: add title if provided.
    if title:
        title_html = f"""
        <h3 align="center" style="font-size:16px"><b>{title}</b></h3>
        """
        folium_map.get_root().html.add_child(folium.Element(title_html))

    # Step 6: render a color bar to PNG and embed it as a base64 <img>.
    fig, ax = plt.subplots(figsize=(4, 1))
    cmap = plt.cm.inferno
    norm = plt.Normalize(vmin=min_flow, vmax=max_flow)
    cb = plt.colorbar(plt.cm.ScalarMappable(norm=norm, cmap=cmap), ax=ax, orientation='horizontal')
    cb.set_label('Flow Count')

    buf = io.BytesIO()
    fig.savefig(buf, format='png', bbox_inches='tight')
    plt.close(fig)
    encoded = base64.b64encode(buf.getvalue()).decode('utf-8')
    colorbar_html = f'<img src="data:image/png;base64,{encoded}" style="position: absolute; bottom: 10px; left: 10px; width: 200px;">'
    folium_map.get_root().html.add_child(folium.Element(colorbar_html))

    return folium_map


In [10]:
# Load the 2024 Ecobici trip table and preview its first rows.
data_load = pd.read_csv(dir + 'ecobici/ecobici_2024.csv')
data_load.head()

Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,Tiempo_viaje
0,M,48.0,3371879,113,2023-12-31,23:21:52,659,2024-01-01,00:00:02,38.166667
1,F,26.0,7033434,281,2023-12-31,23:46:02,47,2024-01-01,00:00:02,14.0
2,F,37.0,7169857,15,2023-12-31,23:51:57,217,2024-01-01,00:00:49,8.866667
3,F,30.0,6368211,555,2023-12-31,23:53:10,8,2024-01-01,00:02:39,9.483333
4,M,30.0,5136924,555,2023-12-31,23:52:53,8,2024-01-01,00:02:45,9.866667


In [15]:
# Keep only the trips whose 'Fecha_Retiro' string contains '01-01', then preview them.
# NOTE(review): str.contains is a substring/regex search, so any date string containing '01-01' matches — confirm an exact date comparison is not intended.
data_eco = data_load[data_load['Fecha_Retiro'].str.contains('01-01')]
data_eco.head()

Unnamed: 0,Genero_Usuario,Edad_Usuario,Bici,Ciclo_Estacion_Retiro,Fecha_Retiro,Hora_Retiro,Ciclo_Estacion_Arribo,Fecha_Arribo,Hora_Arribo,Tiempo_viaje
20,O,41.0,7181191,165,2024-01-01,00:04:35,182,2024-01-01,00:11:59,7.4
25,M,21.0,5646617,173,2024-01-01,00:12:09,171,2024-01-01,00:13:10,1.016667
27,M,44.0,2130284,175,2024-01-01,00:05:24,74,2024-01-01,00:14:14,8.833333
29,M,43.0,5136924,8,2024-01-01,00:11:47,10,2024-01-01,00:14:24,2.616667
36,M,33.0,5613211,550,2024-01-01,00:10:44,544,2024-01-01,00:15:03,4.316667


In [16]:
# Trips per unordered station pair, keeping pairs with at least 5 trips (None if no pair qualifies).
counter_eco = count_trips_ecobici(data_eco, threshold = 5)

In [17]:
# Takes more than 30 seconds.
# Collect every station id seen as origin or destination, then map each one to its grid cell.
estaciones_eco_2024 = np.unique(np.concatenate((data_eco['Ciclo_Estacion_Retiro'].unique(), data_eco['Ciclo_Estacion_Arribo'].unique())))
stations_cells_eco = stations_and_cells(grid_eco.geodataframe(), estaciones_eco_2024, estaciones_eco)

In [18]:
# Aggregate the station-pair counts into cell-to-cell flows with per-cell mass centers.
graph_eco = abstract_flows(counter_eco, stations_cells_eco, estaciones_eco)

In [50]:
# Takes up to 5 minutes.
# Draw the Ecobici flows on the base map.
# NOTE(review): the saved output of this cell is a NameError (graph_eco undefined), so it was last run before graph_eco existed in the kernel.
mapa_eco = plot_abstracted_graph(graph_eco, mapa_eco)

NameError: name 'graph_eco' is not defined

In [28]:
# Write the Ecobici map to an HTML file (relative path, so it lands in the current working directory).
mapa_eco.save('abstracted_ecobici.html')

In [10]:
# Grid object for the 'mibici' system; 10 and 10 are presumably the grid dimensions — TODO confirm against grid.Grid.
grid_mibici = grid.Grid(10, 10, 'mibici')

In [11]:
# MiBici station matrix for 2024; find_station reads column 0 as the station id and columns 1-2 as its coordinates.
estaciones_mibici = np.load(dir + 'Adj_mibici/matrices_estaciones/est_2024.npy')
#estaciones_mibici = np.load('/Users/antoniomendez/Desktop/Tesis/Datos/Adj_mibici/matrices_estaciones/est_2024.npy')

In [12]:
# Load the 2024 MiBici trip table and preview its first rows.
# NOTE(review): this reuses the name data_load, replacing the Ecobici table loaded earlier.
data_load = pd.read_csv(dir + 'mibici/mibici_2024.csv')
data_load.head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,A}äe_nacimiento,Tiempo_viaje
0,31789291,640715,M,1984.0,2024-01-01 00:05:25,2024-01-01 00:29:37,294,271,,24.2
1,31789292,2353010,F,1995.0,2024-01-01 00:07:23,2024-01-01 00:16:37,182,254,,9.233333
2,31789293,1556365,M,1984.0,2024-01-01 00:07:31,2024-01-01 00:16:37,182,254,,9.1
3,31789294,2626233,M,1994.0,2024-01-01 00:07:56,2024-01-01 00:31:51,35,154,,23.916667
4,31789295,2602006,M,1998.0,2024-01-01 00:08:09,2024-01-01 00:31:51,35,154,,23.7


In [13]:
# Keep only the trips whose 'Inicio_del_viaje' string contains '01-01'.
# NOTE(review): str.contains is a substring/regex search on the whole timestamp string — confirm an exact date comparison is not intended.
data_mibici = data_load[data_load['Inicio_del_viaje'].str.contains('01-01')]

In [14]:
# Trips per unordered station pair (threshold = 1 keeps every pair that occurs),
# then every station id seen as origin or destination, mapped to its grid cell.
counter_mibici = count_trips_mibici(data_mibici, threshold = 1)
estaciones_mibici_2024 = np.unique(np.concatenate((data_mibici['Origen_Id'].unique(), data_mibici['Destino_Id'].unique())))
stations_cells_mibici = stations_and_cells(grid_mibici.geodataframe(), estaciones_mibici_2024, estaciones_mibici)

In [66]:
# Fresh base map for MiBici; re-running this cell discards anything drawn on the previous map object.
mapa_mibici = grid_mibici.map_around()

In [16]:
# Aggregate the MiBici station-pair counts into cell-to-cell flows with per-cell mass centers.
graph_mibici = abstract_flows(counter_mibici, stations_cells_mibici, estaciones_mibici)

In [67]:
# Draw the MiBici flows on the base map, passing 'Prueba' as the page title.
mapa_mibici = plot_abstracted_graph(graph_mibici, mapa_mibici, title='Prueba')

In [68]:
# Write the MiBici map to an HTML file (machine-specific absolute path).
#mapa_mibici.save('/Users/antoniomendez/Desktop/abstracted_mibici.html')
mapa_mibici.save('/home/user/Desktop/abstracted_mibici.html')