In [126]:
#libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import folium
import os
import sys
sys.path.append(os.path.abspath('abstract_flows'))
import arrow
import grid
import flows

In [127]:
def find_location_cell(data_grid, station_array):
    '''
    Return the (i, j) labels of the first grid cell that contains a point.

    Parameters:
        data_grid (DataFrame): One row per cell, with columns i, j, lat1, lat2, lon1, lon2.
        station_array (sequence): Point given as [lat, lon]. If the first component is
            negative the pair is treated as [lon, lat] and swapped before the search.

    Returns:
        tuple or None: (i, j) read from the first row whose bounds contain the point
        (bounds are inclusive), or None when the point is missing or no cell contains it.
    '''
    # A missing station (e.g. find_station returned None) or an undefined
    # (None, None) center cannot be located; report "no cell" instead of raising.
    if station_array is None:
        return None
    lat, lon = station_array[0], station_array[1]
    if lat is None or lon is None:
        return None

    # [lat, lon] is expected; a negative first component is taken to mean [lon, lat].
    if lat < 0:
        lat, lon = lon, lat

    for row_pos in range(len(data_grid)):
        current = data_grid.iloc[row_pos]
        # lat1/lat2 and lon1/lon2 are not assumed to be ordered.
        if not (np.min([current['lat1'], current['lat2']]) <= lat <= np.max([current['lat1'], current['lat2']])):
            continue
        if not (np.min([current['lon1'], current['lon2']]) <= lon <= np.max([current['lon1'], current['lon2']])):
            continue
        return current['i'], current['j']
    return None

def find_station(station_id, station_matrix):
    '''
    Look up the coordinates stored for a station id.

    Rows of station_matrix are scanned in order; column 0 is compared with
    station_id and the first matching row wins.

    Returns:
        tuple or None: (station_matrix[k, 1], station_matrix[k, 2]) for the first
        matching row k, or None when no row matches.
    '''
    matching_rows = (
        k for k in range(len(station_matrix)) if station_matrix[k, 0] == station_id
    )
    first_match = next(matching_rows, None)
    if first_match is None:
        return None
    # [lat, lon]
    return station_matrix[first_match, 1], station_matrix[first_match, 2]

def stations_and_cells(data_grid, stations_ids, stations_matrix):
    '''
    Map each station id to the grid cell that contains it.

    Parameters:
        data_grid (DataFrame): Grid cells, as expected by find_location_cell.
        stations_ids (iterable): Station ids to locate.
        stations_matrix (numpy.ndarray): Station table, as expected by find_station.

    Returns:
        dict: station id -> cell as returned by find_location_cell. Stations that
        are unknown, fall outside every cell, or fail to be looked up are omitted.
    '''
    stations_cells = dict()
    for station_id in stations_ids:
        try:
            station = find_station(station_id, stations_matrix)
            # An id missing from the matrix simply has no cell.
            cell = None if station is None else find_location_cell(data_grid, station)
        except Exception:
            # Best effort: a malformed row skips this station only. Catching
            # Exception (not a bare except) lets KeyboardInterrupt/SystemExit through.
            cell = None
        if cell is not None:
            stations_cells[station_id] = cell
    return stations_cells

In [128]:
def count_trips_ecobici(data_user, threshold=5, complement=False, directed=False):
    '''
    Count trips per station pair for data with the columns
    Ciclo_Estacion_Retiro (origin) and Ciclo_Estacion_Arribo (destination).

    Parameters:
        data_user (DataFrame): One row per trip.
        threshold (int): Count cut-off applied to every station pair.
        complement (bool): False keeps pairs with counts >= threshold,
            True keeps pairs with counts < threshold.
        directed (bool): True counts (origin, destination) as is; False merges both
            directions of a pair by grouping on (min id, max id).

    Returns:
        DataFrame or None: Columns Est_A, Est_B, counts, prob (counts divided by the
        total of the kept counts), sorted by prob in descending order; None when no
        pair passes the filter.
    '''
    return _count_trips(
        data_user, 'Ciclo_Estacion_Retiro', 'Ciclo_Estacion_Arribo',
        threshold, complement, directed,
    )


def count_trips_mibici(data_user, threshold=5, complement=False, directed=False):
    '''
    Count trips per station pair for data with the columns
    Origen_Id (origin) and Destino_Id (destination).

    Parameters:
        data_user (DataFrame): One row per trip.
        threshold (int): Count cut-off applied to every station pair.
        complement (bool): False keeps pairs with counts >= threshold,
            True keeps pairs with counts < threshold.
        directed (bool): True counts (origin, destination) as is; False merges both
            directions of a pair by grouping on (min id, max id).

    Returns:
        DataFrame or None: Columns Est_A, Est_B, counts, prob (counts divided by the
        total of the kept counts), sorted by prob in descending order; None when no
        pair passes the filter.
    '''
    return _count_trips(
        data_user, 'Origen_Id', 'Destino_Id',
        threshold, complement, directed,
    )


def _count_trips(data_user, origin_col, destination_col, threshold, complement, directed):
    # Shared implementation of the count_trips_* functions; only the column names differ.
    if directed:
        # For directed trips, group by the exact origin and destination
        group_keys = [origin_col, destination_col]
    else:
        # For undirected trips, group by min and max of origin and destination
        endpoints = data_user[[origin_col, destination_col]]
        group_keys = [endpoints.min(axis=1), endpoints.max(axis=1)]

    viajes_user = data_user.groupby(group_keys).size().reset_index(name='counts')
    viajes_user.columns = ['Est_A', 'Est_B', 'counts']

    # Apply threshold filtering
    if complement:
        keep = viajes_user['counts'] < threshold
    else:
        keep = viajes_user['counts'] >= threshold
    # Copy the filtered rows so that adding 'prob' below writes to an independent
    # frame instead of a slice of the grouped table (avoids SettingWithCopyWarning).
    viajes_user = viajes_user[keep].copy()

    if viajes_user.empty:
        return None

    # Calculate probabilities
    total = viajes_user['counts'].sum()
    viajes_user['prob'] = viajes_user['counts'] / total

    # Sort by probability
    return viajes_user.sort_values(by='prob', ascending=False).reset_index(drop=True)

In [129]:
import pandas as pd
from collections import defaultdict
import numpy as np

def abstract_flows(trips_counted, cells_data, station_cells, station_matrix, threshold=0):
    '''
    Aggregate station-to-station trip counts into cell-to-cell flows and attach
    a representative point (mass center) to every cell involved.

    Parameters:
        trips_counted (DataFrame): DataFrame with columns Est_A, Est_B, counts, prob.
        cells_data (DataFrame): DataFrame with cell information (i, j, lat1, lat2, lon1, lon2).
        station_cells (dict): Dictionary with station_id as key and cell (i, j) as value.
        station_matrix (numpy.ndarray): Array with station_id, lat, lon.
        threshold (int): Minimum flow count to include in the DataFrame (default is 0).

    Returns:
        flows_df (DataFrame): DataFrame with columns i_A, j_A, i_B, j_B, flow_count,
        mass_center_A, and mass_center_B. A mass center is the average position of the
        cell's stations weighted by the trips counted with that station as Est_A. When
        that average is undefined (no such trips) or is not located in the cell, the
        geometric center of the cell is used; (None, None) only appears if the cell is
        absent from cells_data.
    '''
    cell_flows = defaultdict(lambda: defaultdict(int))

    # Group trips by cell; trips with an endpoint that has no cell are skipped
    for _, row in trips_counted.iterrows():
        try:
            cell_A = station_cells[row['Est_A']]
            cell_B = station_cells[row['Est_B']]
        except KeyError:
            continue
        cell_flows[cell_A][cell_B] += row['counts']

    # Index the stations of every cell once instead of rescanning station_cells per cell
    stations_by_cell = defaultdict(list)
    for station, cell in station_cells.items():
        stations_by_cell[cell].append(station)

    # Trips counted per Est_A station, computed once
    departures = trips_counted.groupby('Est_A')['counts'].sum().to_dict()

    # Every cell that appears in a flow needs a center, destinations included
    cells_in_flows = set(cell_flows)
    for destinations in cell_flows.values():
        cells_in_flows.update(destinations)

    # Calculate mass centers of the cells
    cell_mass_centers = {}
    for cell in cells_in_flows:
        lat_sum, lon_sum, weight_sum = 0, 0, 0
        for station in stations_by_cell.get(cell, ()):
            coords = find_station(station, station_matrix)
            if coords is None:
                # Station missing from the matrix: it cannot contribute a position
                continue
            weight = departures.get(station, 0)
            lat_sum += coords[0] * weight
            lon_sum += coords[1] * weight
            weight_sum += weight

        center = None
        if weight_sum > 0:
            # Normalize by the weights actually applied so the result is a true
            # weighted average of the station positions
            center = (lat_sum / weight_sum, lon_sum / weight_sum)
            # Ensure mass center is within the cell
            if find_location_cell(cells_data, center) != cell:
                center = None
        if center is None:
            center = _cell_geometric_center(cells_data, cell)
        cell_mass_centers[cell] = center

    # Build the flows DataFrame
    flows_data = []
    for cell_A, destinations in cell_flows.items():
        for cell_B, flow_count in destinations.items():
            if flow_count >= threshold:  # Apply threshold
                flows_data.append([
                    cell_A[0], cell_A[1],                        # i_A, j_A
                    cell_B[0], cell_B[1],                        # i_B, j_B
                    flow_count,                                  # flow_count
                    cell_mass_centers.get(cell_A, (None, None)), # mass_center_A
                    cell_mass_centers.get(cell_B, (None, None))  # mass_center_B
                ])

    # Create DataFrame
    flows_df = pd.DataFrame(
        flows_data,
        columns=['i_A', 'j_A', 'i_B', 'j_B', 'flow_count', 'mass_center_A', 'mass_center_B']
    )

    return flows_df


def _cell_geometric_center(cells_data, cell):
    # Geometric center (lat, lon) of the cell labelled (i, j). The row is selected by
    # its i/j labels, not by position: i and j are cell labels, not row numbers.
    match = cells_data[(cells_data['i'] == cell[0]) & (cells_data['j'] == cell[1])]
    if match.empty:
        return (None, None)
    bounds = match.iloc[0]
    return (
        np.mean([bounds['lat1'], bounds['lat2']]),
        np.mean([bounds['lon1'], bounds['lon2']])
    )

In [130]:
import folium
import numpy as np
import matplotlib.pyplot as plt
import io
import base64
from collections import defaultdict

def plot_flows_dataframe(flows_df, folium_map, range_weights=[1, 6], title=None):
    '''
    Draw cell-to-cell flows on a folium map: one arrow per flow between the mass
    centers of two different cells, one circle per cell, an optional title and a
    color bar image.

    NOTE(review): later cells of this notebook define plot_flows_dataframe again
    with an additional cell_data parameter; once they run, this definition is no
    longer the one bound to the name.

    Parameters:
        flows_df (DataFrame): DataFrame with columns i_A, j_A, i_B, j_B, flow_count, mass_center_A, and mass_center_B.
        folium_map (folium.Map): Folium map object; layers are added to it.
        range_weights (list): Range of weights for edge thickness (only read, never modified).
        title (str): Title of the map.

    Returns:
        folium.Map: The folium_map that was passed in.
    '''
    # Step 1: Find max and min flow_count
    flow_counts = flows_df['flow_count'].unique()
    has_flows = len(flow_counts) > 0
    max_flow = flow_counts.max() if has_flows else 1
    min_flow = flow_counts.min() if has_flows else 0

    # Step 2: One thickness and one color per distinct flow value, assigned by rank
    min_weight = range_weights[0]
    max_weight = range_weights[1]
    weights = np.linspace(min_weight, max_weight, num=len(flow_counts)) if has_flows else [min_weight]

    exp = 0.5  # exponent applied to the 0..1 rank positions before the color-map lookup
    colors = plt.cm.inferno(np.linspace(0, 1, num=len(weights))**exp) if has_flows else [plt.cm.inferno(0)]

    sorted_flows = sorted(flow_counts)
    flow_to_weight = dict(zip(sorted_flows, weights))
    flow_to_color = dict(zip(sorted_flows, colors))

    # Step 3: Node relevance, self-connected nodes and the mass center of every node
    node_relevance = defaultdict(int)
    self_connected_nodes = set()
    origin_centers = {}
    destination_centers = {}

    for _, row in flows_df.iterrows():
        cell_A = (row['i_A'], row['j_A'])
        cell_B = (row['i_B'], row['j_B'])
        flow_count = row['flow_count']

        node_relevance[cell_A] += flow_count
        node_relevance[cell_B] += flow_count

        if cell_A == cell_B:
            self_connected_nodes.add(cell_A)

        # First value seen wins
        origin_centers.setdefault(cell_A, row['mass_center_A'])
        destination_centers.setdefault(cell_B, row['mass_center_B'])

    most_relevant_node = max(node_relevance, key=node_relevance.get, default=None)

    # Step 4: Draw edges between mass centers
    for _, row in flows_df.iterrows():
        flow_count = row['flow_count']
        mass_center_A = row['mass_center_A']
        mass_center_B = row['mass_center_B']

        # Skip undefined centers and flows that start and end at the same point
        if mass_center_A != (None, None) and mass_center_B != (None, None) and mass_center_A != mass_center_B:
            lat1, lon1 = mass_center_A
            lat2, lon2 = mass_center_B

            weight = flow_to_weight.get(flow_count, min_weight)
            color = flow_to_color.get(flow_count, plt.cm.inferno(0))

            # RGBA floats in 0..1 -> '#rrggbb'
            color_hex = '#{:02x}{:02x}{:02x}'.format(
                int(color[0] * 255), int(color[1] * 255), int(color[2] * 255)
            )

            # Draw the arc
            arrow.draw_arrow(
                folium_map,
                lat1, lon1, lat2, lon2,
                color=color_hex,
                weight=weight,
                tip=6,
                text=f'Flujos: {int(flow_count)}',
                radius_fac=1.0
            )

    # Step 5: Plot nodes with appropriate colors
    for cell, relevance in node_relevance.items():
        # A cell that never appears as an origin has no mass_center_A row, so fall
        # back to the center recorded for it as a destination instead of failing
        mass_center = origin_centers.get(cell, destination_centers.get(cell, (None, None)))
        if mass_center != (None, None):
            lat, lon = mass_center

            # Determine node color
            if cell == most_relevant_node:
                node_color = 'red'
            elif cell in self_connected_nodes:
                node_color = 'green'
            else:
                node_color = 'black'

            # Plot the node
            folium.CircleMarker(
                location=[lat, lon],
                radius=7 if cell == most_relevant_node else 5,
                color=node_color,
                fill=True,
                fill_color=node_color,
                fill_opacity=1.0,
                popup=f'Celda: {cell}\nRelevancia: {relevance:.2f}'
            ).add_to(folium_map)

    # Step 6: Add title if provided
    if title:
        title_html = f"""
        <h3 align="center" style="font-size:16px"><b>{title}</b></h3>
        """
        folium_map.get_root().html.add_child(folium.Element(title_html))

    # Step 7: Add color bar, rendered with matplotlib and embedded as a base64 PNG.
    # NOTE(review): arrow colors are assigned by rank (Step 2) while this bar is a
    # linear scale between min_flow and max_flow, so the two do not correspond exactly.
    fig, ax = plt.subplots(figsize=(4, 1))
    cmap = plt.cm.inferno
    norm = plt.Normalize(vmin=min_flow, vmax=max_flow)
    cb = plt.colorbar(plt.cm.ScalarMappable(norm=norm, cmap=cmap), ax=ax, orientation='horizontal')
    cb.set_label('Flow Count')

    buf = io.BytesIO()
    plt.savefig(buf, format='png', bbox_inches='tight')
    plt.close(fig)
    encoded = base64.b64encode(buf.getvalue()).decode('utf-8')
    colorbar_html = f'<img src="data:image/png;base64,{encoded}" style="position: absolute; bottom: 10px; left: 10px; width: 200px;">'
    folium_map.get_root().html.add_child(folium.Element(colorbar_html))

    return folium_map

In [146]:
import folium
import numpy as np
import matplotlib.pyplot as plt
import io
import base64
from collections import defaultdict

def plot_flows_dataframe(flows_df, cell_data, folium_map, range_weights=[1, 6], title=None):
    '''
    Draw cell-to-cell flows on a folium map: a two-part rectangle per active cell
    showing its share of ending vs starting trips, one arrow per flow between the
    mass centers of two different cells, one circle per cell, an optional title
    and a color bar image.

    NOTE(review): a later cell of this notebook defines plot_flows_dataframe again
    with the same signature; once it runs, this definition is no longer the one
    bound to the name.

    Parameters:
        flows_df (DataFrame): DataFrame with columns i_A, j_A, i_B, j_B, flow_count, mass_center_A, and mass_center_B.
        cell_data (DataFrame): DataFrame with columns i, j, lat1, lon1, lat2, lon2.
        folium_map (folium.Map): Folium map object; layers are added to it.
        range_weights (list): Range of weights for edge thickness (only read, never modified).
        title (str): Title of the map.

    Returns:
        folium.Map: The folium_map that was passed in.
    '''
    # Step 1: Find max and min flow_count
    flow_counts = flows_df['flow_count'].unique()
    has_flows = len(flow_counts) > 0
    max_flow = flow_counts.max() if has_flows else 1
    min_flow = flow_counts.min() if has_flows else 0

    # Step 2: One thickness and one color per distinct flow value, assigned by rank
    min_weight = range_weights[0]
    max_weight = range_weights[1]
    weights = np.linspace(min_weight, max_weight, num=len(flow_counts)) if has_flows else [min_weight]

    exp = 0.5  # exponent applied to the 0..1 rank positions before the color-map lookup
    colors = plt.cm.inferno(np.linspace(0, 1, num=len(weights))**exp) if has_flows else [plt.cm.inferno(0)]

    sorted_flows = sorted(flow_counts)
    flow_to_weight = dict(zip(sorted_flows, weights))
    flow_to_color = dict(zip(sorted_flows, colors))

    # Step 3: Single pass over the flows collecting node relevance, self-connected
    # nodes, trips starting/ending per cell and the mass center of every node
    node_relevance = defaultdict(int)
    self_connected_nodes = set()
    trips_start = defaultdict(int)
    trips_end = defaultdict(int)
    origin_centers = {}
    destination_centers = {}

    for _, row in flows_df.iterrows():
        cell_A = (row['i_A'], row['j_A'])
        cell_B = (row['i_B'], row['j_B'])
        flow_count = row['flow_count']

        node_relevance[cell_A] += flow_count
        node_relevance[cell_B] += flow_count

        if cell_A == cell_B:
            self_connected_nodes.add(cell_A)

        trips_start[cell_A] += flow_count
        trips_end[cell_B] += flow_count

        # First value seen wins
        origin_centers.setdefault(cell_A, row['mass_center_A'])
        destination_centers.setdefault(cell_B, row['mass_center_B'])

    most_relevant_node = max(node_relevance, key=node_relevance.get, default=None)

    # Step 4: Plot rectangles for trips starting and ending in each cell
    for _, row in cell_data.iterrows():
        cell = (row['i'], row['j'])
        lat1, lon1 = row['lat1'], row['lon1']
        lat2, lon2 = row['lat2'], row['lon2']

        total_trips = trips_start.get(cell, 0) + trips_end.get(cell, 0)
        if total_trips > 0:
            start_percent = trips_start.get(cell, 0) / total_trips
            end_percent = trips_end.get(cell, 0) / total_trips

            # The cell is split along latitude: the band from lat1 to mid_lat is
            # proportional to the ending trips, the band from mid_lat to lat2 to
            # the starting trips
            mid_lat = lat1 + (lat2 - lat1) * end_percent

            # Band for ending trips (blue)
            folium.Rectangle(
                bounds=[(lat1, lon1), (mid_lat, lon2)],
                color='blue',
                fill=True,
                fill_color='blue',
                fill_opacity=0.1,
                popup=f'Celda: {cell}\nViajes que terminan: {end_percent:.2%}'
            ).add_to(folium_map)

            # Band for starting trips (green)
            folium.Rectangle(
                bounds=[(mid_lat, lon1), (lat2, lon2)],
                color='green',
                fill=True,
                fill_color='green',
                fill_opacity=0.1,
                popup=f'Celda: {cell}\nViajes que inician: {start_percent:.2%}'
            ).add_to(folium_map)

    # Step 5: Draw edges between mass centers
    for _, row in flows_df.iterrows():
        flow_count = row['flow_count']
        mass_center_A = row['mass_center_A']
        mass_center_B = row['mass_center_B']

        # Skip undefined centers and flows that start and end at the same point
        if mass_center_A != (None, None) and mass_center_B != (None, None) and mass_center_A != mass_center_B:
            lat1, lon1 = mass_center_A
            lat2, lon2 = mass_center_B

            weight = flow_to_weight.get(flow_count, min_weight)
            color = flow_to_color.get(flow_count, plt.cm.inferno(0))

            # RGBA floats in 0..1 -> '#rrggbb'
            color_hex = '#{:02x}{:02x}{:02x}'.format(
                int(color[0] * 255), int(color[1] * 255), int(color[2] * 255)
            )

            # Draw the arc
            arrow.draw_arrow(
                folium_map,
                lat1, lon1, lat2, lon2,
                color=color_hex,
                weight=weight,
                tip=6,
                text=f'Flujos: {int(flow_count)}',
                radius_fac=1.0
            )

    # Step 6: Plot nodes with appropriate colors
    for cell, relevance in node_relevance.items():
        # A cell that never appears as an origin has no mass_center_A row, so fall
        # back to the center recorded for it as a destination instead of failing
        mass_center = origin_centers.get(cell, destination_centers.get(cell, (None, None)))
        if mass_center != (None, None):
            lat, lon = mass_center

            # Determine node color
            if cell == most_relevant_node:
                node_color = 'red'
            elif cell in self_connected_nodes:
                node_color = 'green'
            else:
                node_color = 'black'

            # Plot the node
            folium.CircleMarker(
                location=[lat, lon],
                radius=7 if cell == most_relevant_node else 5,
                color=node_color,
                fill=True,
                fill_color=node_color,
                fill_opacity=1.0,
                popup=f'Celda: {cell}\nRelevancia: {relevance:.2f}'
            ).add_to(folium_map)

    # Step 7: Add title if provided
    if title:
        title_html = f"""
        <h3 align="center" style="font-size:16px"><b>{title}</b></h3>
        """
        folium_map.get_root().html.add_child(folium.Element(title_html))

    # Step 8: Add color bar, rendered with matplotlib and embedded as a base64 PNG.
    # NOTE(review): arrow colors are assigned by rank (Step 2) while this bar is a
    # linear scale between min_flow and max_flow, so the two do not correspond exactly.
    fig, ax = plt.subplots(figsize=(4, 1))
    cmap = plt.cm.inferno
    norm = plt.Normalize(vmin=min_flow, vmax=max_flow)
    cb = plt.colorbar(plt.cm.ScalarMappable(norm=norm, cmap=cmap), ax=ax, orientation='horizontal')
    cb.set_label('Flow Count')

    buf = io.BytesIO()
    plt.savefig(buf, format='png', bbox_inches='tight')
    plt.close(fig)
    encoded = base64.b64encode(buf.getvalue()).decode('utf-8')
    colorbar_html = f'<img src="data:image/png;base64,{encoded}" style="position: absolute; bottom: 10px; left: 10px; width: 200px;">'
    folium_map.get_root().html.add_child(folium.Element(colorbar_html))

    return folium_map

In [166]:
import folium
import numpy as np
import matplotlib.pyplot as plt
import io
import base64
from collections import defaultdict

def plot_flows_dataframe(flows_df, cell_data, folium_map, range_weights=[1, 6], title=None):
    '''
    Draw cell-to-cell flows on a folium map: a two-part gray rectangle per active
    cell showing its share of ending vs starting trips, one arrow per flow between
    the mass centers of two different cells, one circle per cell (red for the most
    relevant cell, a green shade for self-connected cells, black otherwise), an
    optional title and a color bar image.

    Parameters:
        flows_df (DataFrame): DataFrame with columns i_A, j_A, i_B, j_B, flow_count, mass_center_A, and mass_center_B.
        cell_data (DataFrame): DataFrame with columns i, j, lat1, lon1, lat2, lon2.
        folium_map (folium.Map): Folium map object; layers are added to it.
        range_weights (list): Range of weights for edge thickness (only read, never modified).
        title (str): Title of the map.

    Returns:
        folium.Map: The folium_map that was passed in.
    '''
    # Step 1: Find max and min flow_count
    flow_counts = flows_df['flow_count'].unique()
    has_flows = len(flow_counts) > 0
    max_flow = flow_counts.max() if has_flows else 1
    min_flow = flow_counts.min() if has_flows else 0

    # Step 2: One thickness and one color per distinct flow value, assigned by rank
    min_weight = range_weights[0]
    max_weight = range_weights[1]
    weights = np.linspace(min_weight, max_weight, num=len(flow_counts)) if has_flows else [min_weight]

    exp = 0.5  # exponent applied to the 0..1 rank positions before the color-map lookup
    colors = plt.cm.inferno(np.linspace(0, 1, num=len(weights))**exp) if has_flows else [plt.cm.inferno(0)]

    sorted_flows = sorted(flow_counts)
    flow_to_weight = dict(zip(sorted_flows, weights))
    flow_to_color = dict(zip(sorted_flows, colors))

    # Step 3: Single pass over the flows collecting node relevance, self-connected
    # nodes, trips starting/ending per cell and the mass center of every node
    node_relevance = defaultdict(int)
    self_connected_nodes = set()
    trips_start = defaultdict(int)
    trips_end = defaultdict(int)
    origin_centers = {}
    destination_centers = {}

    for _, row in flows_df.iterrows():
        cell_A = (row['i_A'], row['j_A'])
        cell_B = (row['i_B'], row['j_B'])
        flow_count = row['flow_count']

        node_relevance[cell_A] += flow_count
        node_relevance[cell_B] += flow_count

        if cell_A == cell_B:
            self_connected_nodes.add(cell_A)

        trips_start[cell_A] += flow_count
        trips_end[cell_B] += flow_count

        # First value seen wins
        origin_centers.setdefault(cell_A, row['mass_center_A'])
        destination_centers.setdefault(cell_B, row['mass_center_B'])

    most_relevant_node = max(node_relevance, key=node_relevance.get, default=None)

    # Step 4: Plot rectangles for trips starting and ending in each cell
    for _, row in cell_data.iterrows():
        cell = (row['i'], row['j'])
        lat1, lon1 = row['lat1'], row['lon1']
        lat2, lon2 = row['lat2'], row['lon2']

        total_trips = trips_start.get(cell, 0) + trips_end.get(cell, 0)
        if total_trips > 0:
            start_percent = trips_start.get(cell, 0) / total_trips
            end_percent = trips_end.get(cell, 0) / total_trips

            # The cell is split along latitude: the band from lat1 to mid_lat is
            # proportional to the ending trips, the band from mid_lat to lat2 to
            # the starting trips
            mid_lat = lat1 + (lat2 - lat1) * end_percent

            # Band for ending trips: black border, dark gray fill
            folium.Rectangle(
                bounds=[(lat1, lon1), (mid_lat, lon2)],
                color='black',
                fill=True,
                fill_color='#555555',
                fill_opacity=0.25,
                popup=f'Celda: {cell}\nViajes que terminan: {end_percent:.2%}'
            ).add_to(folium_map)

            # Band for starting trips: black border, light gray fill
            folium.Rectangle(
                bounds=[(mid_lat, lon1), (lat2, lon2)],
                color='black',
                fill=True,
                fill_color='#AAAAAA',
                fill_opacity=0.25,
                popup=f'Celda: {cell}\nViajes que inician: {start_percent:.2%}'
            ).add_to(folium_map)

    # Step 5: Draw edges between mass centers
    for _, row in flows_df.iterrows():
        flow_count = row['flow_count']
        mass_center_A = row['mass_center_A']
        mass_center_B = row['mass_center_B']

        # Skip undefined centers and flows that start and end at the same point
        if mass_center_A != (None, None) and mass_center_B != (None, None) and mass_center_A != mass_center_B:
            lat1, lon1 = mass_center_A
            lat2, lon2 = mass_center_B

            weight = flow_to_weight.get(flow_count, min_weight)
            color = flow_to_color.get(flow_count, plt.cm.inferno(0))

            # RGBA floats in 0..1 -> '#rrggbb'
            color_hex = '#{:02x}{:02x}{:02x}'.format(
                int(color[0] * 255), int(color[1] * 255), int(color[2] * 255)
            )

            # Draw the arc
            arrow.draw_arrow(
                folium_map,
                lat1, lon1, lat2, lon2,
                color=color_hex,
                weight=weight,
                tip=6,
                text=f'Flujos: {int(flow_count)}',
                radius_fac=1.0
            )

    # Step 6: Plot nodes with appropriate colors
    # Rank of every node by ascending relevance, computed once; the rank selects
    # the green channel value used for self-connected nodes
    relevance_rank = {
        cell: rank
        for rank, cell in enumerate(sorted(node_relevance, key=node_relevance.get))
    }
    green_intensity = np.linspace(100, 255, num=len(node_relevance)) if len(node_relevance) > 0 else [0]
    for cell, relevance in node_relevance.items():
        # A cell that never appears as an origin has no mass_center_A row, so fall
        # back to the center recorded for it as a destination instead of failing
        mass_center = origin_centers.get(cell, destination_centers.get(cell, (None, None)))
        if mass_center != (None, None):
            lat, lon = mass_center

            # Determine node color
            if cell == most_relevant_node:
                node_color = 'red'
            elif cell in self_connected_nodes:
                # Gradient of green based on relevance rank
                current_green = int(green_intensity[relevance_rank[cell]])
                node_color = f'#00{current_green:02x}00'  # Green gradient
            else:
                node_color = 'black'

            # Plot the node
            folium.CircleMarker(
                location=[lat, lon],
                radius=7 if cell == most_relevant_node else 5,
                color=node_color,
                fill=True,
                fill_color=node_color,
                fill_opacity=1.0,
                popup=f'Celda: {cell}\nRelevancia: {relevance:.2f}'
            ).add_to(folium_map)

    # Step 7: Add title if provided
    if title:
        title_html = f"""
        <h3 align="center" style="font-size:16px"><b>{title}</b></h3>
        """
        folium_map.get_root().html.add_child(folium.Element(title_html))

    # Step 8: Add color bar, rendered with matplotlib and embedded as a base64 PNG.
    # NOTE(review): arrow colors are assigned by rank (Step 2) while this bar is a
    # linear scale between min_flow and max_flow, so the two do not correspond exactly.
    fig, ax = plt.subplots(figsize=(4, 1))
    cmap = plt.cm.inferno
    norm = plt.Normalize(vmin=min_flow, vmax=max_flow)
    cb = plt.colorbar(plt.cm.ScalarMappable(norm=norm, cmap=cmap), ax=ax, orientation='horizontal')
    cb.set_label('Flow Count')

    buf = io.BytesIO()
    plt.savefig(buf, format='png', bbox_inches='tight')
    plt.close(fig)
    encoded = base64.b64encode(buf.getvalue()).decode('utf-8')
    colorbar_html = f'<img src="data:image/png;base64,{encoded}" style="position: absolute; bottom: 10px; left: 10px; width: 200px;">'
    folium_map.get_root().html.add_child(folium.Element(colorbar_html))

    return folium_map

In [131]:
# Base directory that a later cell prefixes to the MiBici CSV path.
# NOTE(review): the name `dir` shadows Python's built-in dir(), and the value is a
# machine-specific absolute path.
#dir = '/home/user/Desktop/Datos/'
dir = '/Users/antoniomendez/Desktop/Tesis/Datos/datos_limpios/'

In [132]:
# Station matrix loaded from a .npy file (machine-specific absolute path). It is later
# passed to find_station, which reads column 0 as the station id and columns 1-2 as
# the coordinates.
estaciones_mibici = np.load('/Users/antoniomendez/Desktop/Tesis/Datos/Adj_mibici/matrices_estaciones/est_2019.npy')

In [133]:
# Load the 2019 MiBici trips and keep only the rows whose start timestamp
# (Inicio_del_viaje) contains '2019-01'.
data_mibici = pd.read_csv(dir + 'mibici/mibici_2019.csv')
# str.contains matches the pattern anywhere in the string, not only at the start
data_mibici = data_mibici[data_mibici['Inicio_del_viaje'].str.contains('2019-01')]
data_mibici.head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,Tiempo_viaje
0,8901295,341162,M,1992.0,2019-01-01 00:00:00,2019-01-01 00:16:09,50,54,16.15
1,8901296,413443,M,1985.0,2019-01-01 06:01:05,2019-01-01 06:16:27,38,194,15.366667
2,8901297,444232,M,1995.0,2019-01-01 06:01:19,2019-01-01 06:09:55,86,18,8.6
3,8901298,437533,M,1998.0,2019-01-01 06:01:25,2019-01-01 06:02:04,86,86,0.65
4,8901299,430224,M,1992.0,2019-01-01 06:01:57,2019-01-01 06:05:01,27,52,3.066667


In [134]:
# Show the last rows of the filtered trips to check where the selection ends.
data_mibici.tail()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,Tiempo_viaje
403643,9372070,433730,M,1990.0,2019-01-31 23:59:00,2019-02-01 00:28:05,210,254,29.083333
403644,9372071,443674,F,1993.0,2019-01-31 23:59:00,2019-02-01 00:14:28,254,271,15.466667
403645,9372072,223014,M,1994.0,2019-01-31 23:59:07,2019-02-01 00:20:21,32,261,21.233333
403646,9372073,73255,F,1994.0,2019-01-31 23:59:30,2019-02-01 00:28:58,53,256,29.466667
403647,9372074,3735,F,1988.0,2019-01-31 23:59:49,2019-02-01 00:43:55,77,11,44.1


In [135]:
# Narrow the selection to the trips whose start timestamp contains '2019-01-01'.
current_data = data_mibici[data_mibici['Inicio_del_viaje'].str.contains('2019-01-01')]
current_data.head()

Unnamed: 0,Viaje_Id,Usuario_Id,Genero,Año_de_nacimiento,Inicio_del_viaje,Fin_del_viaje,Origen_Id,Destino_Id,Tiempo_viaje
0,8901295,341162,M,1992.0,2019-01-01 00:00:00,2019-01-01 00:16:09,50,54,16.15
1,8901296,413443,M,1985.0,2019-01-01 06:01:05,2019-01-01 06:16:27,38,194,15.366667
2,8901297,444232,M,1995.0,2019-01-01 06:01:19,2019-01-01 06:09:55,86,18,8.6
3,8901298,437533,M,1998.0,2019-01-01 06:01:25,2019-01-01 06:02:04,86,86,0.65
4,8901299,430224,M,1992.0,2019-01-01 06:01:57,2019-01-01 06:05:01,27,52,3.066667


In [136]:
# Directed trip counts per (origin, destination) station pair; threshold=1 keeps every
# pair that occurs at least once.
# NOTE(review): count_trips_mibici returns None when no pair passes the filter, in
# which case .head() below would fail.
current_counter = count_trips_mibici(current_data, threshold=1, complement=False, directed=True)
current_counter.head()

Unnamed: 0,Est_A,Est_B,counts,prob
0,229,229,22,0.008346
1,35,35,15,0.00569
2,25,25,8,0.003035
3,296,157,7,0.002656
4,34,35,6,0.002276


In [137]:
# Distinct station ids that appear as either endpoint (Est_A or Est_B) of a counted pair.
current_stations = pd.unique(np.concatenate((current_counter['Est_A'].unique(), current_counter['Est_B'].unique())))

In [138]:
from collections import defaultdict
# Build the grid in this cell so it does not depend on a later cell having been run
# first (on a fresh kernel current_grid would otherwise be undefined here). The
# arguments are the same ones used by the cell that draws the map.
current_grid = grid.Grid(5, 5, "mibici")
# Locate every station in a grid cell, then aggregate the station-level counts into
# cell-to-cell flows.
current_station_cells = stations_and_cells(current_grid.geodataframe(), current_stations, estaciones_mibici)
current_graph_df = abstract_flows(current_counter, current_grid.geodataframe(), current_station_cells, estaciones_mibici)
current_graph_df.head()

Unnamed: 0,i_A,j_A,i_B,j_B,flow_count,mass_center_A,mass_center_B
0,1,3,1,3,136.0,"(20.669224044479492, -103.34338056971609)","(20.669224044479492, -103.34338056971609)"
1,1,3,1,2,65.0,"(20.669224044479492, -103.34338056971609)","(20.67180152219178, -103.36090330602738)"
2,1,3,0,4,19.0,"(20.669224044479492, -103.34338056971609)","(20.64316597142857, -103.31450170952381)"
3,1,3,2,2,28.0,"(20.669224044479492, -103.34338056971609)","(20.685327399706896, -103.36194979827589)"
4,1,3,2,3,54.0,"(20.669224044479492, -103.34338056971609)","(20.68270766990596, -103.34545475109718)"


In [139]:
# Show the last rows of the cell-to-cell flows table.
current_graph_df.tail()

Unnamed: 0,i_A,j_A,i_B,j_B,flow_count,mass_center_A,mass_center_B
104,0,1,0,0,1.0,"(20.65331833333333, -103.390695)","(20.65381, -103.40134)"
105,0,1,1,0,1.0,"(20.65331833333333, -103.390695)","(20.66805526117647, -103.4066917254902)"
106,0,1,0,1,1.0,"(20.65331833333333, -103.390695)","(20.65331833333333, -103.390695)"
107,0,1,1,1,2.0,"(20.65331833333333, -103.390695)","(20.67067382740741, -103.38405699300412)"
108,0,1,2,2,1.0,"(20.65331833333333, -103.390695)","(20.685327399706896, -103.36194979827589)"


In [169]:
# Create the grid, obtain a map from it and draw the cell-to-cell flows on that map.
# NOTE(review): an earlier cell already calls current_grid.geodataframe(); that cell
# relies on this one having been executed before it.
# presumably Grid(5, 5, "mibici") is a 5x5 grid over the MiBici area — TODO confirm in grid.py
current_grid = grid.Grid(5, 5, "mibici")
current_map = current_grid.map_around()
current_map = plot_flows_dataframe(current_graph_df, current_grid.geodataframe(), current_map, title='Flujos de MiBici')


In [168]:
# Save the map with the drawn flows to an HTML file (machine-specific absolute path).
current_map.save('/Users/antoniomendez/Desktop/2019-01-01_new.html')