In [1]:
import config as cfg
import os
import pandas as pd
import networkx as nx
import geopandas as gpd
from shapely.geometry import Polygon
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import plotly.graph_objects as go

# Load data
all_viajes = pd.read_csv('/Users/caro/Desktop/thesis_project/mobility_data/VIAJES/all_viajes_month_0322.csv')
gdf = gpd.read_file(cfg.ZONIFICACION_DATA / 'distritos/madrid_gdf.geojson')  # Load your GeoJSON file into a GeoDataFrame
gdf = gdf.to_crs(epsg=3042)

# Filter data
filtered_df = all_viajes.loc[(all_viajes['actividad_origen'] == 'casa')]

In [2]:
filtered_df.head()

Unnamed: 0,fecha,periodo,origen,destino,distancia,actividad_origen,actividad_destino,estudio_origen_posible,estudio_destino_posible,residencia,renta,edad,sexo,viajes,viajes_km
0,20220301,0,2807901,2807901,0.5-2,casa,frecuente,no,no,28,>15,0-25,hombre,29.337,30.222
1,20220301,0,2807901,2807901,0.5-2,casa,frecuente,no,no,28,>15,0-25,mujer,34.143,27.522
2,20220301,0,2807901,2807901,0.5-2,casa,frecuente,no,no,28,>15,25-45,hombre,92.799,63.331
3,20220301,0,2807901,2807901,0.5-2,casa,frecuente,no,no,28,>15,25-45,mujer,65.475,44.556
4,20220301,0,2807901,2807901,0.5-2,casa,frecuente,no,no,28,>15,45-65,hombre,44.319,31.052


In [15]:
# DEFINING GRAPH --------------------------------------------------------------------------------------------------------- 

# Function to generate node positions based on GeoDataFrame
def get_positions(gdf):
    return {
        int(row['ID']): (row['geometry'].centroid.x, row['geometry'].centroid.y)
        for idx, row in gdf.iterrows()
    }

# Define the graph based on DataFrame
def define_graph(df):
    G = nx.DiGraph()

    # Group by origin and destination, and aggregate trip count and renta (taking the first renta value)
    trip_counts = df.groupby(['origen', 'destino', 'renta']).size().reset_index(name='trip_count')
    
    # Add edges to the graph
    for idx, row in trip_counts.iterrows():
        G.add_edge(row['origen'], row['destino'], weight=row['trip_count'], renta=row['renta'])  # Save 'renta' as edge attribute
    return G, trip_counts

# PLOTTING ---------------------------------------------------------------------------------------------------------

# Set edge attributes like color and width for visualization
def set_art(G, weight_scale=30000):
    edge_colors = []
    edge_widths = []
    
    # Iterate over the edges and set colors based on 'renta' attribute
    for u, v, data in G.edges(data=True):
        # Choose color based on 'renta' value (numerical or categorical)
        if data['renta'] == '>15':
            edge_colors.append('blue')
        elif data['renta'] == '10-15':
            edge_colors.append('red')
        else:
            edge_colors.append('green')

        # Adjust edge width based on the weight (i.e., trip count)
        edge_widths.append(max(0.5, data['weight'] / weight_scale))
    
    return edge_colors, edge_widths

def plotly_graph(G, positions, edge_colors, edge_widths, node_size=20):
    # Extract node positions
    node_x = [positions[node][0] for node in G.nodes()]
    node_y = [positions[node][1] for node in G.nodes()]

    # Create node scatter trace
    node_trace = go.Scatter(
        x=node_x, y=node_y,
        mode='markers',
        marker=dict(size=node_size, color='white', line=dict(width=2, color='#888')),
        text=[f'Node {node}' for node in G.nodes()],
        hoverinfo='text'
    )

    # Prepare edges in batches based on edge color (blue, red, green)
    edge_traces = []
    for edge_color in ['blue', 'green', 'red']:
        edge_x, edge_y = [], []
        for i, (u, v, data) in enumerate(G.edges(data=True)):
            if edge_colors[i] == edge_color:
                x0, y0 = positions[u]
                x1, y1 = positions[v]
                edge_x.extend([x0, x1, None])
                edge_y.extend([y0, y1, None])

        # Create edge trace for each color
        edge_traces.append(
            go.Scatter(
                x=edge_x, y=edge_y,
                line=dict(width=edge_widths[i], color=edge_color),
                hoverinfo='none',
                mode='lines'
            )
        )

    # Create a layout for the graph
    layout = go.Layout(
        showlegend=True,
        hovermode='closest',
        margin=dict(b=0, l=0, r=0, t=0),
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
    )

    # Combine edge and node traces
    fig = go.Figure(data=edge_traces + [node_trace], layout=layout)
    fig.show()

# ANALYSIS ---------------------------------------------------------------------------------------------------------

def check_in_weights(G):
    in_weights = {}
    for node in G.nodes():
        total_in_weight = sum(data['weight'] for u, v, data in G.in_edges(node, data=True))
        in_weights[node] = total_in_weight
        print(f"Node {node} Total In-weight: {total_in_weight}")
    return in_weights

def check_out_weights(G):
    out_weights = {}
    for node in G.nodes():
        total_out_weight = sum(data['weight'] for u, v, data in G.out_edges(node, data=True))
        out_weights[node] = total_out_weight
        print(f"Node {node} Total Out-weight: {total_out_weight}")
    return out_weights

def print_node_degrees(G):
    print("Node Degrees (In-degree, Out-degree, Total degree):")
    for node in G.nodes():
        in_degree = G.in_degree(node)
        out_degree = G.out_degree(node)
        total_degree = G.degree(node)  # Total degree (in + out)
        print(f"Node {node}: In-degree = {in_degree}, Out-degree = {out_degree}, Total degree = {total_degree}")



In [16]:
G, trip_counts = define_graph(filtered_df)
positions = get_positions(gdf)
edge_colors, edge_widths = set_art(G)
plotly_graph(G, positions, edge_colors, edge_widths)

In [17]:
len(G.edges)

441

In [18]:
G_unfiltered, trip_counts_unfiltered = define_graph(all_viajes)
edge_colors_unfiltered, edge_widths_unfiltered = set_art(G_unfiltered)
plotly_graph(G_unfiltered, positions, edge_colors_unfiltered, edge_widths_unfiltered)

In [19]:
nx.degree(G, weight='weight')

DiDegreeView({2807901: 543561, 2807902: 461813, 2807903: 444431, 2807904: 495449, 2807905: 487883, 2807906: 441168, 2807907: 480998, 2807908: 530953, 2807909: 493894, 2807910: 452842, 2807911: 489101, 2807912: 420916, 2807913: 558842, 2807914: 381366, 2807915: 548924, 2807916: 472105, 2807917: 375799, 2807918: 416940, 2807919: 373983, 2807920: 497571, 2807921: 309677})

In [20]:
nx.betweenness_centrality(G, weight='weight')

{2807901: 0.0,
 2807902: 0.0,
 2807903: 0.0,
 2807904: 0.0,
 2807905: 0.0,
 2807906: 0.0,
 2807907: 0.0,
 2807908: 0.0,
 2807909: 0.007894736842105263,
 2807910: 0.007894736842105263,
 2807911: 0.0,
 2807912: 0.031578947368421054,
 2807913: 0.0,
 2807914: 0.06315789473684211,
 2807915: 0.0,
 2807916: 0.0,
 2807917: 0.049999999999999996,
 2807918: 0.0,
 2807919: 0.05789473684210526,
 2807920: 0.0,
 2807921: 0.20526315789473684}

In [21]:
print_node_degrees(G)

Node Degrees (In-degree, Out-degree, Total degree):
Node 2807901: In-degree = 21, Out-degree = 21, Total degree = 42
Node 2807902: In-degree = 21, Out-degree = 21, Total degree = 42
Node 2807903: In-degree = 21, Out-degree = 21, Total degree = 42
Node 2807904: In-degree = 21, Out-degree = 21, Total degree = 42
Node 2807905: In-degree = 21, Out-degree = 21, Total degree = 42
Node 2807906: In-degree = 21, Out-degree = 21, Total degree = 42
Node 2807907: In-degree = 21, Out-degree = 21, Total degree = 42
Node 2807908: In-degree = 21, Out-degree = 21, Total degree = 42
Node 2807909: In-degree = 21, Out-degree = 21, Total degree = 42
Node 2807910: In-degree = 21, Out-degree = 21, Total degree = 42
Node 2807911: In-degree = 21, Out-degree = 21, Total degree = 42
Node 2807912: In-degree = 21, Out-degree = 21, Total degree = 42
Node 2807913: In-degree = 21, Out-degree = 21, Total degree = 42
Node 2807914: In-degree = 21, Out-degree = 21, Total degree = 42
Node 2807915: In-degree = 21, Out-degr

In [22]:
# Check total in-weights for each node
in_weights = check_in_weights(G)


Node 2807901 Total In-weight: 314497
Node 2807902 Total In-weight: 231099
Node 2807903 Total In-weight: 241772
Node 2807904 Total In-weight: 294641
Node 2807905 Total In-weight: 282802
Node 2807906 Total In-weight: 228846
Node 2807907 Total In-weight: 286760
Node 2807908 Total In-weight: 279935
Node 2807909 Total In-weight: 271858
Node 2807910 Total In-weight: 180527
Node 2807911 Total In-weight: 209789
Node 2807912 Total In-weight: 190129
Node 2807913 Total In-weight: 231677
Node 2807914 Total In-weight: 169536
Node 2807915 Total In-weight: 257026
Node 2807916 Total In-weight: 240942
Node 2807917 Total In-weight: 157500
Node 2807918 Total In-weight: 186369
Node 2807919 Total In-weight: 173072
Node 2807920 Total In-weight: 232022
Node 2807921 Total In-weight: 178309


In [23]:
# Check total out-weights for each node
out_weights = check_out_weights(G)


Node 2807901 Total Out-weight: 229064
Node 2807902 Total Out-weight: 230714
Node 2807903 Total Out-weight: 202659
Node 2807904 Total Out-weight: 200808
Node 2807905 Total Out-weight: 205081
Node 2807906 Total Out-weight: 212322
Node 2807907 Total Out-weight: 194238
Node 2807908 Total Out-weight: 251018
Node 2807909 Total Out-weight: 222036
Node 2807910 Total Out-weight: 272315
Node 2807911 Total Out-weight: 279312
Node 2807912 Total Out-weight: 230787
Node 2807913 Total Out-weight: 327165
Node 2807914 Total Out-weight: 211830
Node 2807915 Total Out-weight: 291898
Node 2807916 Total Out-weight: 231163
Node 2807917 Total Out-weight: 218299
Node 2807918 Total Out-weight: 230571
Node 2807919 Total Out-weight: 200911
Node 2807920 Total Out-weight: 265549
Node 2807921 Total Out-weight: 131368


In [24]:
nx.community.louvain_communities(G)

[{2807904, 2807905, 2807906, 2807907, 2807908, 2807909},
 {2807915, 2807916, 2807920, 2807921},
 {2807901, 2807902, 2807910, 2807911, 2807912, 2807917},
 {2807903, 2807913, 2807914, 2807918, 2807919}]

In [25]:
nx.write_gml(G, "0322_home_origin_graph.gml")