In [1]:
import networkx as nx
import geopandas as gpd
import config as cfg
from utils import *
from pyvis.network import Network

In [12]:
# --- Load data ---
# NOTE(review): `pd` is never imported explicitly in the visible cells; presumably it is
# provided by `from utils import *` -- confirm, or import pandas directly.
# NOTE(review): the two absolute paths below only resolve on the original author's machine.

# Trip records, restricted to trips whose origin activity is home ('casa').
all_viajes = pd.read_csv('/Users/caro/Desktop/thesis_project/mobility_data/VIAJES/all_viajes_month_0322.csv')
starts_at_home = all_viajes['actividad_origen'] == 'casa'
filtered_df = all_viajes.loc[starts_at_home]

# Geometries together with the income indicators used for node colouring.
income = gpd.read_file('/Users/caro/Desktop/thesis_project/segregation_indices/data/processed/geometries_and_income.geojson')

# District geometries reprojected to EPSG:4326; their centroids give the node positions.
gdf = gpd.read_file(cfg.ZONIFICACION_DATA / 'distritos/madrid_gdf.geojson').to_crs(epsg=4326)

# Currently an unscaled copy of the Gini index (a `** 2.5` rescaling is disabled).
income['Gini Index Scaled'] = income['Gini Index']
var_of_interest = 'Gini Index Scaled'  # alternative: 'Median income per consumption unit'

In [13]:
# Count home-origin trips per origin district (value_counts over 'origen').
# The columns are renamed positionally, so the result is always ['ID', 'Population'] whatever
# names reset_index() produces.
# NOTE(review): 'Population' here is a trip count used as a population proxy, not a census
# figure; define_graph divides by it when NORMALISE_BY_POP is set.
district_counts = filtered_df['origen'].value_counts().reset_index()
district_counts.columns = ['ID', 'Population']

In [46]:
def get_positions(gdf):
    """
    Map each row's integer ID to the (x, y) coordinates of its geometry centroid.

    Parameters:
    - gdf: table with an 'ID' column and a 'geometry' column whose values expose `.centroid`.

    Returns:
    - dict of {int(ID): (centroid.x, centroid.y)}; a repeated ID keeps the last row seen.
    """
    positions = {}
    for _, record in gdf.iterrows():
        node_id = int(record['ID'])
        center = record['geometry'].centroid
        positions[node_id] = (center.x, center.y)
    return positions

# Define graph based on OD trips including normalisation by population 
def define_graph(df, population_df=None, NORMALISE_BY_POP=False, remove_weak_edges=False, threshold=0.1):
    """
    Build a directed origin-destination graph whose edge weights are min-max scaled trip counts.

    Parameters:
    - df: DataFrame with one row per trip and 'origen' / 'destino' columns.
    - population_df: Optional DataFrame with 'ID' and 'Population' columns; 'ID' is matched
      against 'origen'.
    - NORMALISE_BY_POP: Boolean, if True (and population_df is given), trip counts are divided
      by the 'Population' of the origin before scaling.
    - remove_weak_edges: Boolean, if True, removes edges whose scaled weight is below the threshold.
    - threshold: Float, the minimum scaled weight (on the 0-1 scale) required to keep an edge.

    Returns:
    - G: A directed graph (networkx.DiGraph) with a 'weight' attribute on every edge.
    - trip_counts: DataFrame with 'origen', 'destino', 'trip_count' and 'normalized_trip_count'
      (plus 'Population' when normalising by population), restricted to the edges kept in G.

    Notes:
    - If every edge has the same raw weight (including the single-edge case) the min-max range
      is zero; those edges get weight 1.0 instead of the NaN that 0/0 would produce, so they are
      not silently discarded by the threshold filter.
    - Origins missing from population_df (left join) keep a NaN weight; with
      remove_weak_edges=True they are dropped because NaN >= threshold is False.
    """
    G = nx.DiGraph()

    # One row per (origin, destination) pair with the number of trips between them.
    trip_counts = df.groupby(['origen', 'destino']).size().reset_index(name='trip_count')

    if NORMALISE_BY_POP and population_df is not None:
        # Bring in the origin's population; the join key 'ID' duplicates 'origen', so drop it.
        trip_counts = trip_counts.merge(population_df, left_on='origen', right_on='ID', how='left')
        trip_counts = trip_counts.drop(columns=['ID'])
        raw_weight = trip_counts['trip_count'] / trip_counts['Population']
    else:
        raw_weight = trip_counts['trip_count']

    # Min-max scale to [0, 1].
    lowest = raw_weight.min()
    span = raw_weight.max() - lowest
    if span == 0:
        # Degenerate range: `* 0.0 + 1.0` yields 1.0 for real values and keeps NaN as NaN.
        trip_counts['normalized_trip_count'] = raw_weight * 0.0 + 1.0
    else:
        trip_counts['normalized_trip_count'] = (raw_weight - lowest) / span

    if remove_weak_edges:
        trip_counts = trip_counts[trip_counts['normalized_trip_count'] >= threshold]

    # Iterate column-wise rather than with iterrows(): iterrows() builds one Series per row and
    # upcasts integer district ids to float when the frame also holds float columns.
    for origin, destination, weight in zip(
        trip_counts['origen'], trip_counts['destino'], trip_counts['normalized_trip_count']
    ):
        G.add_edge(origin, destination, weight=weight)

    return G, trip_counts



# PLOTTING ---------------------------------------------------------------------------------------------------------

# Set edge attributes like color and width for visualization. NOTE: Color has been removed for ease of analysis
def set_art(G, weight_scale):
    """
    Compute one drawing width per edge of G.

    Each width is the edge's 'weight' divided by weight_scale, with a floor of 0.5.
    The list follows the iteration order of G.edges(data=True).
    """
    floor_width = 0.5
    return [
        max(floor_width, attrs['weight'] / weight_scale)
        for _, _, attrs in G.edges(data=True)
    ]

def get_in_weights(G):
    """
    Total incoming edge weight per node, rounded to 2 decimals.

    Every edge returned by G.in_edges(node, data=True) is counted; nothing is filtered out.
    Returns a dict of {node: total}.
    """
    return {
        target: round(sum(attrs['weight'] for _, _, attrs in G.in_edges(target, data=True)), 2)
        for target in G.nodes()
    }

def get_out_weights(G):
    """
    Total outgoing edge weight per node, rounded to 2 decimals.

    Self-loops (edges whose two endpoints are the same node) are left out of the sum.
    Returns a dict of {node: total}.
    """
    def _outgoing_total(source):
        # Sum over edges leaving `source`, skipping edges that point back to the same node.
        return sum(
            attrs['weight']
            for tail, head, attrs in G.out_edges(source, data=True)
            if tail != head
        )

    return {source: round(_outgoing_total(source), 2) for source in G.nodes()}

In [99]:
# Build the population-normalised trip graph, keeping only edges with scaled weight >= 0.2.
G, trip_counts = define_graph(
    filtered_df,
    district_counts,
    NORMALISE_BY_POP=True,
    remove_weak_edges=True,
    threshold=0.2,
)
edge_widths = set_art(G, 0.1)

# Store each district's centroid as the 'pos' attribute of its node.
positions = {node_id: tuple(xy) for node_id, xy in get_positions(gdf).items()}
nx.set_node_attributes(G, positions, 'pos')

In [100]:
# Per-node totals of incoming and outgoing edge weight (each rounded to 2 decimals).
# get_out_weights skips self-loops; get_in_weights does not.
in_weights = get_in_weights(G)
# NOTE(review): out_weights is not used by any later cell visible here.
out_weights = get_out_weights(G)

In [101]:
# Store each node's total incoming weight as its 'size' attribute.
nx.set_node_attributes(G, in_weights, 'size')
# NOTE(review): same call as in cell In [99]; this recomputation looks redundant unless G's
# edges changed in between -- confirm before removing.
edge_widths = set_art(G, 0.1)

In [102]:
# change edge colors
# for u, v, data in G.edges(data=True):
    # if data['weight'] < 0.3:
        # data['color'] = 'azure'  # Assign a single color
    # else:
        # data['color'] = 'fuchsia'

# Colour each node by median income: the column is scaled linearly to [0, 1] over its observed
# range, mapped through the RdPu colormap, and stored as a hex string.
import matplotlib
import matplotlib.cm as cm  # kept so any other cell that refers to `cm` still works
import matplotlib.colors as mcolors  # explicit, rather than relying on `from utils import *`

INCOME_COLUMN = 'Median income per consumption unit'
norm = mcolors.Normalize(vmin=income[INCOME_COLUMN].min(), vmax=income[INCOME_COLUMN].max())

# `cm.get_cmap` was deprecated in Matplotlib 3.7 and removed in 3.9; the colormap registry is
# the supported lookup.
colormap = matplotlib.colormaps['RdPu']  # any registered name works, e.g. 'RdYlBu'

# Map normalized values to colors
income['Color'] = income[INCOME_COLUMN].apply(lambda x: mcolors.to_hex(colormap(norm(x))))
# NOTE(review): the keys take the dtype of income['ID']; the node lookup later falls back to a
# default colour on a miss, so confirm these keys match the graph's node ids.
color_map = income.set_index('ID')['Color'].to_dict()

  colormap = cm.get_cmap('RdPu')  # Choose a colormap (e.g., 'RdYlBu')


In [104]:
# Build the interactive pyvis network: one node per entry in `positions`, one edge per edge of G,
# then write the result to an HTML file.
net = Network(notebook=True, directed=True, cdn_resources='remote',height='800px', width="100%", bgcolor="#222222", font_color="white",filter_menu=True )

# Ensure physics is disabled to use fixed positions
# NOTE(review): net.set_options(...) further down supplies a full options object whose "physics"
# section has no "enabled" key -- verify in the pyvis docs that this call is not overridden.
net.toggle_physics(False)

# Add nodes with fixed positions, sizes, and colors
for node, pos in positions.items():
    # Get the color for the node from the mapping, default to a neutral color if not found
    color = color_map.get(node, '#cccccc')  # Use '#cccccc' as the default color
    
    # Add node with fixed positions
    # x / y are the centroid coordinates produced by get_positions (gdf is in EPSG:4326).
    # NOTE(review): `id_to_name` is not defined in the visible cells; presumably it comes from
    # `from utils import *`.
    # NOTE(review): nodes missing from in_weights get size 500 * 10 = 5000 -- check this default
    # against the sizes of nodes that do have an in-weight.
    net.add_node(
        node,
        x = pos[0],
        y = pos[1],
        label=id_to_name.get(node, str(node)),  # Use a friendly label
        size=in_weights.get(node, 500) * 10,  # Scale size
        color=color  # Assign color based on income
    )

# Add every edge of G, labelled with its weight formatted to two decimals
for u, v, data in G.edges(data=True):
    label = f"{data['weight']:.2f}"  # Customize the label format as needed
    net.add_edge(u, v, value=data['weight'], color='grey', label=label)

# Rendering options passed through to the front end.
# NOTE(review): "scaleFactor" sits beside "to" rather than inside it, and "background" is a
# top-level key -- confirm against the vis-network option reference that both take effect.
net.set_options("""
    var options = {
      "nodes": {
        "shape": "dot",
        "scaling": {
          "min": 1,
          "max": 15
        },
        "font": {
          "size": 30,
          "color": "white"
        }
      },
      "edges": {
        "scaling": {
          "min": 1,
          "max": 5
        },
        "font": {
          "size": 20,
          "color": "white",
          "face": "arial"
        },
        "arrows": {
          "to": { 
            "enabled": true
          },
          "scaleFactor": 0.001
        }
      },
      "physics": {
        "barnesHut": {
          "gravitationalConstant": -5000
        },
        "minVelocity": 0.5
      },
      "layout": {
        "improvedLayout": true
      },
      "interaction": {
        "hover": false
      },
      "manipulation": {
        "enabled": false
      },
      "background": {
        "color": "black"
      }
    }
""")


# Write the network to an HTML file (this call saves; it does not display anything inline).
# NOTE(review): presumably the 'htmls' directory must already exist -- confirm.

net.save_graph("htmls/labels_02_threshold.html")


In [105]:
# Income legend injected into the saved pyvis page.
# The gradient stops are sampled from the same `colormap` that colours the nodes, so legend and
# nodes cannot disagree. (The previous hard-coded stops #67001f / #f4a582 / #f7f4f9 are not RdPu
# colours.) Floats are required here: an integer argument would index the lookup table instead.
legend_stops = ", ".join(
    mcolors.to_hex(colormap(level)) for level in (1.0, 0.5, 0.0)  # top (High) -> bottom (Low)
)

legend_html = f"""
<div id="income-legend" style="position: absolute; top: 50%; right: 10px; transform: translateY(-50%);
            background-color: #222; color: white; padding: 15px; border-radius: 5px; text-align: center;">
  <h4 style="margin: 10px 0;">Income Scale</h4>
  <div style="position: relative; width: 20px; height: 200px; background: linear-gradient(to bottom, {legend_stops}); margin: 20px auto;">
    <span style="position: absolute; top: -25px; left: 50%; transform: translateX(-50%); font-size: 12px;">High</span>
    <span style="position: absolute; bottom: -25px; left: 50%; transform: translateX(-50%); font-size: 12px;">Low</span>
  </div>
</div>
"""

# File written by net.save_graph(...) in the previous cell.
legend_target = "htmls/labels_02_threshold.html"

with open(legend_target, "r") as f:
    html_content = f.read()

# Insert the legend just before </body>. The id check makes the cell safe to re-run: without it
# every execution would stack another copy of the legend into the file.
if 'id="income-legend"' not in html_content:
    html_content = html_content.replace("</body>", legend_html + "</body>")
    with open(legend_target, "w") as f:
        f.write(html_content)