In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import os

In [2]:
# Loading Data

output_dir = "outputs"

# Tabular inputs. The first CSV is read with a two-row header, so its columns
# form a two-level MultiIndex.
csv_path = os.path.join(output_dir, "OD_ADULTS_Timeindex_Motive_location_tripexpansionfactors.csv")
od_path = os.path.join(output_dir, "cleandbf.csv")
wide_df = pd.read_csv(csv_path, low_memory=False, header=[0, 1])
od_df = pd.read_csv(od_path)

# Spatial inputs: hexagon grid, municipal boundaries and urbanised areas.
gdf_path = os.path.join(output_dir, "H3_res9/Municipios_2023_h3_res9.shp")
hex_gdf = gpd.read_file(gdf_path)

municipal_gdf = gpd.read_file("ucl-mres-thesis/Municipal boundaries-ShapeFiles/Municipios_2023.shp")
urbanised_gdf = gpd.read_file("ucl-mres-thesis/SPMA Urbanised Areas-MapBiomas/2022/AU_2022.shp")


In [3]:
# CRS Alignment

# Use a single name for the hexagon id column and store the ids as strings.
if 'h3_index' in hex_gdf.columns:
    hex_gdf = hex_gdf.rename(columns={'h3_index': 'hexagon'})
hex_gdf['hexagon'] = hex_gdf['hexagon'].astype(str)

# Reproject the other layers onto the hexagon grid's CRS when they differ.
target_crs = hex_gdf.crs
if target_crs != urbanised_gdf.crs:
    urbanised_gdf = urbanised_gdf.to_crs(target_crs)
if target_crs != municipal_gdf.crs:
    municipal_gdf = municipal_gdf.to_crs(target_crs)


In [4]:
# Economic Group and Motive Preparation
def econ_group(code):
    """Map a CRITERIOBR class code to its economic-group label.

    The code is coerced with ``int()`` first, so numeric strings and floats
    are accepted (floats are truncated); anything ``int()`` cannot convert
    raises. Codes outside 1-6 return ``'Unknown'``.
    """
    labels = {
        1: 'High (A)',
        2: 'Upper middle (B1/B2)',
        3: 'Upper middle (B1/B2)',
        4: 'Lower middle (C1/C2)',
        5: 'Lower middle (C1/C2)',
        6: 'Low (D/E)',
    }
    return labels.get(int(code), 'Unknown')


# Label every row of od_df from its CRITERIOBR code.
od_df['econ_group'] = od_df['CRITERIOBR'].astype(int).map(econ_group)
od_df['ID_PESS'] = od_df['ID_PESS'].astype(str)

# Lookup Series: ID_PESS (as string) -> economic group label.
person_groups = od_df[['ID_PESS', 'econ_group']].drop_duplicates()
id_to_econ = person_groups.set_index('ID_PESS')['econ_group']

# Attach the label to the wide table through the same string id.
id_col = ('ID_PESS', 'Unnamed: 0_level_1')
wide_df[id_col] = wide_df[id_col].astype(str)
wide_df['econ_group'] = wide_df[id_col].map(id_to_econ)

# Column slices picked by the second header level.
motive_df = wide_df.xs('motive', level=1, axis=1)
fevia_df = wide_df.xs('tripexpfactor', level=1, axis=1)


In [5]:
# Calculating Normalised All-day Density for Each Economic Group
sel_groups = ['High (A)','Upper middle (B1/B2)','Lower middle (C1/C2)','Low (D/E)']
result = {}

# Keep the hexagons that intersect an urbanised polygon. An inner spatial join
# returns one row per (hexagon, polygon) match, so a hexagon touching several
# polygons would appear several times; keep a single row per hexagon so later
# quantiles and joins count each hexagon once.
urban_hex_gdf = (
    gpd.sjoin(hex_gdf, urbanised_gdf, how='inner', predicate='intersects')
    .drop(columns=['index_right'])
    .drop_duplicates(subset='hexagon')
)
urban_hex_set = set(urban_hex_gdf['hexagon'].values)

for group in sel_groups:
    group_wide = wide_df[wide_df['econ_group'] == group]
    hex_mat = group_wide.xs('hexagon', level=1, axis=1)
    fevia_mat = group_wide.xs('tripexpfactor', level=1, axis=1)

    # No motive mask: every motive contributes. Entries that are not numeric
    # (e.g. 'transit') are coerced to NaN and then counted as 0.
    fevia_all = fevia_mat.replace('transit', np.nan).apply(pd.to_numeric, errors='coerce')
    fevia_all = fevia_all.fillna(0)

    # Sum expansion factors per hexagon, one time column at a time. Hexagon and
    # factor columns are paired by position.
    # NOTE(review): 96 is presumably the number of time-index columns - confirm.
    fulltrip_long = []
    for i in range(96):
        hex_col = hex_mat.iloc[:,i]
        expf_col = fevia_all.iloc[:,i]
        df = pd.DataFrame({'hexagon': hex_col.astype(str), 'expf': expf_col})
        df = df[df['hexagon'] != 'transit']
        fulltrip_long.append(df.groupby('hexagon')['expf'].sum())

    # Total over all time columns, restricted to urban hexagons, then scaled so
    # the group's values sum to 1 (left unscaled when the total is not positive).
    all_time_hex_density = pd.concat(fulltrip_long, axis=1).sum(axis=1)
    all_time_hex_density = all_time_hex_density[all_time_hex_density.index.isin(urban_hex_set)]
    total = all_time_hex_density.sum()
    norm_density = all_time_hex_density / total if total > 0 else all_time_hex_density
    result[group] = norm_density

# Defining Strongholds
urban_hex_gdf = urban_hex_gdf.set_index('hexagon')
# NOTE(review): assignment aligns each Series on the hexagon index, so hexagons
# absent from a group's Series are NaN here (the fillna(0) runs before the
# alignment). quantile() skips NaN, so each threshold below is taken over the
# hexagons where that group has a recorded density - confirm this is intended.
urban_hex_gdf['HighA_density'] = result['High (A)'].fillna(0)
urban_hex_gdf['UpperMiddle_density'] = result['Upper middle (B1/B2)'].fillna(0)
urban_hex_gdf['LowerMiddle_density'] = result['Lower middle (C1/C2)'].fillna(0)
urban_hex_gdf['LowDE_density'] = result['Low (D/E)'].fillna(0)

# 75th-percentile density threshold for each group.
highA_75 = urban_hex_gdf['HighA_density'].quantile(0.75)
uppermid_B1B2_75 = urban_hex_gdf['UpperMiddle_density'].quantile(0.75)
lowermidC1C2_75 = urban_hex_gdf['LowerMiddle_density'].quantile(0.75)
lowDE_75 = urban_hex_gdf['LowDE_density'].quantile(0.75)


In [6]:
# Creating boolean masks for each group: True where the hexagon's density is
# strictly above that group's 75th-percentile threshold.
highA_mask = urban_hex_gdf['HighA_density'].gt(highA_75)
uppermid_mask = urban_hex_gdf['UpperMiddle_density'].gt(uppermid_B1B2_75)
lowermid_mask = urban_hex_gdf['LowerMiddle_density'].gt(lowermidC1C2_75)
lowDE_mask = urban_hex_gdf['LowDE_density'].gt(lowDE_75)


In [7]:
import matplotlib.pyplot as plt
from matplotlib.patches import Patch

fig, ax = plt.subplots(figsize=(16, 14))
urbanised_gdf.plot(ax=ax, facecolor='black', edgecolor='gray', linewidth=0.2, zorder=1)

# Plotting only hexes above threshold in a single colour for each group.
# All layers share zorder 2 and are opaque, so where strongholds overlap the
# layer drawn last (High (A)) is the one that stays visible.
stronghold_layers = [
    (lowDE_mask, 'red', 'Low (D/E)'),
    (lowermid_mask, 'purple', 'Lower middle (C1/C2)'),
    (uppermid_mask, 'green', 'Upper middle (B1/B2)'),
    (highA_mask, 'blue', 'High (A)'),
]
for layer_mask, colour, _ in stronghold_layers:
    urban_hex_gdf[layer_mask].plot(
        ax=ax, color=colour, edgecolor=colour, linewidth=0.18, alpha=1, zorder=2
    )

# Adding municipal polygons as the backdrop (light-grey fill, white outlines),
# drawn the same way as in the entropy maps.
municipal_gdf.plot(ax=ax, facecolor='lightgray', edgecolor='white', linewidth=0.1, zorder=0)

# Legend, listed from High (A) down to Low (D/E)
handles = [
    Patch(facecolor=colour, edgecolor='black', label=label)
    for _, colour, label in reversed(stronghold_layers)
]
ax.legend(handles=handles, loc='upper right', frameon=True, framealpha=1, fontsize=14, title="Group", title_fontsize=16)

ax.set_title('Hexagons Above 75th Percentile for Each Economic Group', fontsize=17)
ax.axis('off')
fig.tight_layout()
fig.savefig("econgroup_75p_hexagons_singlecolors.png", dpi=300, bbox_inches='tight', pad_inches=0.1)
plt.close(fig)


In [8]:
# Loading entropy results

# Trip-motive entropy table
motive_entropy_path = os.path.join(output_dir,'motive_entropy.csv')
motive_entropy = pd.read_csv(motive_entropy_path)

# Economic-group entropy table
econ_entropy_path = os.path.join(output_dir, 'econ_entropy.csv')
econ_entropy = pd.read_csv(econ_entropy_path)

In [9]:
# Combining trip motive entropies with stronghold hexes

# Hexagon ids of each group's stronghold.
highA_hexes    = urban_hex_gdf[highA_mask].index.tolist()
uppermid_hexes = urban_hex_gdf[uppermid_mask].index.tolist()
lowermid_hexes = urban_hex_gdf[lowermid_mask].index.tolist()
lowDE_hexes    = urban_hex_gdf[lowDE_mask].index.tolist()

# Index the entropy table by hexagon once instead of once per join.
motive_by_hex = motive_entropy.set_index('hexagon')[['motive_entropy']]

# Select stronghold rows with the boolean masks rather than .loc[list of ids]:
# label lookup returns every matching row for every listed label, so repeated
# hexagon ids in the index would multiply rows. The inner join keeps only
# hexagons that have an entropy value.
main_hexes_highA_tm = urban_hex_gdf[highA_mask].join(motive_by_hex, how='inner')
main_hexes_upper_tm = urban_hex_gdf[uppermid_mask].join(motive_by_hex, how='inner')
main_hexes_lower_tm = urban_hex_gdf[lowermid_mask].join(motive_by_hex, how='inner')
main_hexes_lowDE_tm = urban_hex_gdf[lowDE_mask].join(motive_by_hex, how='inner')

In [10]:
# Trip motive Entropy results in strongholds
for label, frame in [
    ('High (A)', main_hexes_highA_tm),
    ('Upper middle (B1/B2)', main_hexes_upper_tm),
    ('Lower middle (C1/C2)', main_hexes_lower_tm),
    ('Low (D/E)', main_hexes_lowDE_tm),
]:
    # Summary statistics of the entropy column for this group's stronghold.
    print(f"Entropy in {label} strongholds:\n", frame['motive_entropy'].describe())


Entropy in High (A) strongholds:
 count    744.000000
mean       0.595728
std        0.600763
min       -0.000000
25%       -0.000000
50%        0.543492
75%        0.999572
max        2.268424
Name: motive_entropy, dtype: float64
Entropy in Upper middle (B1/B2) strongholds:
 count    1937.000000
mean        0.562436
std         0.613334
min        -0.000000
25%         0.000000
50%         0.386665
75%         0.999328
max         2.464847
Name: motive_entropy, dtype: float64
Entropy in Lower middle (C1/C2) strongholds:
 count    2734.000000
mean        0.460985
std         0.578567
min        -0.000000
25%         0.000000
50%        -0.000000
75%         0.912678
max         2.464847
Name: motive_entropy, dtype: float64
Entropy in Low (D/E) strongholds:
 count    656.000000
mean       0.350407
std        0.535732
min       -0.000000
25%       -0.000000
50%       -0.000000
75%        0.693419
max        2.217557
Name: motive_entropy, dtype: float64


In [11]:
import matplotlib.pyplot as plt

# One entry per stronghold map: (frame, group name, colormap)
motive_maps = [
    (main_hexes_highA_tm, 'High (A)', 'Blues'),
    (main_hexes_upper_tm, 'Upper middle (B1/B2)', 'Greens'),
    (main_hexes_lower_tm, 'Lower middle (C1/C2)', 'Purples'),
    (main_hexes_lowDE_tm, 'Low (D/E)', 'Reds'),
]

# Calculating overall min/max of motive_entropy for vmin/vmax
vmin = min(gdf['motive_entropy'].min() for gdf, _, _ in motive_maps)
vmax = max(gdf['motive_entropy'].max() for gdf, _, _ in motive_maps)

# Characters rewritten when turning a group name into a file-name fragment
slug_table = str.maketrans({' ': '_', '(': '', ')': '', '/': '_'})

# Plotting with fixed vmin/vmax for legend scale
for stronghold_gdf, name, cmap in motive_maps:
    fig, ax = plt.subplots(figsize=(13, 12))

    # Layers from back to front: municipalities (0), urbanised areas (1), hexagons (2)
    municipal_gdf.plot(ax=ax, facecolor='lightgray', edgecolor='white', linewidth=0.2, zorder=0)
    urbanised_gdf.plot(ax=ax, facecolor='black', edgecolor='gray', linewidth=0.2, zorder=1)
    stronghold_gdf.plot(
        ax=ax,
        column='motive_entropy',
        cmap=cmap,
        vmin=vmin,
        vmax=vmax,
        edgecolor='black',
        linewidth=0.12,
        alpha=1,
        zorder=2,
        legend=True,
        legend_kwds={"label": "Trip Motive Entropy", 'shrink': 0.5},  # halves the legend size
    )

    ax.set_title(f"Trip Motive Entropy in {name} Residential Strongholds", fontsize=16)
    ax.axis('off')
    fig.tight_layout()
    fig.savefig(f"motive_entropy_stronghold_{name.translate(slug_table)}.png", dpi=300, bbox_inches='tight', pad_inches=0.1)
    plt.close(fig)


In [12]:
# Combining economic group entropies with stronghold hexes

# Index the entropy table by hexagon once instead of once per join.
econ_by_hex = econ_entropy.set_index('hexagon')[['econ_entropy']]

# Select stronghold rows with the boolean masks rather than .loc[list of ids]:
# label lookup returns every matching row for every listed label, so repeated
# hexagon ids in the index would multiply rows. The inner join keeps only
# hexagons that have an entropy value.
main_hexes_highA_econ = urban_hex_gdf[highA_mask].join(econ_by_hex, how='inner')
main_hexes_upper_econ = urban_hex_gdf[uppermid_mask].join(econ_by_hex, how='inner')
main_hexes_lower_econ = urban_hex_gdf[lowermid_mask].join(econ_by_hex, how='inner')
main_hexes_lowDE_econ = urban_hex_gdf[lowDE_mask].join(econ_by_hex, how='inner')

In [13]:
# Economic group entropy results in strongholds
for label, frame in [
    ('High (A)', main_hexes_highA_econ),
    ('Upper middle (B1/B2)', main_hexes_upper_econ),
    ('Lower middle (C1/C2)', main_hexes_lower_econ),
    ('Low (D/E)', main_hexes_lowDE_econ),
]:
    # Summary statistics of the entropy column for this group's stronghold.
    print(f"Entropy in {label} strongholds:\n", frame['econ_entropy'].describe())


Entropy in High (A) strongholds:
 count    744.000000
mean       0.839216
std        0.564116
min       -0.000000
25%        0.199674
50%        0.930896
75%        1.308390
max        1.941788
Name: econ_entropy, dtype: float64
Entropy in Upper middle (B1/B2) strongholds:
 count    1937.000000
mean        0.754782
std         0.498509
min        -0.000000
25%         0.329537
50%         0.890776
75%         1.028968
max         1.941788
Name: econ_entropy, dtype: float64
Entropy in Lower middle (C1/C2) strongholds:
 count    2734.000000
mean        0.507464
std         0.522703
min        -0.000000
25%         0.000000
50%         0.437375
75%         0.970514
max         1.940829
Name: econ_entropy, dtype: float64
Entropy in Low (D/E) strongholds:
 count    656.000000
mean       0.722263
std        0.576162
min       -0.000000
25%        0.000000
50%        0.875852
75%        1.168448
max        1.941788
Name: econ_entropy, dtype: float64


In [14]:
import matplotlib.pyplot as plt

# One entry per stronghold map: (frame, group name, colormap)
econ_maps = [
    (main_hexes_highA_econ, 'High (A)', 'Blues'),
    (main_hexes_upper_econ, 'Upper middle (B1/B2)', 'Greens'),
    (main_hexes_lower_econ, 'Lower middle (C1/C2)', 'Purples'),
    (main_hexes_lowDE_econ, 'Low (D/E)', 'Reds'),
]

# Calculating global min/max for econ_entropy across all groups for consistent colorbar/legend
vmin = min(gdf['econ_entropy'].min() for gdf, _, _ in econ_maps)
vmax = max(gdf['econ_entropy'].max() for gdf, _, _ in econ_maps)

# Plotting each group's stronghold map with fixed vmin/vmax for direct comparison
for df, name, cmap in econ_maps:
    fig, ax = plt.subplots(figsize=(13, 12))
    # Urbanised areas are outlined in gray, matching the trip-motive entropy maps,
    # so the two sets of figures share the same base layers.
    urbanised_gdf.plot(ax=ax, facecolor='black', edgecolor='gray', linewidth=0.2, zorder=1)
    df.plot(
        column='econ_entropy',
        cmap=cmap,
        edgecolor='black',
        alpha=1,
        linewidth=0.12,
        legend=True,
        ax=ax,
        zorder=2,
        vmin=vmin,
        vmax=vmax,
        legend_kwds={'label': "Economic Group Entropy",
                     'shrink': 0.5  # halves the legend size
                    }
    )
    municipal_gdf.plot(ax=ax, facecolor='lightgray', edgecolor='white', linewidth=0.2, zorder=0)
    ax.set_title(f"Economic Entropy in {name} Residential Strongholds", fontsize=16)
    ax.axis('off')
    fig.tight_layout()
    fig.savefig(f"econ_entropy_stronghold_{name.replace(' ','_').replace('(','').replace(')','').replace('/','_')}.png", dpi=300, bbox_inches='tight', pad_inches=0.1)
    # Close this figure explicitly so figures do not accumulate across iterations.
    plt.close(fig)