In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from matplotlib import colormaps
import geopandas as gpd
from scipy.stats import linregress
from scipy.stats import norm
import seaborn as sns
import colorcet as cc
import pandas as pd

In [2]:
"""
Every data set is loaded with each of the individual risks.
"""

jan = gpd.read_file("./Data/df_comarques_total.gpkg")
javi = pd.read_csv("./Data/javi.csv", sep=",", encoding="utf-8-sig")
german_df = pd.read_csv("./Data/german.csv", sep=",", encoding="utf-8-sig")
eulalia = pd.read_csv("./Data/eulalia.csv", sep=",", encoding="utf-8-sig")
ale_df = pd.read_csv("./Data/ale.csv", sep=",", encoding="utf-8-sig")

javi_df = javi[["comarca", "porc_norm"]]
ale_df["indice_dist"] = 1-ale_df["indice_dist"]




In [5]:
"""
We noramlise values to a (0,1) scale for better risk assesment.
"""
def normalize_percentile_trim(df, col, low_pct=0, high_pct=100):
    values = df[col].astype(float).values

    if len(values) < 3:
        raise ValueError("Need at least 3 values for percentile trimming.")

    low = np.percentile(values, low_pct)
    high = np.percentile(values, high_pct)
    if low == high:
        raise ValueError("Percentile bounds collapsed to a single value.")

    # Clip values to percentile range
    clipped = np.clip(values, low, high)

    # Min-max normalize to 0-1
    scale = high
    if scale == 0:
        scale = 1e-9

    normalized = np.clip(clipped / scale, 0, 1)

    norm_col = f"{col}_norm"
    df[norm_col] = normalized

    return df[["comarca", norm_col]]

jan_total = normalize_percentile_trim(jan, "total_coverage", high_pct = 98)
jan_ohca = normalize_percentile_trim(jan, "ohca rel")

jan_df = jan_total.merge(
    jan_ohca[['comarca', 'ohca rel_norm']],  # only the column you want
    on='comarca',
    how='left'  # keep all rows from jan_df
)

In [6]:
"""
We compute a very simple risk index related to our data. We assume that vehicles have a 1 risk as we never know if we
could be close to them or not.
"""

access_map = {
'High': 0,
'Medium': 0.5,
'Low': 1,
'Vehicle': 1
}

# Map the values
eulalia['access_value'] = eulalia['accessibilitat'].map(access_map)

df_grouped = eulalia.groupby('comarca').agg(
accessibility_sum=('access_value', 'sum'),
count=('access_value', 'count')
).reset_index()
df_grouped['acces norm'] = df_grouped['accessibility_sum'] / df_grouped['count']


eulalia_df = df_grouped[["comarca", "acces norm"]]
eulalia_df = normalize_percentile_trim(eulalia_df,"acces norm")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[norm_col] = normalized


In [7]:
"""
Text normalisation to avoid mispelled data
"""
import unicodedata

def normalize(text):
    if isinstance(text, str):
        text = text.lower().strip()
        text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('utf-8')
        text = text.replace('-', ' ').replace("'", "").replace("’", "")
        if text == "val daran":
            return "aran"
        return text
    return text

jan_df['comarca'] = jan_df['comarca'].apply(normalize)
javi_df['comarca'] = javi_df['comarca'].apply(normalize)
eulalia_df['comarca'] = eulalia_df['comarca'].apply(normalize)
german_df['comarca'] = german_df['comarca'].apply(normalize)
ale_df['comarca'] = ale_df['comarca'].apply(normalize)

name_map = dict(zip(javi_df["comarca"], javi["comarca"]))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  javi_df['comarca'] = javi_df['comarca'].apply(normalize)


In [8]:
"""
We eliminate the llucanes comarca, and continue.
"""
javi_df = javi_df[javi_df['comarca'] != 'llucanes']
jan_df = jan_df[jan_df['comarca'] != 'llucanes']
german_df = german_df[german_df['comarca'] != 'llucanes']
eulalia_df = eulalia_df[eulalia_df['comarca'] != 'llucanes']
ale_df = ale_df[ale_df['comarca'] != 'llucanes']

final = jan_df.merge(javi_df, on='comarca', how='outer')
final = final.merge(german_df, on='comarca', how='outer')
final = final.merge(eulalia_df, on='comarca', how='outer')
final = final.merge(ale_df, on='comarca', how='outer')

#We invert the total coverage norm so that a 1 indicates a higher risk
final["total_coverage_norm"] = 1-final["total_coverage_norm"]
final

Unnamed: 0,comarca,total_coverage_norm,ohca rel_norm,porc_norm,indice_pca_norm,acces norm_norm,indice_dist
0,alt camp,0.710322,0.486476,0.28721,0.537578,0.592171,0.363305
1,alt emporda,0.649115,0.537276,0.238623,0.469938,0.920784,0.298092
2,alt penedes,0.644228,0.47848,0.185105,0.425215,0.54279,0.2378
3,alt urgell,0.474923,0.825703,0.49218,0.566693,0.443346,0.97391
4,alta ribagorca,0.033099,0.438536,0.310331,0.344013,0.25266,0.87332
5,anoia,0.435495,0.52735,0.208685,0.366418,0.673949,0.320887
6,aran,0.436933,0.357552,0.050329,0.231923,0.293411,0.520633
7,bages,0.759746,0.661825,0.361998,0.358903,0.669842,0.376067
8,baix camp,0.812002,0.458543,0.206711,0.344675,0.525022,0.363968
9,baix ebre,0.702285,0.729196,0.489898,0.810657,0.497777,0.531566


In [9]:
"""
We rename the columns and map the comarca names to original names
"""
rename_map = {
    "total_coverage_norm" : "cov_risk",
    "ohca rel_norm": "ohca_risk",
    "porc_norm": "age_risk",
    "indice_pca_norm": "poverty_risk",
    "acces norm_norm": "acessibility_risk",
    "indice_dist": "distance_risk"
}

final = final.rename(columns=rename_map)

valid_comarques = set(final["comarca"])

# Keep only the keys that match comarques in `final`
name_map = {k: v for k, v in name_map.items() if k in valid_comarques}

final["comarca"] = final["comarca"].map(name_map)
final

Unnamed: 0,comarca,cov_risk,ohca_risk,age_risk,poverty_risk,acessibility_risk,distance_risk
0,Alt Camp,0.710322,0.486476,0.28721,0.537578,0.592171,0.363305
1,Alt Empordà,0.649115,0.537276,0.238623,0.469938,0.920784,0.298092
2,Alt Penedès,0.644228,0.47848,0.185105,0.425215,0.54279,0.2378
3,Alt Urgell,0.474923,0.825703,0.49218,0.566693,0.443346,0.97391
4,Alta Ribagorça,0.033099,0.438536,0.310331,0.344013,0.25266,0.87332
5,Anoia,0.435495,0.52735,0.208685,0.366418,0.673949,0.320887
6,Val d'Aran,0.436933,0.357552,0.050329,0.231923,0.293411,0.520633
7,Bages,0.759746,0.661825,0.361998,0.358903,0.669842,0.376067
8,Baix Camp,0.812002,0.458543,0.206711,0.344675,0.525022,0.363968
9,Baix Ebre,0.702285,0.729196,0.489898,0.810657,0.497777,0.531566


In [11]:
"""
The following function plots a spyder hexagonal plot for a number of comarques showcasing the risk associated
to every comarca.
"""


def fifa_multi(final, row_names, risks, labels, colors=None):
    num_vars = len(labels)
    angles = np.linspace(0, 2*np.pi, num_vars, endpoint=False)

    # Outer reference polygon
    x_outer = np.cos(angles)
    y_outer = np.sin(angles)
    x_outer = np.append(x_outer, x_outer[0])
    y_outer = np.append(y_outer, y_outer[0])

    if colors is None:
        colors = plt.cm.tab10(np.linspace(0, 1, len(row_names)))

    fig, ax = plt.subplots(figsize=(10,10))

    # Draw outer polygon
    ax.plot(x_outer, y_outer, color='grey', linewidth=1, linestyle='--')
    ax.fill(x_outer, y_outer, color='grey', alpha=0.05)

    grid_vals = np.arange(0.2, 1.01, 0.2)
    
    for r in grid_vals:
        xs = np.cos(angles) * r
        ys = np.sin(angles) * r
        xs = np.append(xs, xs[0])
        ys = np.append(ys, ys[0])
    
        ax.plot(xs, ys, color='lightgrey', linewidth=0.8, linestyle='--')

    #  Plot each comarca polygon
    for idx, row_name in enumerate(row_names):
        row = final[final["comarca"] == row_name]

        if row.empty:
            print(f"Warning: '{row_name}' not found in dataframe.")
            continue

        vals = row[risks].values.flatten().astype(float)

        x = np.cos(angles) * vals
        y = np.sin(angles) * vals
        x = np.append(x, x[0])
        y = np.append(y, y[0])

        ax.plot(x, y, color=colors[idx], linewidth=2, label=row_name)
        ax.fill(x, y, color=colors[idx], alpha=0.15)

    for angle in angles:
        ax.plot([0, np.cos(angle)], [0, np.sin(angle)], 
                color='grey', linewidth=0.8, linestyle='--')

    for i in range(num_vars):
        angle = angles[i]
        x_text = np.cos(angle) * 1.15
        y_text = np.sin(angle) * 1.15
        ax.text(x_text, y_text, labels[i], ha='center', va='center', fontsize=10)

    ax.set_aspect('equal')
    ax.axis('off')

    ax.legend(
        loc='center left',
        bbox_to_anchor=(0.9, 0.9),
        title="Comarques"
    )
    plt.tight_layout()
    plt.savefig("./plots/comarques_fifa.png")
    plt.close()

risks = ["cov_risk", "ohca_risk", "age_risk", "poverty_risk", "acessibility_risk", "distance_risk"]
labels = ["Coverage", "OHCA", "> 65 years", "Poverty", "Acessability", "Distance"]

comarques_to_plot = ["Val d'Aran", "Ribera d'Ebre"]

fifa_multi(final, comarques_to_plot, risks, labels)

In [10]:
"""
We finally plot a spiral bar plot with all risk per comarca, which would showcase the diversity and how some comarcas
have a worse risk plus AED coverage total. 
"""
risks = ["acessibility_risk", "distance_risk", "ohca_risk", "poverty_risk", "age_risk", "cov_risk"]
labels = ["Acessability", "Distance", "OHCA", "Poverty", "> 65 years", "Coverage"]


final["total_risk"] = final[risks].sum(axis=1)
final_sorted = final.sort_values("total_risk", ascending=True)
risk_values = final_sorted[risks].values
comarques = final_sorted["comarca"].values
num_comarques = len(comarques)

theta = np.linspace(0, 2*np.pi, num_comarques, endpoint=False)
width = 2*np.pi/num_comarques * 0.9

colors = plt.cm.viridis(np.linspace(0, 1, len(risks)))

fig, ax = plt.subplots(figsize=(10,10), subplot_kw=dict(polar=True))

ax.grid(False)

ax.set_yticklabels([])
"""
circle_radii = [2, 4]
theta_full = np.linspace(0, 2*np.pi, 500)

for r in circle_radii:
    ax.plot(theta_full, np.full_like(theta_full, r),
            linestyle='--', linewidth=0.8, color='black', alpha=0.6)
"""
ax.set_xticklabels([])
ax.spines['polar'].set_visible(False)

ax.set_theta_zero_location("E")  
ax.set_theta_direction(1)        

for i, risk in enumerate(risks):
    bottom = risk_values[:, :i].sum(axis=1) if i > 0 else None
    ax.bar(theta, risk_values[:, i], width=width, bottom=bottom,
           color=colors[i], edgecolor='k', alpha=0.8, label=risk)

# Add labels along bars with flipping on left side, for easier reading
offset = 0.2  
for i, angle in enumerate(theta):
    r_end = risk_values[i, :].sum() + offset
    
    angle_deg = np.degrees(angle)
    
    # Flip text if on left side
    if 90 < angle_deg < 270:
        rotation = angle_deg + 180
        ha = 'right'
    else:
        rotation = angle_deg
        ha = 'left'
    
    ax.text(angle, r_end, comarques[i],
            rotation=rotation,
            rotation_mode='anchor',
            ha=ha, va='center', fontsize=8)
    
#ax.set_title("Stacked Risk per Comarca", fontsize=14)
ax.legend(title="Risks", labels=labels, loc="upper right", bbox_to_anchor=(1.1, 0.9))
plt.savefig("./plots/total.png")
plt.close()

In [12]:
final.to_csv("./data/final.csv", index=False, encoding="utf-8")