In [17]:
import pandas as pd
import numpy as np

## Reading csv

In [18]:
# Raw regional exports, one DataFrame per region.
# Note the file names carry two different cut-off dates (2025-10-18 and
# 2025-10-11), so the regions are not all snapshotted on the same day.

# Snapshots dated 2025-10-18
africa = pd.read_csv(
    filepath_or_buffer="../resources/og/Africa_aggregated_data_up_to-2025-10-18.csv",
)
# Snapshots dated 2025-10-11
asia_pacific = pd.read_csv(
    filepath_or_buffer="../resources/og/Asia-Pacific_aggregated_data_up_to-2025-10-11_0.csv",
)
europe_central_asia = pd.read_csv(
    filepath_or_buffer="../resources/og/Europe-Central-Asia_aggregated_data_up_to-2025-10-11.csv",
)
# Snapshots dated 2025-10-18
latin_america_caribbean = pd.read_csv(
    filepath_or_buffer="../resources/og/Latin-America-the-Caribbean_aggregated_data_up_to-2025-10-18.csv",
)
middle_east = pd.read_csv(
    filepath_or_buffer="../resources/og/Middle-East_aggregated_data_up_to-2025-10-18.csv",
)
# Snapshot dated 2025-10-11
us_canada = pd.read_csv(
    filepath_or_buffer="../resources/og/US-and-Canada_aggregated_data_up_to-2025-10-11_0.csv",
)

## Cleaning

In [19]:
def preprocess_region(
    df,
    countries_to_keep=("Russia", "Ukraine", "Israel", "Palestine", "Mexico"),
):
    """Reduce a regional frame to the rows and columns used downstream.

    Steps, in order:
      1. drop rows whose ``POPULATION_EXPOSURE`` is missing;
      2. drop the ``REGION``, ``ID``, ``CENTROID_LATITUDE`` and
         ``CENTROID_LONGITUDE`` columns;
      3. keep only rows whose ``COUNTRY`` is in ``countries_to_keep``.

    Parameters
    ----------
    df : pandas.DataFrame
        Regional data. Must contain ``POPULATION_EXPOSURE``, ``COUNTRY`` and
        the four columns listed in step 2.
    countries_to_keep : list-like of str, optional
        Country names to retain. Defaults to the five countries the notebook
        focuses on, so existing one-argument calls behave as before.

    Returns
    -------
    pandas.DataFrame
        A new, filtered frame; ``df`` itself is not modified.

    Raises
    ------
    KeyError
        If any of the required columns is absent from ``df``.
    """
    df_cleaned = df.dropna(subset=["POPULATION_EXPOSURE"])
    df_cleaned = df_cleaned.drop(
        columns=["REGION", "ID", "CENTROID_LATITUDE", "CENTROID_LONGITUDE"]
    )

    # Passed straight to ``isin`` so that a bare string (instead of a
    # list-like) is rejected by pandas rather than silently split into characters.
    df_cleaned = df_cleaned[df_cleaned["COUNTRY"].isin(countries_to_keep)]

    return df_cleaned

# Reduce the three regional frames of interest to the rows/columns kept by
# preprocess_region.
ukraine_russia_cleaned = preprocess_region(europe_central_asia)
mexico_cleaned = preprocess_region(latin_america_caribbean)
israel_palestine_cleaned = preprocess_region(middle_east)

# Persist the cleaned frames (without the index) so later cells can reload them.
ukraine_russia_cleaned.to_csv(
    path_or_buf="../resources/ukraine_russia_cleaned.csv",
    index=False,
)
mexico_cleaned.to_csv(
    path_or_buf="../resources/mexico_cleaned.csv",
    index=False,
)
israel_palestine_cleaned.to_csv(
    path_or_buf="../resources/israel_palestine_cleaned.csv",
    index=False,
)


## Code to preprocess the data for the specific visualizations

In [20]:
# SUB_EVENT_TYPE values that are filtered out before the per-week aggregation
# in the plot-preparation functions.
sub_event_to_delete = [
    "Protest with intervention",
    "Peaceful protest",
    "Agreement",
    "Strategic developments",
    "Arrests",
    "Change to group/activity",
    "Disrupted weapons use",
    "Headquarters or base established",
    "Non-violent transfer of territory",
    "Other",
]

In [21]:
def prepare_combined_boxplot(df, sub_event_to_delete, output_path):
    """Build the boxplot table: one POPULATION_EXPOSURE total per week and country.

    Rows whose ``SUB_EVENT_TYPE`` appears in ``sub_event_to_delete`` are
    discarded, ``POPULATION_EXPOSURE`` is summed for every (``WEEK``,
    ``COUNTRY``) pair, and only ``COUNTRY`` and ``POPULATION_EXPOSURE`` are
    kept. The result is written to ``output_path`` as CSV (no index column)
    and also returned.
    """
    keep_mask = ~df["SUB_EVENT_TYPE"].isin(sub_event_to_delete)

    weekly_exposure = (
        df.loc[keep_mask]
        .groupby(["WEEK", "COUNTRY"], as_index=False)["POPULATION_EXPOSURE"]
        .sum()
    )

    # WEEK is only needed as a grouping key; it is not part of the output.
    boxplot_table = weekly_exposure.loc[:, ["COUNTRY", "POPULATION_EXPOSURE"]]
    boxplot_table.to_csv(output_path, index=False)

    return boxplot_table

# One boxplot table per conflict; each call also writes its CSV under
# ../resources/plots/sectiontwo/.
ukraine_russia = prepare_combined_boxplot(
    ukraine_russia_cleaned,
    sub_event_to_delete,
    "../resources/plots/sectiontwo/ukraine_russia.csv",
)
mexico = prepare_combined_boxplot(
    mexico_cleaned,
    sub_event_to_delete,
    "../resources/plots/sectiontwo/mexico.csv",
)
israel_palestine = prepare_combined_boxplot(
    israel_palestine_cleaned,
    sub_event_to_delete,
    "../resources/plots/sectiontwo/israel_palestine.csv",
)

In [22]:
numeric_cols = ["POPULATION_EXPOSURE"]

# Summary statistics of the weekly totals, shown in the order:
# Ukraine/Russia, Mexico, Israel/Palestine.
display(
    ukraine_russia[numeric_cols].describe(),
    mexico[numeric_cols].describe(),
    israel_palestine[numeric_cols].describe(),
)

Unnamed: 0,POPULATION_EXPOSURE
count,588.0
mean,2315943.0
std,2587347.0
min,4059.0
25%,287823.2
50%,872046.0
75%,4569387.0
max,13038940.0


Unnamed: 0,POPULATION_EXPOSURE
count,304.0
mean,8690024.0
std,1561991.0
min,2616779.0
25%,7701322.0
50%,8594174.0
75%,9503968.0
max,13927340.0


Unnamed: 0,POPULATION_EXPOSURE
count,598.0
mean,905089.0
std,1131141.0
min,112.0
25%,134711.2
50%,443532.0
75%,1035493.0
max,5127321.0


### Ridgeline plot
One row per conflict, with ridges for fatalities and for population exposure.

In [23]:
def prepare_combined_ridgelineplot(df, sub_event_to_delete, output_path):
    """Build the ridgeline table: weekly exposure and fatality totals per country.

    Rows whose ``SUB_EVENT_TYPE`` appears in ``sub_event_to_delete`` are
    discarded, then ``POPULATION_EXPOSURE`` and ``FATALITIES`` are summed for
    every (``WEEK``, ``COUNTRY``) pair. Only ``COUNTRY`` and the two summed
    columns are kept. The result is written to ``output_path`` as CSV
    (no index column) and also returned.
    """
    value_columns = ["POPULATION_EXPOSURE", "FATALITIES"]
    keep_mask = ~df["SUB_EVENT_TYPE"].isin(sub_event_to_delete)

    weekly_totals = (
        df.loc[keep_mask]
        .groupby(["WEEK", "COUNTRY"], as_index=False)[value_columns]
        .sum()
    )

    # WEEK is only needed as a grouping key; it is not part of the output.
    ridgeline_table = weekly_totals.loc[:, ["COUNTRY"] + value_columns]
    ridgeline_table.to_csv(output_path, index=False)

    return ridgeline_table

# Reload the cleaned frames from the CSVs written by the cleaning step, so this
# cell works from the files on disk rather than from in-memory state.
ukraine_russia_cleaned = pd.read_csv("../resources/ukraine_russia_cleaned.csv")
mexico_cleaned = pd.read_csv("../resources/mexico_cleaned.csv")
israel_palestine_cleaned = pd.read_csv("../resources/israel_palestine_cleaned.csv")

# One ridgeline table per conflict; each call also writes its CSV under
# ../resources/plots/sectiontwo/.
ridgelineplot_ukraine_russia = prepare_combined_ridgelineplot(
    ukraine_russia_cleaned,
    sub_event_to_delete,
    "../resources/plots/sectiontwo/ridgelineplot_ukraine_russia.csv",
)
ridgelineplot_mexico = prepare_combined_ridgelineplot(
    mexico_cleaned,
    sub_event_to_delete,
    "../resources/plots/sectiontwo/ridgelineplot_mexico.csv",
)
ridgelineplot_israel_palestine = prepare_combined_ridgelineplot(
    israel_palestine_cleaned,
    sub_event_to_delete,
    "../resources/plots/sectiontwo/ridgelineplot_israel_palestine.csv",
)

In [26]:
# Install dependencies if you haven't already
!pip install pandas matplotlib joypy

import matplotlib.pyplot as plt
from joypy import joyplot

# === Load your data ===
ridgelineplot_ukraine_russia = pd.read_csv("../resources/plots/sectiontwo/ridgelineplot_ukraine_russia.csv")
ridgelineplot_mexico = pd.read_csv("../resources/plots/sectiontwo/ridgelineplot_mexico.csv")
ridgelineplot_israel_palestine = pd.read_csv("../resources/plots/sectiontwo/ridgelineplot_israel_palestine.csv")
df = pd.concat(
    [ridgelineplot_ukraine_russia, ridgelineplot_mexico, ridgelineplot_israel_palestine],
    ignore_index=True
)

# === Check your columns ===
print(df.head())

# Example columns â€” adjust these if your CSV differs
# Assume:
#   - "WEEK" is the time variable (x-axis)
#   - "COUNTRY" is the category (each ridge)
#   - "FATALITIES" is the value to plot

# === Make the ridgeline plot ===
plt.figure(figsize=(12, 8))
joyplot(
    data=df,
    by="COUNTRY",               # grouping variable
    column="POPULATION_EXPOSURE",        # numeric value
    figsize=(12, 8),
    title="Ridgeline Plot of Fatalities by Country",
    colormap=plt.cm.viridis,    # choose a color map (optional)
    linewidth=1
)

plt.xlabel("Fatalities")
plt.ylabel("Country")
plt.tight_layout()
plt.show()

Collecting joypy
  Downloading joypy-0.2.6-py2.py3-none-any.whl.metadata (812 bytes)
Downloading joypy-0.2.6-py2.py3-none-any.whl (8.6 kB)
Installing collected packages: joypy
Successfully installed joypy-0.2.6


ModuleNotFoundError: No module named 'joypy'