In [19]:
from pathlib import Path

import numpy as np
import pandas as pd

<h3>Reading the CSV files</h3>

In [20]:
# Load the six regional exports in a single pass. The paths are listed in the
# same order as the names they are unpacked into.
(
    africa,
    asia_pacific,
    europe_central_asia,
    latin_america_caribbean,
    middle_east,
    us_canada,
) = [
    pd.read_csv(csv_path)
    for csv_path in (
        "resources/og/Africa_aggregated_data_up_to-2025-10-18.csv",
        "resources/og/Asia-Pacific_aggregated_data_up_to-2025-10-11_0.csv",
        "resources/og/Europe-Central-Asia_aggregated_data_up_to-2025-10-11.csv",
        "resources/og/Latin-America-the-Caribbean_aggregated_data_up_to-2025-10-18.csv",
        "resources/og/Middle-East_aggregated_data_up_to-2025-10-18.csv",
        "resources/og/US-and-Canada_aggregated_data_up_to-2025-10-11_0.csv",
    )
]

<h3>Cleaning</h3>

In [21]:
def preprocess_region(df, exposure_column="POPULATION_EXPOSURE", drop_columns=None):
    """Return a cleaned copy of one regional DataFrame.

    Rows whose ``exposure_column`` value is missing are removed, then the
    columns listed in ``drop_columns`` are dropped. The input frame is not
    modified; a new DataFrame is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw regional data.
    exposure_column : str, optional
        Column that must be non-null for a row to be kept.
    drop_columns : iterable of str, optional
        Columns to remove. When ``None`` the default set is used:
        WEEK, COUNTRY, ADMIN1, FATALITIES, ID, CENTROID_LATITUDE,
        CENTROID_LONGITUDE.

    Returns
    -------
    pandas.DataFrame
        The filtered frame without the dropped columns.

    Raises
    ------
    KeyError
        If ``exposure_column`` or any of ``drop_columns`` is not present in ``df``.
    """
    if drop_columns is None:
        drop_columns = [
            "WEEK",
            "COUNTRY",
            "ADMIN1",
            "FATALITIES",
            "ID",
            "CENTROID_LATITUDE",
            "CENTROID_LONGITUDE",
        ]

    # dropna/drop both return new objects, so the caller's frame stays intact.
    df_cleaned = df.dropna(subset=[exposure_column])
    df_cleaned = df_cleaned.drop(columns=list(drop_columns))

    return df_cleaned

# Run every raw regional frame through the shared cleaning step. The inputs
# are listed in the same order as the names they are unpacked into.
(
    africa_cleaned,
    asia_pacific_cleaned,
    europe_central_asia_cleaned,
    latin_america_caribbean_cleaned,
    middle_east_cleaned,
    us_canada_cleaned,
) = map(
    preprocess_region,
    (
        africa,
        asia_pacific,
        europe_central_asia,
        latin_america_caribbean,
        middle_east,
        us_canada,
    ),
)

# Persist each cleaned frame as CSV, leaving out the index column.
for cleaned_frame, output_path in (
    (africa_cleaned, "resources/africa_cleaned.csv"),
    (asia_pacific_cleaned, "resources/asia_pacific_cleaned.csv"),
    (europe_central_asia_cleaned, "resources/europe_central_asia_cleaned.csv"),
    (latin_america_caribbean_cleaned, "resources/latin_america_caribbean_cleaned.csv"),
    (middle_east_cleaned, "resources/middle_east_cleaned.csv"),
    (us_canada_cleaned, "resources/us_canada_cleaned.csv"),
):
    cleaned_frame.to_csv(output_path, index=False)

### Code to preprocess the data for the specific visualizations

In [22]:
# Bar chart: events per region
# Each total is the sum of the EVENTS column of that region's cleaned frame.

# AFRICA
africa_bar = africa_cleaned["EVENTS"].sum()
# ASIA PACIFIC
asia_pacific_bar = asia_pacific_cleaned["EVENTS"].sum()
# EUROPE CENTRAL ASIA
europe_central_asia_bar = europe_central_asia_cleaned["EVENTS"].sum()
# LATIN AMERICA CARIBBEAN
latin_america_caribbean_bar = latin_america_caribbean_cleaned["EVENTS"].sum()
# MIDDLE EAST
middle_east_bar = middle_east_cleaned["EVENTS"].sum()
# US CANADA
us_canada_bar = us_canada_cleaned["EVENTS"].sum()

# Create the dataframe (one row per region) and save it as csv
bar_data = pd.DataFrame({
    "Region": ["Africa", "Asia Pacific", "Europe Central Asia", "Latin America Caribbean", "Middle East", "US Canada"],
    "Total Events": [africa_bar, asia_pacific_bar, europe_central_asia_bar, latin_america_caribbean_bar, middle_east_bar, us_canada_bar]
})

# DataFrame.to_csv does not create missing parent directories, so make sure the
# output folder exists first (it may be absent on a fresh checkout).
Path("resources/plots").mkdir(parents=True, exist_ok=True)
bar_data.to_csv("resources/plots/bar_data.csv", index=False)

In [23]:
# Grouped bar chart: battles vs violence against civilians per region

def _battle_and_civilian_counts(region_cleaned):
    """Sum EVENTS per EVENT_TYPE and pick out the two categories being compared.

    Returns a tuple ``(events_by_type, battles, violence_against_civilians)``
    where ``events_by_type`` is the per-EVENT_TYPE sum of EVENTS. A category
    that does not occur in ``region_cleaned`` is reported as 0.
    """
    events_by_type = region_cleaned.groupby("EVENT_TYPE")["EVENTS"].sum()
    return (
        events_by_type,
        events_by_type.get("Battles", 0),
        events_by_type.get("Violence against civilians", 0),
    )


# AFRICA
(africa_cleaned_grouped,
 africa_battles,
 africa_violence_against_civilians) = _battle_and_civilian_counts(africa_cleaned)

# ASIA PACIFIC
(asia_pacific_cleaned_grouped,
 asia_pacific_battles,
 asia_pacific_violence_against_civilians) = _battle_and_civilian_counts(asia_pacific_cleaned)

# EUROPE CENTRAL ASIA
(europe_central_asia_cleaned_grouped,
 europe_central_asia_battles,
 europe_central_asia_violence_against_civilians) = _battle_and_civilian_counts(europe_central_asia_cleaned)

# LATIN AMERICA CARIBBEAN
(latin_america_caribbean_grouped,
 latin_america_caribbean_battles,
 latin_america_caribbean_violence_against_civilians) = _battle_and_civilian_counts(latin_america_caribbean_cleaned)

# MIDDLE EAST
(middle_east_grouped,
 middle_east_battles,
 middle_east_violence_against_civilians) = _battle_and_civilian_counts(middle_east_cleaned)

# US CANADA
(us_canada_grouped,
 us_canada_battles,
 us_canada_violence_against_civilians) = _battle_and_civilian_counts(us_canada_cleaned)

# Create the dataframe (one row per region) and save it as csv
grouped_bar_data = pd.DataFrame({
    "Region": ["Africa", "Asia Pacific", "Europe Central Asia", "Latin America Caribbean", "Middle East", "US Canada"],
    "Battles": [africa_battles, asia_pacific_battles, europe_central_asia_battles, latin_america_caribbean_battles, middle_east_battles, us_canada_battles],
    "Violence Against Civilians": [africa_violence_against_civilians, asia_pacific_violence_against_civilians, europe_central_asia_violence_against_civilians,
                                   latin_america_caribbean_violence_against_civilians, middle_east_violence_against_civilians, us_canada_violence_against_civilians]
})

# DataFrame.to_csv does not create missing parent directories, so make sure the
# output folder exists first (it may be absent on a fresh checkout).
Path("resources/plots").mkdir(parents=True, exist_ok=True)
grouped_bar_data.to_csv("resources/plots/grouped_bar_data.csv", index=False)

In [None]:
# Heatmap: population exposure based on the region and the type of event

# AFRICA
