In [2]:
import pandas as pd
import numpy as np

## Reading csv

In [3]:
# Read the six regional aggregate exports. The paths are listed in the same
# order as the names on the left-hand side, so each frame lands in its own name.
(
    africa,
    asia_pacific,
    europe_central_asia,
    latin_america_caribbean,
    middle_east,
    us_canada,
) = (
    pd.read_csv(raw_csv_path)
    for raw_csv_path in (
        "resources/og/Africa_aggregated_data_up_to-2025-10-18.csv",
        "resources/og/Asia-Pacific_aggregated_data_up_to-2025-10-11_0.csv",
        "resources/og/Europe-Central-Asia_aggregated_data_up_to-2025-10-11.csv",
        "resources/og/Latin-America-the-Caribbean_aggregated_data_up_to-2025-10-18.csv",
        "resources/og/Middle-East_aggregated_data_up_to-2025-10-18.csv",
        "resources/og/US-and-Canada_aggregated_data_up_to-2025-10-11_0.csv",
    )
)

## Cleaning

In [4]:
def preprocess_region(df):
    """Return a cleaned copy of one regional frame.

    Rows with a missing ``POPULATION_EXPOSURE`` value are discarded, and the
    columns ``WEEK``, ``COUNTRY``, ``ADMIN1``, ``FATALITIES``, ``ID``,
    ``CENTROID_LATITUDE`` and ``CENTROID_LONGITUDE`` are removed. The input
    frame itself is left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw regional frame; it must contain every column named above.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the surviving rows and the remaining columns.
    """
    columns_to_discard = [
        "WEEK",
        "COUNTRY",
        "ADMIN1",
        "FATALITIES",
        "ID",
        "CENTROID_LATITUDE",
        "CENTROID_LONGITUDE",
    ]
    return df.dropna(subset=["POPULATION_EXPOSURE"]).drop(columns=columns_to_discard)

# Run the shared cleaning step over every raw regional frame, in a fixed order
# that matches the names on the left-hand side.
(
    africa_cleaned,
    asia_pacific_cleaned,
    europe_central_asia_cleaned,
    latin_america_caribbean_cleaned,
    middle_east_cleaned,
    us_canada_cleaned,
) = map(
    preprocess_region,
    (
        africa,
        asia_pacific,
        europe_central_asia,
        latin_america_caribbean,
        middle_east,
        us_canada,
    ),
)

# Write each cleaned frame to its own CSV under resources/, without the row index.
for cleaned_frame, cleaned_csv_path in (
    (africa_cleaned, "resources/africa_cleaned.csv"),
    (asia_pacific_cleaned, "resources/asia_pacific_cleaned.csv"),
    (europe_central_asia_cleaned, "resources/europe_central_asia_cleaned.csv"),
    (latin_america_caribbean_cleaned, "resources/latin_america_caribbean_cleaned.csv"),
    (middle_east_cleaned, "resources/middle_east_cleaned.csv"),
    (us_canada_cleaned, "resources/us_canada_cleaned.csv"),
):
    cleaned_frame.to_csv(cleaned_csv_path, index=False)

## Code to preprocess the data for the specific visualizations

### Bar chart
Total number of events per region

In [5]:
# Total event count per region: the sum of the EVENTS column of each cleaned
# frame, computed in the same order as the names on the left-hand side.
(
    africa_bar,
    asia_pacific_bar,
    europe_central_asia_bar,
    latin_america_caribbean_bar,
    middle_east_bar,
    us_canada_bar,
) = (
    region_frame["EVENTS"].sum()
    for region_frame in (
        africa_cleaned,
        asia_pacific_cleaned,
        europe_central_asia_cleaned,
        latin_america_caribbean_cleaned,
        middle_east_cleaned,
        us_canada_cleaned,
    )
)

# Region label -> total; insertion order fixes the row order of the output table.
bar_totals_by_region = {
    "Africa": africa_bar,
    "Asia Pacific": asia_pacific_bar,
    "Europe Central Asia": europe_central_asia_bar,
    "Latin America Caribbean": latin_america_caribbean_bar,
    "Middle East": middle_east_bar,
    "US Canada": us_canada_bar,
}

# Assemble the two-column table and save it for the bar chart.
bar_data = pd.DataFrame({
    "Region": list(bar_totals_by_region.keys()),
    "TotalEvents": list(bar_totals_by_region.values()),
})

bar_data.to_csv("resources/plots/bar_data.csv", index=False)

### Grouped bar chart
Battles vs violence against civilians per region

In [6]:
def _sum_events_by_type(region_frame):
    """Return the EVENTS total for each EVENT_TYPE of one regional frame."""
    return region_frame.groupby("EVENT_TYPE")["EVENTS"].sum()


# Per-region EVENTS totals, indexed by EVENT_TYPE.
africa_cleaned_grouped = _sum_events_by_type(africa_cleaned)
asia_pacific_cleaned_grouped = _sum_events_by_type(asia_pacific_cleaned)
europe_central_asia_cleaned_grouped = _sum_events_by_type(europe_central_asia_cleaned)
latin_america_caribbean_grouped = _sum_events_by_type(latin_america_caribbean_cleaned)
middle_east_grouped = _sum_events_by_type(middle_east_cleaned)
us_canada_grouped = _sum_events_by_type(us_canada_cleaned)

# Same region order as the "Region" column built below.
_grouped_in_region_order = (
    africa_cleaned_grouped,
    asia_pacific_cleaned_grouped,
    europe_central_asia_cleaned_grouped,
    latin_america_caribbean_grouped,
    middle_east_grouped,
    us_canada_grouped,
)

# Pull out the two event types being compared; a type that does not occur in a
# region falls back to 0.
(
    africa_battles,
    asia_pacific_battles,
    europe_central_asia_battles,
    latin_america_caribbean_battles,
    middle_east_battles,
    us_canada_battles,
) = [totals.get("Battles", 0) for totals in _grouped_in_region_order]

(
    africa_violence_against_civilians,
    asia_pacific_violence_against_civilians,
    europe_central_asia_violence_against_civilians,
    latin_america_caribbean_violence_against_civilians,
    middle_east_violence_against_civilians,
    us_canada_violence_against_civilians,
) = [totals.get("Violence against civilians", 0) for totals in _grouped_in_region_order]

# Assemble the table (one row per region) and save it for the grouped bar chart.
grouped_bar_data = pd.DataFrame({
    "Region": [
        "Africa",
        "Asia/Pacific",
        "Europe/CentralAsia",
        "LatinAmerica/Caribbean",
        "MiddleEast",
        "US/Canada",
    ],
    "Battles": [
        africa_battles,
        asia_pacific_battles,
        europe_central_asia_battles,
        latin_america_caribbean_battles,
        middle_east_battles,
        us_canada_battles,
    ],
    "ViolenceAgainstCivilians": [
        africa_violence_against_civilians,
        asia_pacific_violence_against_civilians,
        europe_central_asia_violence_against_civilians,
        latin_america_caribbean_violence_against_civilians,
        middle_east_violence_against_civilians,
        us_canada_violence_against_civilians,
    ],
})

grouped_bar_data.to_csv("resources/plots/grouped_bar_data.csv", index=False)

### Heat map
Average population exposure based on the region and the type of event

In [7]:
def _mean_exposure_by_event_type(region_frame):
    """Return the mean POPULATION_EXPOSURE for each EVENT_TYPE of one regional frame."""
    return region_frame.groupby("EVENT_TYPE")["POPULATION_EXPOSURE"].mean()


# One Series per region, indexed by EVENT_TYPE. The groupby only reads the
# EVENT_TYPE and POPULATION_EXPOSURE columns, so no columns need to be dropped
# beforehand.
africa_heatmap = _mean_exposure_by_event_type(africa_cleaned)
asia_pacific_heatmap = _mean_exposure_by_event_type(asia_pacific_cleaned)
europe_central_asia_heatmap = _mean_exposure_by_event_type(europe_central_asia_cleaned)
latin_america_caribbean_heatmap = _mean_exposure_by_event_type(latin_america_caribbean_cleaned)
middle_east_heatmap = _mean_exposure_by_event_type(middle_east_cleaned)
us_canada_heatmap = _mean_exposure_by_event_type(us_canada_cleaned)

# Build the table from the Series only, so pandas aligns every region on the
# EVENT_TYPE label and the row index becomes the union of all regions' event
# types. The EVENT_TYPE column is then derived from that aligned index instead
# of from Africa's index alone: a plain list taken from one region is positional
# and makes the constructor raise ValueError as soon as another region contains
# an event type that Africa lacks.
# A region with no rows for an event type gets NaN (an empty CSV cell), not 0.
heatmap_data = (
    pd.DataFrame({
        "Africa": africa_heatmap,
        "Asia/Pacific": asia_pacific_heatmap,
        "Europe/CentralAsia": europe_central_asia_heatmap,
        "LatinAmerica/Caribbean": latin_america_caribbean_heatmap,
        "MiddleEast": middle_east_heatmap,
        "US/Canada": us_canada_heatmap,
    })
    .rename_axis("EVENT_TYPE")
    .reset_index()
)

# Name retained in case a later cell reads it; it lists every event type
# present in the table.
list_of_events = heatmap_data["EVENT_TYPE"].tolist()

heatmap_data.to_csv("resources/plots/heatmap_data.csv", index=False)

### Stacked bar chart
Percentage of non-violent and violent protests per region (peaceful protests vs. violent demonstrations)

In [8]:
def _count_sub_event_rows(region_frame, sub_event_type):
    """Return how many rows of ``region_frame`` have the given SUB_EVENT_TYPE.

    NOTE(review): this counts rows, whereas the other plot cells in this
    notebook sum the EVENTS column. If one row can stand for several events,
    the shares below are row shares, not event shares -- confirm which is
    intended.
    """
    return int((region_frame["SUB_EVENT_TYPE"] == sub_event_type).sum())


def _share_in_percent(part, whole):
    """Return ``part`` as a percentage of ``whole``, or NaN when ``whole`` is 0.

    The guard keeps a region without any matching rows from aborting the whole
    cell with ZeroDivisionError; its share is reported as undefined instead.
    """
    if whole == 0:
        return float("nan")
    return (part / whole) * 100


# AFRICA
africa_peaceful = _count_sub_event_rows(africa_cleaned, "Peaceful protest")
africa_violent = _count_sub_event_rows(africa_cleaned, "Violent demonstration")
africa_protests = africa_peaceful + africa_violent
africa_peaceful_percentage = _share_in_percent(africa_peaceful, africa_protests)
africa_violent_percentage = _share_in_percent(africa_violent, africa_protests)

# ASIA PACIFIC
asia_pacific_peaceful = _count_sub_event_rows(asia_pacific_cleaned, "Peaceful protest")
asia_pacific_violent = _count_sub_event_rows(asia_pacific_cleaned, "Violent demonstration")
asia_pacific_protests = asia_pacific_peaceful + asia_pacific_violent
asia_pacific_peaceful_percentage = _share_in_percent(asia_pacific_peaceful, asia_pacific_protests)
asia_pacific_violent_percentage = _share_in_percent(asia_pacific_violent, asia_pacific_protests)

# EUROPE CENTRAL ASIA
europe_central_asia_peaceful = _count_sub_event_rows(europe_central_asia_cleaned, "Peaceful protest")
europe_central_asia_violent = _count_sub_event_rows(europe_central_asia_cleaned, "Violent demonstration")
europe_central_asia_protests = europe_central_asia_peaceful + europe_central_asia_violent
europe_central_asia_peaceful_percentage = _share_in_percent(europe_central_asia_peaceful, europe_central_asia_protests)
europe_central_asia_violent_percentage = _share_in_percent(europe_central_asia_violent, europe_central_asia_protests)

# LATIN AMERICA CARIBBEAN
latin_america_caribbean_peaceful = _count_sub_event_rows(latin_america_caribbean_cleaned, "Peaceful protest")
latin_america_caribbean_violent = _count_sub_event_rows(latin_america_caribbean_cleaned, "Violent demonstration")
latin_america_caribbean_protests = latin_america_caribbean_peaceful + latin_america_caribbean_violent
latin_america_caribbean_peaceful_percentage = _share_in_percent(latin_america_caribbean_peaceful, latin_america_caribbean_protests)
latin_america_caribbean_violent_percentage = _share_in_percent(latin_america_caribbean_violent, latin_america_caribbean_protests)

# MIDDLE EAST
middle_east_peaceful = _count_sub_event_rows(middle_east_cleaned, "Peaceful protest")
middle_east_violent = _count_sub_event_rows(middle_east_cleaned, "Violent demonstration")
middle_east_protests = middle_east_peaceful + middle_east_violent
middle_east_peaceful_percentage = _share_in_percent(middle_east_peaceful, middle_east_protests)
middle_east_violent_percentage = _share_in_percent(middle_east_violent, middle_east_protests)

# US CANADA
us_canada_peaceful = _count_sub_event_rows(us_canada_cleaned, "Peaceful protest")
us_canada_violent = _count_sub_event_rows(us_canada_cleaned, "Violent demonstration")
us_canada_protests = us_canada_peaceful + us_canada_violent
us_canada_peaceful_percentage = _share_in_percent(us_canada_peaceful, us_canada_protests)
us_canada_violent_percentage = _share_in_percent(us_canada_violent, us_canada_protests)

# Create the dataframe (one row per region) and save it as csv
stacked_bar_data = pd.DataFrame({
    "Region": ["Africa", "Asia/Pacific", "Europe/Central Asia", "Latin America/Caribbean", "Middle East", "US/Canada"],
    "PeacefulProtests (%)": [
        africa_peaceful_percentage,
        asia_pacific_peaceful_percentage,
        europe_central_asia_peaceful_percentage,
        latin_america_caribbean_peaceful_percentage,
        middle_east_peaceful_percentage,
        us_canada_peaceful_percentage,
    ],
    "ViolentDemonstrations (%)": [
        africa_violent_percentage,
        asia_pacific_violent_percentage,
        europe_central_asia_violent_percentage,
        latin_america_caribbean_violent_percentage,
        middle_east_violent_percentage,
        us_canada_violent_percentage,
    ],
})

stacked_bar_data.to_csv("resources/plots/stacked_bar_data.csv", index=False)

### Waffle chart
Kinds of events per region (percentage)

In [9]:
# For every region: the overall EVENTS total and the EVENTS total per
# EVENT_TYPE. Both are kept as raw counts; the cell that builds the waffle
# table turns them into percentages itself.

# AFRICA
africa_total_events = africa_cleaned["EVENTS"].sum()
africa_event_types = africa_cleaned.groupby("EVENT_TYPE")["EVENTS"].sum()

# ASIA PACIFIC
asia_pacific_total_events = asia_pacific_cleaned["EVENTS"].sum()
asia_pacific_event_types = asia_pacific_cleaned.groupby("EVENT_TYPE")["EVENTS"].sum()

# EUROPE CENTRAL ASIA
europe_central_asia_total_events = europe_central_asia_cleaned["EVENTS"].sum()
europe_central_asia_event_types = europe_central_asia_cleaned.groupby("EVENT_TYPE")["EVENTS"].sum()

# LATIN AMERICA CARIBBEAN
latin_america_caribbean_total_events = latin_america_caribbean_cleaned["EVENTS"].sum()
latin_america_caribbean_event_types = latin_america_caribbean_cleaned.groupby("EVENT_TYPE")["EVENTS"].sum()

# MIDDLE EAST
middle_east_total_events = middle_east_cleaned["EVENTS"].sum()
middle_east_event_types = middle_east_cleaned.groupby("EVENT_TYPE")["EVENTS"].sum()

# US CANADA
us_canada_total_events = us_canada_cleaned["EVENTS"].sum()
us_canada_event_types = us_canada_cleaned.groupby("EVENT_TYPE")["EVENTS"].sum()

# Cell output: preview of the US/Canada shares in percent. Only the last
# expression of a cell is displayed, so the per-region percentage Series that
# used to be computed and thrown away above are no longer evaluated. The
# division is vectorised over the Series instead of applied element by element.
(us_canada_event_types / us_canada_total_events) * 100


EVENT_TYPE
Battles                        0.033890
Explosions/Remote violence     0.021181
Protests                      97.669000
Riots                          1.812059
Violence against civilians     0.463870
Name: EVENTS, dtype: float64

In [11]:
# One entry per output row: (region label, EVENTS per EVENT_TYPE, EVENTS total).
waffle_region_inputs = (
    ("Africa", africa_event_types, africa_total_events),
    ("Asia/Pacific", asia_pacific_event_types, asia_pacific_total_events),
    ("Europe/Central Asia", europe_central_asia_event_types, europe_central_asia_total_events),
    ("Latin America/Caribbean", latin_america_caribbean_event_types, latin_america_caribbean_total_events),
    ("Middle East", middle_east_event_types, middle_east_total_events),
    ("US/Canada", us_canada_event_types, us_canada_total_events),
)

# One entry per output column after REGION: (EVENT_TYPE value, column header).
waffle_column_specs = (
    ("Battles", "Battles (%)"),
    ("Explosions/Remote violence", "Explosions/Remote violence (%)"),
    ("Violence against civilians", "Violence against civilians (%)"),
    ("Protests", "Protests (%)"),
    ("Riots", "Riots (%)"),
)

# Each cell is that event type's share of the region's events, in percent; an
# event type that does not occur in a region counts as 0.
waffle_data = pd.DataFrame({
    "REGION": [region_label for region_label, _counts, _total in waffle_region_inputs],
    **{
        column_header: [
            (type_counts.get(event_type, 0) / region_total) * 100
            for _label, type_counts, region_total in waffle_region_inputs
        ]
        for event_type, column_header in waffle_column_specs
    },
})

waffle_data.to_csv("resources/plots/waffle_data.csv", index=False)