# WYWM Data Capstone
Alex Highfield

07/01/2025

### Imports and setup

In [1]:
import pandas as pd
import numpy as np

In [2]:
pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', None)

### Reading in data and basic summary stats

In [3]:
acled_df = pd.read_csv('2021-12-14-2025-01-07.csv', sep=',')

In [4]:
acled_df.columns

Index(['event_id_cnty', 'event_date', 'year', 'time_precision',
       'disorder_type', 'event_type', 'sub_event_type', 'actor1',
       'assoc_actor_1', 'inter1', 'actor2', 'assoc_actor_2', 'inter2',
       'interaction', 'civilian_targeting', 'iso', 'region', 'country',
       'admin1', 'admin2', 'admin3', 'location', 'latitude', 'longitude',
       'geo_precision', 'source', 'source_scale', 'notes', 'fatalities',
       'tags', 'timestamp', 'population_best'],
      dtype='object')

In [5]:
acled_df.describe()

Unnamed: 0,year,time_precision,iso,latitude,longitude,geo_precision,fatalities,timestamp,population_best
count,1020957.0,1020957.0,1020957.0,1020957.0,1020957.0,1020957.0,1020957.0,1020957.0,919885.0
mean,2023.023,1.059967,484.4919,26.73766,22.74033,1.297616,0.5647446,1709845000.0,30087.37
std,0.8387847,0.2489612,267.7745,20.36541,58.58351,0.4955397,4.396402,24890380.0,77333.77
min,2021.0,1.0,0.0,-77.842,-178.1562,1.0,0.0,1641834000.0,0.0
25%,2022.0,1.0,275.0,14.7978,2.3522,1.0,0.0,1695751000.0,2136.0
50%,2023.0,1.0,466.0,31.5876,35.3294,1.0,0.0,1719353000.0,8858.0
75%,2024.0,1.0,762.0,43.1399,46.183,2.0,0.0,1730157000.0,25399.0
max,2024.0,3.0,894.0,78.2231,179.368,3.0,1000.0,1734480000.0,2776234.0


### Cleaning data

##### Convert time variable to yyyy-mm-dd format

In [6]:
acled_df['event_date_var'] = pd.to_datetime(acled_df['event_date'], format='%d %B %Y')

acled_df[['event_date_var', 'event_date']].head()

Unnamed: 0,event_date_var,event_date
0,2024-12-13,13 December 2024
1,2024-12-13,13 December 2024
2,2024-12-13,13 December 2024
3,2024-12-13,13 December 2024
4,2024-12-13,13 December 2024


##### Data starts half way through Dec21 so filter out incomplete months

In [7]:
acled_df = acled_df[acled_df['event_date_var'] >= '2022-01-01']

acled_df.sort_values('event_date_var').head()

Unnamed: 0,event_id_cnty,event_date,year,time_precision,disorder_type,event_type,sub_event_type,actor1,assoc_actor_1,inter1,actor2,assoc_actor_2,inter2,interaction,civilian_targeting,iso,region,country,admin1,admin2,admin3,location,latitude,longitude,geo_precision,source,source_scale,notes,fatalities,tags,timestamp,population_best,event_date_var
1009448,UKR50245,01 January 2022,2022,1,Political violence,Explosions/Remote violence,Shelling/artillery/missile attack,NAF: United Armed Forces of Novorossiya,,Rebel group,Military Forces of Ukraine (2019-),,State forces,State forces-Rebel group,,804,Europe,Ukraine,Donetsk,Bakhmutskyi,Svitlodarska,Zaitseve,48.4292,38.0094,2,Ministry of Defence of Ukraine,Other,"On 1 January 2022, the NAF rebel forces fired ...",0,,1734479950,358.0,2022-01-01
1009148,IND104740,01 January 2022,2022,1,Political violence,Violence against civilians,Attack,CPI (Maoist): Communist Party of India (Maoist),,Rebel group,Civilians (India),Police Forces of India (2014-),Civilians,Rebel group-Civilians,Civilian targeting,356,South Asia,India,Chhattisgarh,Bijapur,Usur,Timmapur,18.5352,80.8859,1,South Asia Terrorism Portal,Other,"On 1 January 2022, a group of CPI-Maoist cadre...",1,,1664291746,896.0,2022-01-01
1009149,NIG27330,01 January 2022,2022,1,Political violence,Violence against civilians,Abduction/forced disappearance,Zamfara Communal Militia (Nigeria),,Identity militia,Civilians (Nigeria),,Civilians,Identity militia-Civilians,Civilian targeting,566,Western Africa,Nigeria,Zamfara,Tsafe,,Bilbis,11.8557,6.9535,1,"Risk and Strategic Management, Corporation",Local partner-International,"On 1 January 2022, Zamfara militia men abducte...",0,,1666026860,169.0,2022-01-01
1009150,IRQ35629,01 January 2022,2022,1,Strategic developments,Strategic developments,Looting/property destruction,Police Forces of Iraq (2020-2022),,State forces,Unidentified Armed Group (Iraq),,Political militia,State forces-Political militia,,368,Middle East,Iraq,Baghdad,Al Kadhmiyah,Al Taji,Taji,33.529,44.278,2,Al Sumaria TV,National,"Property destruction: On 1 January 2022, Iraqi...",0,,1667259136,,2022-01-01
1009151,UGA7646,01 January 2022,2022,1,Political violence,Riots,Mob violence,Rioters (Uganda),Vigilante Group (Uganda),Rioters,Civilians (Uganda),,Civilians,Rioters-Civilians,Civilian targeting,800,Eastern Africa,Uganda,Kiryandongo,Kibanda,Kiryandongo Town Council,Kiryandongo,1.8763,32.0623,2,URN (Uganda),National,"On 1 January 2022, residents lynched a suspect...",1,crowd size=no report,1667868633,6242.0,2022-01-01


### Some new variables for analysis

##### Classify events by general category

In [8]:
acled_df['event_general'] = acled_df['event_type'].case_when([
    (
        acled_df['event_type'].isin(['Battles', 'Explosions/Remote violence', 'Violence against civilians']), 
        'Violent events'
    ),
    (
        acled_df['event_type'].isin(['Protests', 'Riots']),
        'Demonstrations'
    ),
    (
        acled_df['event_type'].isin(['Strategic developments']),
        'Non-violent actions'),
    (
        pd.Series(True),
        np.nan
    )
])

acled_df['event_general'].unique()

array(['Non-violent actions', 'Violent events', 'Demonstrations'],
      dtype=object)

### Summary stats

##### Count of events by date, type and location

In [9]:
categories = [
    # 'year',
    'event_date',

    'event_general',
    # 'event_type',
    # 'sub_event_type',

    'region',
    'country'
]

values = [
    'event_id_cnty'
]

grouped_df = acled_df[categories + values].groupby(categories).count()
grouped_df.to_csv('grouped.csv')
grouped_df.head()


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,event_id_cnty
event_date,event_general,region,country,Unnamed: 4_level_1
01 April 2022,Demonstrations,Caribbean,Dominican Republic,1
01 April 2022,Demonstrations,Caucasus and Central Asia,Armenia,2
01 April 2022,Demonstrations,Caucasus and Central Asia,Georgia,1
01 April 2022,Demonstrations,Caucasus and Central Asia,Kazakhstan,5
01 April 2022,Demonstrations,Central America,Costa Rica,1


##### Look at splits of events by category

##### Covid data

In [10]:
covid_df = pd.read_csv('owid-covid-data.csv', sep=',')
covid_df.sort_values('date', ascending=False).head(100)

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,total_tests,new_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,tests_units,total_vaccinations,people_vaccinated,people_fully_vaccinated,total_boosters,new_vaccinations,new_vaccinations_smoothed,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,total_boosters_per_hundred,new_vaccinations_smoothed_per_million,new_people_vaccinated_smoothed,new_people_vaccinated_smoothed_per_hundred,stringency_index,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,population,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
230301,MYS,Asia,Malaysia,2024-08-14,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,72657290.0,28138570.0,27551140.0,17185820.0,14.0,11.0,214.09,82.91,81.18,50.64,0.0,2.0,0.0,,96.254,29.9,6.293,3.407,26808.164,0.1,260.942,16.74,1.0,42.4,,1.9,76.16,0.81,33938216,,,,
120252,OWID_EUR,,Europe,2024-08-14,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1399334000.0,523814300.0,493751300.0,365099900.0,,,187.88,70.33,66.29,49.02,,,,,,,,,,,,,,,,,,,744807803,,,,
217093,LTU,Europe,Lithuania,2024-08-14,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4604865.0,1958300.0,1881106.0,948621.0,1.0,2.0,167.45,71.21,68.4,34.49,1.0,0.0,0.0,,45.135,43.5,19.002,13.778,29524.265,0.7,342.989,3.67,21.3,38.0,,6.56,75.93,0.882,2750058,,,,
424412,OWID_WRL,,World,2024-08-14,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,13578770000.0,5631264000.0,5177943000.0,2817381000.0,,,170.26,70.61,64.93,35.33,,,,,58.045,30.9,8.696,5.355,15469.207,10.0,233.07,8.51,6.434,34.635,60.13,2.705,72.58,0.737,7975105024,,,,
408137,OWID_UMC,,Upper-middle-income countries,2024-08-14,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5449981000.0,2109015000.0,1990653000.0,1265985000.0,,,215.76,83.49,78.81,50.12,,,,,,,,,,,,,,,,,,,2525921300,,,,
121602,OWID_EUN,,European Union (27),2024-08-14,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,951113300.0,338119600.0,327967400.0,282438800.0,,,211.29,75.11,72.86,62.74,,,,,,,,,,,,,,,,,,,450146793,,,,
165198,OWID_HIC,,High-income countries,2024-08-14,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2840880000.0,998723000.0,929256000.0,839231500.0,,,227.18,79.86,74.31,67.11,,,,,,,,,,,,,,,,,,,1250514600,,,,
21775,OWID_ASI,,Asia,2024-08-14,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,9104305000.0,3689439000.0,3462095000.0,1815177000.0,,,192.83,78.14,73.33,38.45,,,,,,,,,,,,,,,,,,,4721383370,,,,
424411,OWID_WRL,,World,2024-08-13,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,13578770000.0,5631264000.0,5177943000.0,2817381000.0,,,170.26,70.61,64.93,35.33,,,,,58.045,30.9,8.696,5.355,15469.207,10.0,233.07,8.51,6.434,34.635,60.13,2.705,72.58,0.737,7975105024,,,,
217092,LTU,Europe,Lithuania,2024-08-13,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4604864.0,1958300.0,1881106.0,948621.0,6.0,2.0,167.45,71.21,68.4,34.49,1.0,0.0,0.0,,45.135,43.5,19.002,13.778,29524.265,0.7,342.989,3.67,21.3,38.0,,6.56,75.93,0.882,2750058,,,,


### Insight 1 - events over time

##### Find earliest reporting date for each country

In [11]:
by = ['event_date_var', 'region', 'country', 'time_precision']

acled_sorted_df = acled_df.sort_values(by=by)

earliest_df = acled_sorted_df.drop_duplicates(subset=['country'], keep='first')
earliest_df = earliest_df.rename({'event_date_var': 'event_date_first'}, axis=1)

acled_temporal_df = acled_df.merge(earliest_df[['country', 'event_date_first']], 
                                   on='country', 
                                   how='left')

acled_temporal_df = acled_temporal_df[acled_temporal_df['event_date_first'].dt.to_period('M') == '2022-01']

In [12]:
categories = [
    # 'year',
    'event_date',

    'event_general',
    'event_type',
    'sub_event_type',

    'region',
    'country'
]

grouped_df = acled_temporal_df.groupby(categories).agg(
    event_count=('event_id_cnty', 'count')
)

grouped_df.to_csv('grouped.csv')

grouped_df.head()


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,event_count
event_date,event_general,event_type,sub_event_type,region,country,Unnamed: 6_level_1
01 April 2022,Demonstrations,Protests,Excessive force against protesters,Western Africa,Ghana,1
01 April 2022,Demonstrations,Protests,Peaceful protest,Caucasus and Central Asia,Armenia,2
01 April 2022,Demonstrations,Protests,Peaceful protest,Caucasus and Central Asia,Georgia,1
01 April 2022,Demonstrations,Protests,Peaceful protest,Caucasus and Central Asia,Kazakhstan,5
01 April 2022,Demonstrations,Protests,Peaceful protest,Central America,Costa Rica,1


### Insight 2 - population/geographical impacts

##### Bring in populations

In [13]:
pop_df = pd.read_csv('API_SP.POP.TOTL_DS2_en_csv_v2_900\API_SP.POP.TOTL_DS2_en_csv_v2_900.csv', sep=',')

# minor cleaning
pop_df.loc[pop_df['Country Name'] == 'Russian Federation', 'Country Name'] = 'Russia'

pop_df = pop_df[['Country Name', 'Country Code', '2022', '2023']]
pop_df = pop_df.rename({'Country Name': 'country'}, axis=1)
pop_df.head()

Unnamed: 0,country,Country Code,2022,2023
0,Aruba,ABW,107310.0,107359.0
1,Africa Eastern and Southern,AFE,731821393.0,750503764.0
2,Afghanistan,AFG,40578842.0,41454761.0
3,Africa Western and Central,AFW,497387180.0,509398589.0
4,Angola,AGO,35635029.0,36749906.0


##### Join population to acled

In [14]:
acled_df['Country Code'] = acled_df['event_id_cnty'].str.slice(stop=3)

acled_pop_df = acled_df.merge(pop_df,
                              how='left',
                              on='country')

acled_pop_df['pop_count'] = acled_pop_df['country'].case_when([
    (
        acled_pop_df['year'] == 2022,
        acled_pop_df['2022']
    ),
    (
        acled_pop_df['year'] == 2023,
        acled_pop_df['2023']
    ),
    (
        pd.Series(True),
        np.nan
    )
])

acled_pop_df = acled_pop_df.dropna(subset=['country', 'pop_count', '2022', '2023'])

acled_pop_df[['country','pop_count', '2022', '2023']].head()


Unnamed: 0,country,pop_count,2022,2023
359764,Jordan,11439213.0,11256263.0,11439213.0
359766,Burkina Faso,23025776.0,22509038.0,23025776.0
359767,Burkina Faso,23025776.0,22509038.0,23025776.0
359768,Uganda,48656601.0,47312719.0,48656601.0
359771,Gabon,2484789.0,2430747.0,2484789.0


##### Data validation

In [17]:
# a_df = acled_df[['country', 'Country Code']].drop_duplicates()
# print(a_df.shape)

# p_df = pop_df.drop_duplicates(subset='Country Name')
# print(p_df.shape)

# j_df = a_df.merge(p_df,
#                   how='left',
#                   on='Country Code')

# print(j_df.shape)

# j_df[j_df['Country Name'].isna()]

# j_df.isnull()

##### Band populations

In [18]:
pop_var = 'pop_count'

acled_pop_df['population_band'] = acled_pop_df[pop_var].case_when([
    (
        acled_pop_df[pop_var] <= 0,
        'X. Invalid'
    ),
    (
        (acled_pop_df[pop_var] > 0) & (acled_pop_df[pop_var] < 5_000_000),
        'A. < 5 mil'
    ),
    (
        (acled_pop_df[pop_var] >= 5_000_000) & (acled_pop_df[pop_var] < 20_000_000),
        'B. < 20 mil'
    ),
    (
        (acled_pop_df[pop_var] >= 20_000_000) & (acled_pop_df[pop_var] < 100_000_000),
        'C. < 100 mil'
    ),
    (
        (acled_pop_df[pop_var] >= 100_000_000) & (acled_pop_df[pop_var] < 500_000_000),
        'D. < 500 mil'
    ),
    (
        acled_pop_df[pop_var] >= 500_000_000,
        'E. >= 500 mil'
    ),
    (
        pd.Series(True),
        'X. Invalid'
    )
])

acled_pop_df = acled_pop_df[acled_pop_df['year'].isin([2022, 2023])]

acled_pop_df['population_band'].unique()

acled_pop_df.groupby('population_band')['country'].nunique()

# acled_df[['population_best', 'country']].groupby('population_best').count()

population_band
A. < 5 mil       76
B. < 20 mil      56
C. < 100 mil     39
D. < 500 mil     11
E. >= 500 mil     2
Name: country, dtype: int64

Proportion of population exposed to conflict

In [19]:
categories = [
    'population_band',

    'event_date',

    'event_general',
    'event_type',
    'sub_event_type',
    
    'region',
    'country'
]

grouped_df = acled_pop_df.groupby(categories).agg(
    country_count=('country', 'nunique'),
    event_count=('event_id_cnty', 'count'),
    pop_exposed=('population_best', 'sum'),
    total_fatalities=('fatalities', 'sum')
)

grouped_df.to_csv('grouped.csv')
grouped_df.sort_values('total_fatalities', ascending=False).head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,country_count,event_count,pop_exposed,total_fatalities
population_band,event_date,event_general,event_type,sub_event_type,region,country,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
C. < 100 mil,04 November 2023,Violent events,Battles,Non-state actor overtakes territory,Northern Africa,Sudan,1,1,3596.0,1000
C. < 100 mil,15 November 2023,Violent events,Battles,Armed clash,Europe,Ukraine,1,41,218590.0,678
B. < 20 mil,07 October 2023,Violent events,Violence against civilians,Attack,Middle East,Israel,1,18,108622.0,642
C. < 100 mil,15 March 2022,Violent events,Violence against civilians,Attack,Europe,Ukraine,1,64,773696.0,636
C. < 100 mil,26 January 2023,Violent events,Battles,Armed clash,Europe,Ukraine,1,24,449833.0,620


### Insight 3 - Ukraine/Russia deep dive

In [20]:
ukrussia_df = acled_df[acled_df['country'].isin(['Ukraine', 'Russia'])].copy()

In [21]:
ukrussia_df.head()

Unnamed: 0,event_id_cnty,event_date,year,time_precision,disorder_type,event_type,sub_event_type,actor1,assoc_actor_1,inter1,actor2,assoc_actor_2,inter2,interaction,civilian_targeting,iso,region,country,admin1,admin2,admin3,location,latitude,longitude,geo_precision,source,source_scale,notes,fatalities,tags,timestamp,population_best,event_date_var,event_general,Country Code
11,RUS27884,13 December 2024,2024,1,Political violence,Explosions/Remote violence,Air/drone strike,Military Forces of Russia (2000-) Air Force,Military Forces of Russia (2000-),State forces,Military Forces of Ukraine (2019-),,External/Other forces,State forces-External/Other forces,,643,Europe,Russia,Kursk,Sudzhanskiy,,Novaya Sorochina,51.3582,35.2268,2,Ministry of Defence of Russia,Regional,"On 13 December 2024, Russian military carried ...",0,,1734463299,,2024-12-13,Violent events,RUS
12,RUS27952,13 December 2024,2024,1,Political violence,Battles,Armed clash,Military Forces of Russia (2000-),,State forces,Military Forces of Ukraine (2019-),Military Forces of Ukraine (2019-) Marines; Mi...,External/Other forces,State forces-External/Other forces,,643,Europe,Russia,Kursk,Sudzhanskiy,,Kruglenkoe,51.3382,35.1749,2,Ministry of Defence of Russia,Regional,"On 13 December 2024, Russian forces engaged in...",1,,1734463301,,2024-12-13,Violent events,RUS
13,RUS27958,13 December 2024,2024,1,Political violence,Battles,Armed clash,Military Forces of Russia (2000-),,State forces,Military Forces of Ukraine (2019-),Military Forces of Ukraine (2019-) Marines; Mi...,External/Other forces,State forces-External/Other forces,,643,Europe,Russia,Kursk,Sudzhanskiy,,Staraya Sorochina,51.3427,35.221,2,Ministry of Defence of Russia,Regional,"On 13 December 2024, Russian forces engaged in...",1,,1734463301,,2024-12-13,Violent events,RUS
14,RUS27960,13 December 2024,2024,1,Political violence,Battles,Armed clash,Military Forces of Russia (2000-),,State forces,Military Forces of Ukraine (2019-),Military Forces of Ukraine (2019-) Marines; Mi...,External/Other forces,State forces-External/Other forces,,643,Europe,Russia,Kursk,Korenevskiy,,Viktorovka,51.2298,34.8925,2,Ministry of Defence of Russia,Regional,"On 13 December 2024, Russian forces engaged in...",1,,1734463301,,2024-12-13,Violent events,RUS
15,UKR197732,13 December 2024,2024,1,Political violence,Explosions/Remote violence,Air/drone strike,Military Forces of Russia (2000-) Air Force,,External/Other forces,,,,External/Other forces only,,804,Europe,Ukraine,Kharkiv,Iziumskyi,Borivska,Nova Kruhliakivka,49.4758,37.7203,2,Ministry of Defence of Ukraine,Other,"On 13 December 2024, Russian air forces struck...",0,,1734463302,,2024-12-13,Violent events,UKR


##### Fatalities by country/administration

In [22]:
categories = [
    'country',
    'admin1'
]

values = [
    'fatalities'
]

grouped_df = ukrussia_df[categories + values].groupby(categories).agg(
    total_fatalities=('fatalities', 'count')
)

grouped_df.to_csv('grouped.csv')

grouped_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,total_fatalities
country,admin1,Unnamed: 2_level_1
Russia,Altai,50
Russia,Amur,8
Russia,Arkhangelsk,24
Russia,Astrakhan,36
Russia,Belgorod,11491


##### Fatalities and civilian targeting

In [24]:
ukrussia_df['civilian_targeting_2'] = ukrussia_df['civilian_targeting'].case_when([
    (
        ukrussia_df['civilian_targeting'] == 'Civilian targeting',
        'Yes'
    ),
    (
        ukrussia_df['civilian_targeting'] == np.nan,
        'N/A'
    ),
    (
        pd.Series(True),
        'N/A'
    )
])

ukrussia_df['fatalities_reported'] = ukrussia_df['fatalities'].case_when([
    (
        ukrussia_df['fatalities'] > 0,
        'Yes'
    ),
    (
        pd.Series(True),
        'No'
    )
])

ukrussia_df['country'].unique()


array(['Russia', 'Ukraine'], dtype=object)

In [25]:
categories = [
    'country',

    'event_general',
    'event_type',
    'sub_event_type',

    'civilian_targeting_2',
    'fatalities_reported'
]

grouped_df = ukrussia_df.groupby(categories).agg(
    total_events=('event_id_cnty', 'count'),
    total_fatalities=('fatalities', 'sum')
)

grouped_df.to_csv('grouped.csv')

grouped_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,total_events,total_fatalities
country,event_general,event_type,sub_event_type,civilian_targeting_2,fatalities_reported,Unnamed: 6_level_1,Unnamed: 7_level_1
Russia,Demonstrations,Protests,Excessive force against protesters,Yes,No,1,0
Russia,Demonstrations,Protests,Peaceful protest,,No,1553,0
Russia,Demonstrations,Protests,Protest with intervention,,No,467,0
Russia,Demonstrations,Riots,Mob violence,,No,9,0
Russia,Demonstrations,Riots,Mob violence,,Yes,2,14


### Insight 4 - geographical impacts

In [26]:
categories = [
    'country'
]

grouped_df = acled_df.groupby(categories).agg(
    event_count=('event_id_cnty', 'count'),
    total_fatalities=('fatalities', 'sum'),
    pop_exposed=('population_best', 'sum')
)

grouped_df.to_csv('grouped.csv')
grouped_df.head()

Unnamed: 0_level_0,event_count,total_fatalities,pop_exposed
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Afghanistan,5651,6413,327291393.0
Akrotiri and Dhekelia,7,0,1727.0
Albania,345,2,20916081.0
Algeria,964,75,18061816.0
American Samoa,4,0,4592.0
