In [None]:
# Libraries
import numpy as np # Matrix calculations
import pandas as pd # Data structures
import re # regular expressions
import csv
import json

In [None]:
# Location of the UCDP dyadic CSV export
path = "data/ucdp/ucdp-dyadic-191.csv"

# Load the comma-separated file into a DataFrame
raw_df = pd.read_csv(path, sep=",")

# Report how many rows were loaded
print("Nbr of rows : " + str(len(raw_df.index)))

# Show the first ten rows as the cell output
raw_df.head(10)

In [None]:
# Restrict the frame to the columns used below, then discard every row
# that has no location or no year.
relevant_columns = ['location', 'year', 'type_of_conflict', 'region']
clean_df = raw_df[relevant_columns].dropna(subset=['location', 'year'])

# We only keep Africa
def onlyAfrica(x):
    """Tell whether a region value includes region code 4 (Africa).

    Parameters
    ----------
    x : str or number
        Either a single region code (e.g. ``"4"``, ``4``, ``4.0``) or a
        comma-separated list of codes (e.g. ``"1, 4"``).

    Returns
    -------
    bool
        True when at least one of the listed codes equals 4, False otherwise.
        Tokens that cannot be read as a number (empty strings, NaN, None,
        free text) are ignored rather than raising.
    """
    africa_code = 4

    # Only strings can hold several codes; anything else (int, float, NaN)
    # is treated as one single code so numeric columns do not crash on .split.
    tokens = x.split(",") if isinstance(x, str) else [x]

    for token in tokens:
        try:
            # Compare whole codes numerically instead of searching for the
            # character '4', so a code such as "14" is not mistaken for 4.
            if float(token) == africa_code:
                return True
        except (TypeError, ValueError):
            # Not a number: it cannot be the Africa code, move on.
            continue

    return False
        
# Keep the rows whose region passes onlyAfrica; the region column is then no longer needed
africa_mask = clean_df['region'].apply(onlyAfrica)
clean_df = clean_df[africa_mask].drop(columns=['region'])

# Discard extrasystemic conflicts (type_of_conflict == 1), then drop that column as well
not_extrasystemic = clean_df['type_of_conflict'].apply(lambda code: not int(code) == 1)
clean_df = clean_df[not_extrasystemic].drop(columns=['type_of_conflict'])

# Report how many rows are left
print(f"Nbr of rows : {len(clean_df.index)}")

# Show the first ten remaining rows as the cell output
clean_df.head(10)

In [None]:
# Init a dict of the form {location: {year: 0}} covering every location in
# clean_df and every year between the earliest and latest year in the data.
# The 0 entries are placeholders meaning "no conflict recorded" until the
# dict is populated from the rows of clean_df.
conflicts_dict = {}

# Year bounds, cast to plain ints so the keys read "1990" and never "1990.0"
minYear = int(clean_df['year'].min())
maxYear = int(clean_df['year'].max())

# Distinct location values found in the data (one dict entry per value)
locationDict = clean_df['location'].unique()

for location_name in locationDict:
    # Each location gets its own year -> 0 mapping, inclusive of maxYear
    conflicts_dict[location_name] = {
        str(year_value): 0 for year_value in range(minYear, maxYear + 1)
    }

In [None]:
# Populate the dict with the data: flag each (location, year) pair that
# appears in clean_df with a 1.
for raw_location, raw_year in zip(clean_df["location"], clean_df["year"]):

    location = str(raw_location)
    # Go through int() so the key has the same "1990" form as the keys created
    # at initialisation, even when pandas stores the year column as floats
    # (str(1990.0) would give "1990.0" and silently add a new key).
    year = str(int(raw_year))

    conflicts_dict[location][year] = 1

In [None]:
# Serialise conflicts_dict as JSON and write it to the dataviz data folder
with open('data/dataviz/conflicts.json', 'w') as json_file:
    json_file.write(json.dumps(conflicts_dict))