In [None]:
# Libraries
import numpy as np # Matrix calculations
import pandas as pd # Data structures
import re # regular expressions
import csv
import json

In [None]:
# Path to the GDP dataset (CSV)
path = "data/gdp/API_NY.GDP.MKTP.CD_DS2_en_csv_v2_511444.csv"

# Import the data; the first CSV column becomes the row index
raw_df = pd.read_csv(path, sep=',', index_col=0)

# Remove the country code column
raw_df = raw_df.drop(columns=['Country Code'])

# Print nbr of rows
print("Nbr of rows : " + str(len(raw_df.index)))

# Transpose so the former row labels become the columns
raw_df = raw_df.transpose()

# Treat exact zeros as missing, then fill every missing value with the mean
# of its column. Columns with no valid value at all remain NaN.
# - `np.nan` is used because the `np.NaN` alias was removed in NumPy 2.0.
# - A second `replace(0.0, <column means>)` call used to follow the first one;
#   it was dropped because no zeros are left once they have been turned into
#   NaN, so it could never change anything.
raw_df = raw_df.replace(0.0, np.nan)
raw_df = raw_df.fillna(raw_df.mean(axis=0), axis=0)

# Preview the first rows
raw_df.head(10)

In [None]:
# Collect the row labels of the table (the years) into a NumPy array
years = np.array(raw_df.index)

## Dataviz Output

In [None]:
# Load the African country dictionary
dict_path = "data/output/african_countries.json"

# Pass the encoding explicitly: without it open() falls back to the platform
# locale, so any non-ASCII text in the file could decode differently from one
# machine to another. JSON text is UTF-8 by specification.
with open(dict_path, encoding="utf-8") as json_file:
    ccDict = json.load(json_file)

In [None]:
# Build a zero-filled lookup {country key: {year: 0}} spanning every year found
# in the data. The values are overwritten by the populate step further down.
population_dict = {}

# Year labels are converted to int to get the bounds of the range
minYear, maxYear = int(min(years)), int(max(years))

for key in ccDict:
    # One independent {year: 0} mapping per country
    population_dict[key] = dict.fromkeys(range(minYear, maxYear + 1), 0)

In [None]:
# Copy each country's yearly values from the table into the output dict.
# Countries whose name has no matching column are printed and left untouched.
for key, country in ccDict.items():

    name = country['name']

    # Guard clause: report the unmatched name and move on to the next country
    if name not in raw_df.columns:
        print(name)
        continue

    # Column of values for this country, indexed by year label (a string)
    country_values = raw_df[name]

    for year in range(minYear, maxYear+1):
        population_dict[key][year] = country_values[str(year)]

In [None]:
# Serialise the populated dict to the JSON output file
output_path = 'data/output/gdp.json'
with open(output_path, 'w') as out_file:
    json.dump(population_dict, out_file)