In [None]:
# Libraries
import numpy as np # Matrix calculations
import pandas as pd # Data structures
import matplotlib.pyplot as plt # Graphics
import re # regular expressions

# Plotly graphics library
import plotly.figure_factory as ff
import plotly.express as px
import plotly.graph_objects as go

In [None]:
"""
value: The conversion rate is calculated using the average for the year in question, which has
been supplied by Oanda.

"""

In [None]:
# Location of the CSV file and the full list of columns it contains
path = "./arms_trades_exports.csv"
header = "Reporter_Code,Reporter_Name,Partner_Code,Partner_Name,ImportOrExport,Year,Period_Start,Period_End,Weapons_Type,Units,Value,Currency,Licenses_Issued,Weight,Licenses_Refused,AuthOrDel,GovtOrInd,Data_Source,Reliability,Accuracy,SmallArmsOnly,Comment,GlobalComment"

# Load only the columns used in this notebook
raw_df = pd.read_csv(
    path,
    sep=",",
    usecols=[
        "Reporter_Code",
        "Partner_Code",
        "ImportOrExport",
        "Year",
        "Weapons_Type",
        "Value",
        "Reliability",
        "Accuracy",
    ],
)

# Report how many rows were loaded
print(f"Nbr of rows : {len(raw_df)}")

In [None]:
# Encode the categorical columns as integer codes
# (values that are not keys of a mapping become NaN)
raw_df['ImportOrExport'] = raw_df['ImportOrExport'].map({'Export': 0, 'Import': 1})
raw_df['Accuracy'] = raw_df['Accuracy'].map({'Low': 0, 'Medium': 1, 'High': 2})

# Reliability: keep the text before the first "/", then encode it
raw_df['Reliability'] = (
    raw_df['Reliability']
    .str.split("/")
    .str[0]
    .map({'Pri': 0, 'Sec': 1})
)

# Year: split on "-" so that each entry becomes a list of its parts
raw_df['Year'] = raw_df['Year'].str.split("-")

In [None]:
def is_float(x):
    """Return True if ``x`` can be converted with ``float()``, False otherwise.

    Parameters
    ----------
    x : object
        Any value; typically a string or a number read from the CSV.

    Returns
    -------
    bool
        True when ``float(x)`` succeeds. Note that ``float()`` also accepts
        strings such as "nan" and "inf", so those return True as well.
    """
    try:
        float(x)
    except (TypeError, ValueError, OverflowError):
        # ValueError: unparsable string; TypeError: unsupported type such as
        # None or a list; OverflowError: integer too large for a float.
        return False
    return True

In [None]:
# Keep the columns needed downstream and drop the rows that have no
# reporter code, partner code or value
clean_df = (
    raw_df[['Reporter_Code', 'Partner_Code', 'ImportOrExport', 'Value', 'Year', 'Weapons_Type']]
    .dropna(subset=['Reporter_Code', 'Partner_Code', 'Value'])
)

# Keep only the rows whose codes and value are accepted by float()
clean_df = clean_df[clean_df['Reporter_Code'].apply(is_float)]
clean_df = clean_df[clean_df['Partner_Code'].apply(is_float)]
clean_df = clean_df[clean_df['Value'].apply(is_float)]

# Report how many rows are left after cleaning
print(f"Nbr of rows : {len(clean_df)}")

In [None]:
# Cast the numeric columns to their final dtypes in a single call.
# clean_df is rebound to the frame returned by astype() instead of assigning
# column by column into a frame that came out of boolean filtering; this
# avoids pandas' SettingWithCopyWarning.
# NOTE(review): the int casts raise if a column still holds NaN (e.g. an
# ImportOrExport value that was neither 'Export' nor 'Import') — confirm the
# data never contains such rows.
clean_df = clean_df.astype({
    'Reporter_Code': int,
    'Partner_Code': int,
    'ImportOrExport': int,
    'Value': float,
})

In [None]:
# Show the first 10 rows of the cleaned frame
clean_df.head(n=10)
