# Prédiction de l'impact de sanctions économiques

## Notebook 1 - Modèle "naïf" (pas de feature engineering)

In [1]:
# import required libraries

from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier

pd.set_option('display.max_columns', 80)
sns.set_style("whitegrid")
%matplotlib inline

np.random.seed(0)

In [2]:
# import the data

base_df = pd.read_csv("input/sanctions.csv.gz", index_col=0)
print(base_df.shape)
# for col, typ in zip(base_df.columns, base_df.dtypes):
#     print(col, "=>", typ)
base_df.head(3)

(204, 38)


Unnamed: 0_level_0,Sender c,Sender 2c,Sender 3c,Targetd,Goal,US casee,US unilateral casef,Foreign policy goal categoryg,First yearh,Last yeari,Policy resultj,Sanctions contributionk,Success scorel,Companion policiesm,International cooperationn,International assistanceo,Cooperating international organizationp,International organization senderq,International organization sender & target membersr,Length (years)s,Prior relationst,Regime Type (DEMOC 1st year)u,Regime Type2 (Polity2 1st year)v,Regime Type3 (Scale 1st year)w,Political stability priorx,Political stability duringy,Cost to targetz,Cost to target (percent of GNP)aa,Cost to target per capitabb,Trade linkagecc,GNP ratiodd,Health and stabilityee,Sanction typeff,Cost to sendergg,"GDP growth (percent, 5-year average)hh","Inflation (percent,  3-year average)ii",Target International Monetary Fund codejj,Country groupkk
Case no.b,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1
14-1,UNITED KINGDOM,,,GERMANY,Military victory,0,0,4,1914,1918,4,3,12,R,4,A,--,0,0,4,1,5,2,2,0,0,843,71,1258,9,1,3,"F,X,M",4,36,29,134,1
17-1,UNITED STATES,,,JAPAN,Shipping for Allies,1,1,5,1917,1918,2,2,4,--,1,--,--,0,0,1,2,5,1,2,0,0,23,8,44,205,13,3,X,2,54,256,158,1
18-1,UNITED KINGDOM,,,RUSSIA,Destabilize Bolsheviks,0,0,2,1918,1920,1,2,2,"R,Q",4,--,--,0,0,2,1,1,-1,2,1,0,446,41,249,185,1,1,"F,X,M",3,n.a.,n.a.,922,2


In [3]:
# Drop information that is unknown at decision time

unknowns = [
    "Last yeari",
    "Length (years)s",
    "Political stability duringy",
    "Cost to targetz",
    "Cost to target (percent of GNP)aa",
    "Cost to target per capitabb",
    "Cost to sendergg",
    "Target International Monetary Fund codejj"
]

df = base_df.drop(unknowns, axis=1)

print(df.shape)
df.head(3)

(204, 30)


Unnamed: 0_level_0,Sender c,Sender 2c,Sender 3c,Targetd,Goal,US casee,US unilateral casef,Foreign policy goal categoryg,First yearh,Policy resultj,Sanctions contributionk,Success scorel,Companion policiesm,International cooperationn,International assistanceo,Cooperating international organizationp,International organization senderq,International organization sender & target membersr,Prior relationst,Regime Type (DEMOC 1st year)u,Regime Type2 (Polity2 1st year)v,Regime Type3 (Scale 1st year)w,Political stability priorx,Trade linkagecc,GNP ratiodd,Health and stabilityee,Sanction typeff,"GDP growth (percent, 5-year average)hh","Inflation (percent,  3-year average)ii",Country groupkk
Case no.b,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1
14-1,UNITED KINGDOM,,,GERMANY,Military victory,0,0,4,1914,4,3,12,R,4,A,--,0,0,1,5,2,2,0,9,1,3,"F,X,M",36,29,1
17-1,UNITED STATES,,,JAPAN,Shipping for Allies,1,1,5,1917,2,2,4,--,1,--,--,0,0,2,5,1,2,0,205,13,3,X,54,256,1
18-1,UNITED KINGDOM,,,RUSSIA,Destabilize Bolsheviks,0,0,2,1918,1,2,2,"R,Q",4,--,--,0,0,1,1,-1,2,1,185,1,1,"F,X,M",n.a.,n.a.,2


In [4]:
df.columns

Index(['Sender c', 'Sender 2c', 'Sender 3c', 'Targetd', 'Goal', 'US casee',
       'US unilateral casef', 'Foreign policy goal categoryg', 'First yearh',
       'Policy resultj', 'Sanctions contributionk', 'Success \nscorel',
       'Companion policiesm', 'International cooperationn',
       'International assistanceo', 'Cooperating international organizationp',
       'International organization senderq',
       'International organization sender & target membersr',
       'Prior relationst', 'Regime Type \n(DEMOC 1st year)u',
       'Regime Type2 (Polity2 1st year)v', 'Regime Type3 \n(Scale 1st year)w',
       'Political stability priorx', 'Trade linkagecc', 'GNP \nratiodd',
       'Health and stabilityee', 'Sanction typeff',
       'GDP growth (percent, 5-year average)hh',
       'Inflation (percent,\n 3-year average)ii', 'Country groupkk'],
      dtype='object')

In [5]:
new_names = [
    'Sender1', 'Sender2', 'Sender3', 'Target', 'Goal', 'US case',
    'US unilateral case', 'Foreign policy goal category', 'First year',
    'Policy result', 'Sanctions contribution', 'Success',
    'Companion policies', 'International cooperation',
    'International assistance', 'Cooperating international organization',
    'International organization sender',
    'International organization sender & target members',
    'Prior relationst', 'Regime Type',
    'Regime Type2', 'Regime Type3',
    'Political stability prior', 'Trade linkage', 'GNP \nratio',
    'Health and stability', 'Sanction type',
    'GDP growth',
    'Inflation', 'Country group']

In [6]:
name_dict = dict(zip(df.columns, new_names))
name_dict

{'Sender c': 'Sender1',
 'Sender 2c': 'Sender2',
 'Sender 3c': 'Sender3',
 'Targetd': 'Target',
 'Goal': 'Goal',
 'US casee': 'US case',
 'US unilateral casef': 'US unilateral case',
 'Foreign policy goal categoryg': 'Foreign policy goal category',
 'First yearh': 'First year',
 'Policy resultj': 'Policy result',
 'Sanctions contributionk': 'Sanctions contribution',
 'Success \nscorel': 'Success',
 'Companion policiesm': 'Companion policies',
 'International cooperationn': 'International cooperation',
 'International assistanceo': 'International assistance',
 'Cooperating international organizationp': 'Cooperating international organization',
 'International organization senderq': 'International organization sender',
 'International organization sender & target membersr': 'International organization sender & target members',
 'Prior relationst': 'Prior relationst',
 'Regime Type \n(DEMOC 1st year)u': 'Regime Type',
 'Regime Type2 (Polity2 1st year)v': 'Regime Type2',
 'Regime Type3 \n(

In [7]:
df = df.rename(columns=name_dict)

In [8]:
df["score"] = (df['Success'] > 9).astype(int)

In [9]:
df.columns

Index(['Sender1', 'Sender2', 'Sender3', 'Target', 'Goal', 'US case',
       'US unilateral case', 'Foreign policy goal category', 'First year',
       'Policy result', 'Sanctions contribution', 'Success',
       'Companion policies', 'International cooperation',
       'International assistance', 'Cooperating international organization',
       'International organization sender',
       'International organization sender & target members',
       'Prior relationst', 'Regime Type', 'Regime Type2', 'Regime Type3',
       'Political stability prior', 'Trade linkage', 'GNP \nratio',
       'Health and stability', 'Sanction type', 'GDP growth', 'Inflation',
       'Country group', 'score'],
      dtype='object')

In [10]:
df = df[['Sender1', 'Sender2', 'Sender3', 'Target', 'Goal', 'US case',
       'US unilateral case', 'Foreign policy goal category', 'First year',
       'Policy result', 'Sanctions contribution', 'Success', 'score',
       'Companion policies', 'International cooperation',
       'International assistance', 'Cooperating international organization',
       'International organization sender',
       'International organization sender & target members',
       'Prior relationst', 'Regime Type', 'Regime Type2', 'Regime Type3',
       'Political stability prior', 'Trade linkage', 'GNP \nratio',
       'Health and stability', 'Sanction type', 'GDP growth', 'Inflation',
       'Country group']]

In [11]:
df

Unnamed: 0_level_0,Sender1,Sender2,Sender3,Target,Goal,US case,US unilateral case,Foreign policy goal category,First year,Policy result,Sanctions contribution,Success,score,Companion policies,International cooperation,International assistance,Cooperating international organization,International organization sender,International organization sender & target members,Prior relationst,Regime Type,Regime Type2,Regime Type3,Political stability prior,Trade linkage,GNP ratio,Health and stability,Sanction type,GDP growth,Inflation,Country group
Case no.b,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1
14-1,UNITED KINGDOM,,,GERMANY,Military victory,0,0,4,1914,4,3,12,1,R,4,A,--,0,0,1,5,2,2,00,9,1,3,"F,X,M",36,29,1
17-1,UNITED STATES,,,JAPAN,Shipping for Allies,1,1,5,1917,2,2,4,0,--,1,--,--,0,0,2,5,1,2,00,205,13,3,X,54,256,1
18-1,UNITED KINGDOM,,,RUSSIA,Destabilize Bolsheviks,0,0,2,1918,1,2,2,0,"R,Q",4,--,--,0,0,1,1,-1,2,01,185,1,1,"F,X,M",n.a.,n.a.,2
21-1,LEAGUE OF NATIONS,,,YUGOSLAVIA,Military disruption vs. Albania,0,0,3,1921,4,4,16,1,--,4,--,--,1,1,2,3,0,2,04,265,37,2,--,-15,n.a.,2
25-1,LEAGUE OF NATIONS,,,GREECE,Withdraw from Bulgaria,0,0,3,1925,4,4,16,1,--,4,--,--,1,1,2,0,-6,1,09,36,56,2,--,33,488,1
32-1,LEAGUE OF NATIONS,,,"(PARAGUAY), BOLIVIA",Settle the Chaco War,0,0,3,1932,3,2,6,0,--,3,--,--,1,1,2,4,2,2,00,63,322,2,X,n.a.,n.a.,3
32-1,LEAGUE OF NATIONS,,,"PARAGUAY,(BOLIVIA)",Settle the Chaco War,0,0,3,1932,3,2,6,0,--,3,--,--,1,1,2,1,-3,2,00,595,735,2,X,n.a.,n.a.,3
33-1,UNITED KINGDOM,,,USSR,Release British citizens,0,0,1,1933,4,3,12,1,--,1,--,--,0,0,1,0,-9,1,00,13,1,2,M,24,n.a.,2
35-1,LEAGUE OF NATIONS,UNITED KINGDOM,,ITALY,Withdraw from Abyssinia,0,0,3,1935,1,2,2,0,--,4,A,--,1,1,2,0,-9,1,03,16,6,3,"F,X,M",-05,-44,1
38-1,UNITED STATES,UNITED KINGDOM,,MEXICO,Expropriation dispute,1,0,1,1938,3,3,9,0,--,2,--,--,0,0,2,0,-6,1,01,695,75,3,"F,M",74,21,3


In [12]:
df_scored = df[['Sender1', 'Sender2', 'Sender3', 'Target', 'Goal', 'US case',
       'US unilateral case', 'Foreign policy goal category', 'First year', 'score',
       'Companion policies', 'International cooperation',
       'International assistance', 'Cooperating international organization',
       'International organization sender',
       'International organization sender & target members',
       'Prior relationst', 'Regime Type', 'Regime Type2', 'Regime Type3',
       'Political stability prior', 'Trade linkage', 'GNP \nratio',
       'Health and stability', 'Sanction type', 'GDP growth', 'Inflation',
       'Country group']]

## Entrainement

In [13]:
rf_pipeline = {
    'rf' : make_pipeline(StandardScaler(), RandomForestClassifier(random_state = 42))
}

rf__hyperparameters = {
    'randomforestclassifier__n_estimators' : [100, 200],
    'randomforestclassifier__max_features' : ['auto', 'sqrt', 0.33],
    'randomforestclassifier__min_samples_leaf' : [1, 3, 5, 10]
}

model = GridSearchCV(rf_pipeline, rf__hyperparameters, cv= 10, n_jobs= -1)
#model.fit(x_train, y_train)