In [None]:
import json
import pandas as pd
from fuzzywuzzy import process
from sklearn.preprocessing import MinMaxScaler
from statsmodels.iolib.smpickle import load_pickle

## Run model on new conflict

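Set the values below as the parameters for this model run. Note that `conflict_start` is written as an arithmetic expression: `2021-1` evaluates to the integer 2020 (the year before the conflict), not a year-month value.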


In [None]:
# Run parameters: edit these before executing the rest of the notebook.
conflict_country = "Honduras"   # key looked up in the border data
conflict_start = 2021 - 1       # integer arithmetic: evaluates to 2020
excluded_countries = list()     # names to fuzzy-match and remove from the bordering list
added_countries = list()        # extra names appended to the bordering list

read in country border data

In [None]:
# Load the border data (indexed below by country name to get its neighbours).
# The file is opened in a `with` block so the handle is closed as soon as the
# JSON has been parsed instead of staying open for the life of the kernel.
with open('refugee_data/country_border_data.json') as country_border:
    countries_that_border = json.load(country_border)

get a list of touching countries

In [None]:
# Take a *copy* of the neighbour list. Later cells pop from and append to
# touching_list; without the copy those edits would also change the entry
# stored in countries_that_border, so re-running cells would compound them.
touching_list = list(countries_that_border[conflict_country])
touching_list

remove any countries that are to be excluded.


In [None]:
# Position -> country name mapping. Passing a dict to extractOne makes it
# return the key (here: the list position) alongside the match and its score.
indexed_list = dict(enumerate(touching_list))

for ex in excluded_countries:
    # Rebuild the mapping on every pass: pop() shifts the positions of all
    # later entries, so a mapping built once would point at the wrong country
    # (or past the end of the list) after the first removal.
    indexed_list = dict(enumerate(touching_list))
    match = process.extractOne(ex, indexed_list)
    if match is None:
        # extractOne returns None when there is nothing left to match against.
        print(f'No bordering countries left to match against {ex}')
        continue
    country, value, ind = match
    # Only remove when the fuzzy-match score is above 89.
    if value > 89:
        touching_list.pop(ind)
    print(country, value, ind)
    print(touching_list)

Add any additional countries that should be included (the entries of `added_countries`).

In [None]:
# Append every manually requested country, then show the resulting list.
touching_list.extend(added_countries)
touching_list

create a dataframe to build upon.


In [None]:
# One row per bordering country, tagged with the conflict it belongs to.
touching_df = (
    pd.DataFrame(touching_list, columns=['bording_countries'])
    .assign(conflict=conflict_country)
)

read in historic_pop

In [None]:
# Historic population table; later cells read its "Country Name" column and a year column.
historic_pop = pd.read_csv(filepath_or_buffer='refugee_data/historic_pop.csv')

Get historic populations from year before conflict


In [None]:
# Candidate country names to fuzzy-match against, plus an empty target column.
options = historic_pop["Country Name"]
touching_df['historic_pop'] = None
historic_pop_cols = historic_pop.columns

# Position -> column label mapping, so extractOne also reports which position matched.
indexed_col = dict(enumerate(historic_pop_cols))

# Choose the column whose label is the closest fuzzy match to the year string.
column, ratio_year, year_column_idx = process.extractOne(str(conflict_start), indexed_col)

# For each bordering country, find the best-matching row of the population
# table and copy the value from the chosen year column.
for kk, name in touching_df["bording_countries"].items():
    country, ratio, ind = process.extractOne(name, options)
    touching_df.loc[kk, "historic_pop"] = historic_pop.at[ind, column]

read in historic gdp per cap data


In [None]:
# Historic GDP-per-capita table; later cells read its "Country Name" column and a year column.
gdp_per_cap_historic = pd.read_csv(filepath_or_buffer="refugee_data/gdp_per_cap.csv")

collect gdp per cap for year before conflict


In [None]:
# Candidate country names to fuzzy-match against, plus an empty target column.
options = gdp_per_cap_historic["Country Name"]
touching_df['historic_gdp_per_cap'] = None

# Position -> column label mapping for the GDP-per-capita table.
historic_gdp_cols = gdp_per_cap_historic.columns
indexed_gdp_col = dict(enumerate(historic_gdp_cols))

# Choose the column whose label is the closest fuzzy match to the year string.
column, ratio_year, year_column_idx = process.extractOne(str(conflict_start), indexed_gdp_col)

# Copy the matched country's value from the chosen year column into touching_df.
for kk, name in touching_df["bording_countries"].items():
    country, ratio, ind = process.extractOne(name, options)
    touching_df.loc[kk, "historic_gdp_per_cap"] = gdp_per_cap_historic.at[ind, column]

calculate historic gdp for later use if needed


In [None]:
# Total GDP estimate per country: population (as int) times GDP per capita (as float).
touching_df['calculated_historic_gdp'] = None
for kk, row in touching_df.iterrows():
    try:
        population = int(row['historic_pop'])
        per_capita = float(row['historic_gdp_per_cap'])
        touching_df.loc[kk, "calculated_historic_gdp"] = population * per_capita
    except Exception as e:
        # Best effort: report the failure and leave this row's value as None.
        # NOTE(review): the message only mentions GDP per cap, but the failure
        # can equally come from the historic_pop conversion above.
        print(e, f'GDP per cap for {row["bording_countries"]} data was {row["historic_gdp_per_cap"]}' )


Read in the V-Dem data and subset the dataset to include only the columns we care about.

In [None]:
# Load the V-Dem core file and keep only the identifying columns and the two indices used below.
columnList = ["country_name", "year", "v2xeg_eqdr", "v2x_libdem"]
Dem = pd.read_csv(filepath_or_buffer="refugee_data/V-Dem-CY-Core-v12.csv")
country_dem = Dem.loc[:, columnList]

Collect the liberal democracy and access-to-justice-for-women data for each country for the given conflict year.


In [None]:
# Empty target columns; rows with no V-Dem data keep None.
touching_df["v2x_libdem"] = None
touching_df["v2xeg_eqdr"] = None

# Distinct V-Dem country names to fuzzy-match against. With a plain array
# extractOne returns just (match, score).
options = country_dem['country_name'].unique()

# The year filter does not depend on the country, so apply it once up front.
year_rows = country_dem.loc[country_dem["year"] == int(conflict_start)]

for kk, row in touching_df.iterrows():
    country, ratio = process.extractOne(row["bording_countries"], options)
    match = year_rows.loc[year_rows["country_name"] == country]
    if match.empty:
        # No row for this country/year: leave both values as None instead of
        # failing with IndexError on an empty list.
        print(f'No V-Dem row for {country} in {int(conflict_start)}; leaving values empty')
        continue
    touching_df.loc[kk, "v2xeg_eqdr"] = match['v2xeg_eqdr'].to_list()[0]
    touching_df.loc[kk, "v2x_libdem"] = match['v2x_libdem'].to_list()[0]

read in historic gdp from world bank not calculated manually.

In [None]:
# Historic total-GDP table; later cells read its "Country Name" column and a year column.
historic_GDP = pd.read_csv(filepath_or_buffer='refugee_data/GDP_historic.csv')

Get each country's historic GDP (total, not per capita) for the given year.


In [None]:
# Candidate country names to fuzzy-match against, plus an empty target column.
options = historic_GDP["Country Name"]
touching_df['historic_GDP'] = None

# Position -> column label mapping for the GDP table.
historic_GDP_cols = historic_GDP.columns
indexed_GDP_col = dict(enumerate(historic_GDP_cols))

# Choose the column whose label is the closest fuzzy match to the year string.
column, ratio_year, year_column_idx = process.extractOne(str(conflict_start), indexed_GDP_col)

# Copy the matched country's value from the chosen year column into touching_df.
for kk, name in touching_df["bording_countries"].items():
    country, ratio, ind = process.extractOne(name, options)
    touching_df.loc[kk, "historic_GDP"] = historic_GDP.at[ind, column]

In [None]:
# Display the assembled per-country table for inspection.
touching_df

Normalize the columns we will run the model on that are not already an index. For our current model that is only historic_GDP.

In [None]:

# Use the shorter column name 'country' from here on.
touching_df = touching_df.rename(columns={'bording_countries': 'country'})

# Columns that get a min-max scaled "<name>_norm" companion column.
cols_to_scale = ['historic_GDP']

In [None]:
# Min-max scale each requested column within its conflict group and attach the
# result to the table as "<col>_norm".
scaler = MinMaxScaler()

# Start from the full table and merge one "<col>_norm" column per pass, so that
# every scaled column is kept (not only the last one).
normalized_data = touching_df
for col in cols_to_scale:
    print(col)
    frames = []
    for conflict_name, group in touching_df.groupby('conflict'):
        # fit_transform expects a 2-D array; take the single output column back out.
        scaled = scaler.fit_transform(group[col].values.reshape(-1, 1))
        frames.append(pd.DataFrame({
            'country': group['country'].to_numpy(),
            'conflict': group['conflict'].to_numpy(),
            f"{col}_norm": scaled[:, 0],
        }))
    # Build the frame once with pd.concat; DataFrame.append no longer exists in
    # pandas 2.x and was quadratic when used in a loop.
    if frames:
        normed = pd.concat(frames, ignore_index=True)
    else:
        normed = pd.DataFrame(columns=['country', 'conflict', f"{col}_norm"])
    normalized_data = pd.merge(normalized_data, normed, on=['country', 'conflict'], how='right')

In [None]:
# Display the table with the normalized column(s) attached.
normalized_data

Load the prebuilt model.

In [None]:
# Load the pickled object whose .predict() is called later in the notebook.
# NOTE(review): unpickling can execute arbitrary code; only load this file
# if it comes from a trusted source.
new_results = load_pickle("refugee_model_results.pickle")

Set the independent variables.


In [None]:
# `data` is the frame all remaining cells work on. It was never assigned in the
# notebook, so a fresh Restart & Run All failed with NameError here. Define it
# from normalized_data (the frame that carries 'historic_GDP_norm'); the copy
# keeps later column additions from leaking back into normalized_data.
data = normalized_data.copy()

# Model inputs: the independent variables passed to predict().
features_cols = [
    'historic_GDP_norm',
    'v2x_libdem',
]
features_normalized = data[features_cols]

In [None]:
# Show the identifying columns next to the model inputs.
data.loc[:, ['country', "conflict", *features_cols]]

Currently, if a country is missing data in any column, that country is dropped. To keep such a country, fill in the missing values first, either manually or with some other method.


In [None]:
# Drop every row (country) that has a missing value in any column.
data = data.dropna(axis=0, how='any')

Select only features that are needed and use the model to predict 


In [None]:
# Select the model inputs, run the loaded model on them, and store the result per row.
features_to_predict = data.loc[:, features_cols]
shares = new_results.predict(features_to_predict)
data['predicted_shares'] = shares

save results to a csv file for next step.

In [None]:
# Keep only the identifying columns plus the prediction and write them out for the next step.
output_results = data.loc[:, ['country', "conflict", 'predicted_shares']]
output_results.to_csv(f'outputs/{conflict_country}_output_results.csv', index=False)

In [None]:
# Display the rows that were written to the CSV.
output_results