In [None]:
import pandas as pd
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler
from fuzzywuzzy import process

Import collected data.

In [None]:
# Load the collected refugee dataset, then exclude the Russian Federation rows.
data = pd.read_csv('refugee_data/refugee_data_final.csv')
keep_rows = data['country'] != 'Russian Federation'
data = data[keep_rows]

Read in and collect the liberal democracy index and the access to justice for women index.

In [None]:
# Attach two per-country indicators from country_dem.csv to every row of `data`:
# v2x_libdem and v2xeg_eqdr, both taken from the year before the conflict started.
country_dem = pd.read_csv('refugee_data/country_dem.csv')
# Start the new columns as float NaN (not None) so they end up numeric rather
# than object dtype.
data["v2x_libdem"] = np.nan
data["v2xeg_eqdr"] = np.nan

options = country_dem['country_name'].unique()

# Country names in `data` are fuzzy-matched against the names in country_dem.
# Match each distinct country once and reuse the result instead of re-running
# the matcher for every row.
matched_names = {
    name: process.extractOne(name, options)[0]
    for name in data["country"].unique()
}

for kk, row in data.iterrows():
    country = matched_names[row["country"]]
    prior_year = int(row['conflict_start_year'] - 1)
    # Build the country/year filter once and read both indicators from it.
    prior_rows = country_dem.loc[
        (country_dem["country_name"] == country) & (country_dem["year"] == prior_year),
        ["v2x_libdem", "v2xeg_eqdr"],
    ]
    if prior_rows.empty:
        # Name the offending key instead of failing with a bare
        # "list index out of range".
        raise IndexError(f"country_dem has no row for {country!r} in {prior_year}")
    # As before, the first matching row wins if there are several.
    first_match = prior_rows.iloc[0]
    data.loc[kk, "v2xeg_eqdr"] = first_match["v2xeg_eqdr"]
    data.loc[kk, "v2x_libdem"] = first_match["v2x_libdem"]

In [None]:
# Spot-check the last rows to see the indicator columns next to country and start year.
data.loc[:, ['v2xeg_eqdr', 'v2x_libdem', 'country', 'conflict_start_year']].tail()

Read in Historic GDP and get values for haven countries

In [None]:

# Look up, for every row of `data`, the GDP value from GDP_historic.csv for the
# row's country in the year before the conflict started.
historic_GDP = pd.read_csv('refugee_data/GDP_historic.csv')
options = historic_GDP["Country Name"]
data['historic_GDP'] = None
historic_GDP_cols = historic_GDP.columns
# position -> column label; passing a mapping makes extractOne return
# (label, score, key).
indexed_GDP_col = dict(enumerate(historic_GDP_cols))

# Fuzzy matching is the slow part, so remember the answer per distinct year and
# per distinct country instead of recomputing it for every row.
year_column_cache = {}
country_row_cache = {}

for kk, border in data.iterrows():
    # Cast to int, matching how the democracy-index cell builds its year key,
    # so a float year is matched as "2010" rather than "2010.0".
    prior_year = str(int(border['conflict_start_year'] - 1))
    if prior_year not in year_column_cache:
        year_column_cache[prior_year] = process.extractOne(prior_year, indexed_GDP_col)[0]
    column = year_column_cache[prior_year]

    country_name = border["country"]
    if country_name not in country_row_cache:
        # `options` is a Series, so the third element is the matching row's index label.
        country_row_cache[country_name] = process.extractOne(country_name, options)[2]
    ind = country_row_cache[country_name]

    data.loc[kk, "historic_GDP"] = historic_GDP.at[ind, column]

In [None]:
# Recompute each row's share of its conflict's total, now that Russia has been dropped.
conflict_totals = data.groupby('conflict')['individualPerCountry'].transform('sum')
data['pct_tot'] = data['individualPerCountry'] / conflict_totals

In [None]:
# Bilateral migration divided by population.
data['bilateral_migration_percap'] = data['bilateral_migration'].div(data['population'])

In [None]:
# gdp_millions is multiplied by 1,000,000 and then divided by population.
gdp_scaled = data['gdp_millions'] * 1_000_000
data['gdp_per_cap'] = gdp_scaled / data['population']

In [None]:
# Total recorded migrants divided by population.
data['migrants_per_cap'] = data['total_recored_migrants'].div(data['population'])

In [None]:
# Columns that get a per-conflict min-max scaled "<name>_norm" companion column.
cols_to_scale = [
    'bilateral_migration',
    'historic_GDP',
    'gdp_millions',
    'population',
    'remittances',
]

In [None]:
# Min-max scale every column in cols_to_scale separately within each conflict,
# storing the result in a new "<column>_norm" column of `data`.
scaler = MinMaxScaler()
for col in cols_to_scale:
    print(col)
    norm_col = f"{col}_norm"

    # One Series of scaled values per conflict, keyed by the original row labels.
    scaled_pieces = []
    for _, conflict_rows in data.groupby('conflict'):
        # Refit on this conflict's rows only, so the scaling range comes from
        # that conflict's own minimum and maximum.
        scaled = scaler.fit_transform(conflict_rows[col].values.reshape(-1, 1))
        scaled_pieces.append(pd.Series(scaled.ravel(), index=conflict_rows.index))

    # Assign by index label rather than DataFrame.append (removed in pandas 2.0)
    # followed by a re-merge on (country, conflict): the merge multiplies rows
    # whenever that pair repeats and produces suffixed columns if the cell is
    # re-run. Rows whose conflict is missing are kept and receive NaN.
    # Assumes `data` has a unique index, as it does straight after read_csv.
    data[norm_col] = pd.concat(scaled_pieces) if scaled_pieces else np.nan

Create a dataframe of just Ukraine conflict

In [None]:
# Rows whose conflict label is exactly 'Ukraine'.
is_ukraine_conflict = data['conflict'].eq('Ukraine')
ukr = data[is_ukraine_conflict]

Create a dataframe without Ukraine to train the model

In [None]:
# Training rows: the Ukraine flag is not 1 and touching == 1.
not_ukraine = data["Ukraine"].ne(1)
is_touching = data['touching'].eq(1)
withoutUkrainData = data[not_ukraine & is_touching]

Set the dependent variable

In [None]:
# Regression target: each row's share of its conflict's total.
y = withoutUkrainData.loc[:, 'pct_tot']

Set the independent variables


In [None]:
# Explanatory variables used by the regression.
features_cols = ['historic_GDP_norm', 'v2x_libdem']
# Design matrix restricted to the training rows.
features_normalized = withoutUkrainData.loc[:, features_cols]

In [None]:
Run the linear regression.

In [None]:
# Run the linear regression. 
import statsmodels.api as sm

In [None]:
# Fit ordinary least squares of y on the selected features, cast to float first.
# NOTE(review): no constant column is added here, so the fit has no intercept
# unless one of the features acts as one — confirm that is intended.
design_matrix = features_normalized.astype(float)
ols_model = sm.OLS(y, design_matrix)
results = ols_model.fit()

In [None]:
# Display the summary of the model fitted without the Ukraine rows.
results.summary()

In [None]:
# Predict a share for every row of `data` using the model fitted without
# Ukraine, then export actual vs. predicted shares for the Ukraine rows.
# Cast to float just as is done when fitting: the feature columns can be object
# dtype, which would otherwise flow through into the predictions.
features_to_predict = data[features_cols].astype(float)
shares = results.predict(features_to_predict)
data['predicted_shares'] = shares
# Select rows and columns in one .loc call rather than chained indexing.
ukr_results = data.loc[data['Ukraine'] == 1, ['country', 'pct_tot', 'predicted_shares']]
ukr_results.to_csv('outputs/ukraine_model_results.csv', index=False)

In [None]:
# Display country, actual share and predicted share for the Ukraine rows.
ukr_results

In [None]:
# Save the fitted results object to refugee_model_results2.pickle in the working directory.
results.save("refugee_model_results2.pickle")

# Run with Ukraine

In [None]:
# Second run: train on every row where touching == 1, Ukraine rows included.
touching_mask = data['touching'].eq(1)
withUkrainData = data[touching_mask]
# Regression target for this run.
y = withUkrainData.loc[:, 'pct_tot']

In [None]:
# Same explanatory variables as the first run.
features_cols = ['historic_GDP_norm', 'v2x_libdem']
# Design matrix restricted to the rows of the with-Ukraine training set.
features_normalized = withUkrainData.loc[:, features_cols]

In [None]:
# Fit ordinary least squares on the training set that includes Ukraine.
# NOTE(review): no constant column is added, so this fit also has no
# intercept unless a feature supplies one — confirm that is intended.
design_matrix_WU = features_normalized.astype(float)
ols_model_WU = sm.OLS(y, design_matrix_WU)
results_WU = ols_model_WU.fit()

In [None]:
# Display the summary of the model fitted with the Ukraine rows included.
results_WU.summary()

In [None]:
# Predict a share for every row of `data` using the model fitted with Ukraine
# included, then export actual vs. predicted shares for the Ukraine rows.
# Note that this overwrites any existing data['predicted_shares'] column.
# Cast to float just as is done when fitting: the feature columns can be object
# dtype, which would otherwise flow through into the predictions.
features_to_predict = data[features_cols].astype(float)
shares = results_WU.predict(features_to_predict)
data['predicted_shares'] = shares
# Select rows and columns in one .loc call rather than chained indexing.
ukr_results_WU = data.loc[data['Ukraine'] == 1, ['country', 'pct_tot', 'predicted_shares']]
ukr_results_WU.to_csv('outputs/ukraine_model_results_WU.csv', index=False)

In [None]:
# Display country, actual share and predicted share from the with-Ukraine model.
ukr_results_WU