In [71]:
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from sklearn.model_selection import train_test_split
import csv
import pandas as pd
import numpy as np

In [72]:
def MAPE(y_true, y_pred):
    """Return the mean absolute percentage error, expressed in percent.

    Computes ``mean(|(y_true - y_pred) / y_true|) * 100``.

    Parameters
    ----------
    y_true : array-like
        Observed values (the denominator of each percentage error).
    y_pred : array-like
        Predicted values, broadcastable against ``y_true``.

    Returns
    -------
    float
        The mean of the element-wise absolute percentage errors, times 100.

    Notes
    -----
    Both inputs are coerced to float NumPy arrays, so plain Python lists are
    accepted and pandas Series are compared positionally rather than being
    aligned on their index labels.

    A zero in ``y_true`` divides by zero: NumPy emits a RuntimeWarning and the
    result is ``inf`` (or ``nan`` when the matching prediction is also zero).
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

## Data Setup

In [73]:
# Load the cleaned UN table.
un_df = pd.read_csv('../data/UN_data_clean.csv')

# Ground truth for all of the models: the `migrant_pop` values of the rows
# whose age_group is 'Total', sex is 'both sexes' and year is 2019.
is_total_2019 = (
    (un_df['age_group'] == 'Total')
    & (un_df['sex'] == 'both sexes')
    & (un_df['year'] == 2019)
)
y = np.array(un_df.loc[is_total_2019, 'migrant_pop'])

In [74]:
# Load the combined Facebook + UN table.
fb_df = pd.read_csv('../data/facebook_un_combined.csv')

# Target: the `migrant_2019` column.
gt = fb_df['migrant_2019'].values

# Feature matrix: the columns at positions 2..176 of the frame.
# NOTE(review): confirm this positional slice does not include `migrant_2019`
# itself, otherwise the target leaks into the predictors.
fb_matrix = fb_df.values
predictors = fb_matrix[:, 2:177]

In [77]:
# Hyperparameters shared by the model cells below.
# `random` is passed as `random_state` to both regressors.
random = 0
# `depth` is passed as `max_depth` to the random forests.
depth = 5

### Simple Autoregressive Model

In [88]:
# format data
# Features: the two earlier UN totals stored in the combined table.
X = fb_df[['un_total_age16_2017', 'un_total_age16_2015']].values
# Take the target from the same frame as the features so every row of X is
# paired with its own label, independent of the order cells were run in.
# `gt` is deliberately not reassigned here: the complete model further down
# reads it.
# NOTE(review): assumes `migrant_2019` in fb_df is the intended target for this
# model -- confirm against the UN-derived array built earlier in the notebook.
y = fb_df['migrant_2019'].values
# Hold out 20% of the rows for evaluation; fixed seed for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [89]:
# random forest
# "absolute_error" is the spelling scikit-learn >= 1.0 uses for the criterion
# formerly called "mae"; the old name was removed in 1.2 and now raises.
# (On scikit-learn < 1.0 the value must be "mae".)
rf_regr = RandomForestRegressor(max_depth=depth, random_state=random, criterion="absolute_error")
rf_regr.fit(X_train, y_train)
# Score on the held-out split.
y_pred = rf_regr.predict(X_test)
print('Mean Absolute Percentage Error: {0}'.format(MAPE(y_test, y_pred)))

Mean Absolute Percentage Error: 354.1300745781506


In [90]:
# AdaBoost with default settings apart from the shared seed.
# `fit` returns the estimator itself, so construction and training are chained.
ada_regr = AdaBoostRegressor(random_state=random).fit(X_train, y_train)
# Score on the held-out split.
y_pred = ada_regr.predict(X_test)
ada_mape = MAPE(y_test, y_pred)
print('Mean Absolute Percentage Error: {0}'.format(ada_mape))

Mean Absolute Percentage Error: 980.7322046110004


### Complete Model

In [78]:
# Features: the full predictor matrix sliced from fb_df.
X = predictors
# Read the target directly from fb_df instead of through `gt`: another cell in
# this notebook reassigns `gt`, so depending on it makes the target vary with
# the order in which cells were executed.
y = fb_df['migrant_2019'].values
# Hold out 20% of the rows for evaluation; fixed seed for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

In [79]:
# random forest
# "absolute_error" is the spelling scikit-learn >= 1.0 uses for the criterion
# formerly called "mae"; the old name was removed in 1.2 and now raises.
# (On scikit-learn < 1.0 the value must be "mae".)
rf_regr = RandomForestRegressor(max_depth=depth, random_state=random, criterion="absolute_error")
rf_regr.fit(X_train, y_train)
# Score on the held-out split.
y_pred = rf_regr.predict(X_test)
print('Mean Absolute Percentage Error: {0}'.format(MAPE(y_test, y_pred)))

Mean Absolute Percentage Error: 26.441598134479392


In [80]:
# AdaBoost with default settings apart from the shared seed.
# `fit` returns the estimator itself, so construction and training are chained.
ada_regr = AdaBoostRegressor(random_state=random).fit(X_train, y_train)
# Score on the held-out split.
y_pred = ada_regr.predict(X_test)
ada_mape = MAPE(y_test, y_pred)
print('Mean Absolute Percentage Error: {0}'.format(ada_mape))

Mean Absolute Percentage Error: 1229.4799041098702
