
# UNHCR Forcibly Displaced Populations Capstone Project

## Neural Network Classifier

---


In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.preprocessing import LabelBinarizer, StandardScaler
from sklearn.preprocessing import label_binarize
from sklearn import preprocessing
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.impute import SimpleImputer as Imputer
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import recall_score, precision_score, f1_score, classification_report
from sklearn.metrics import  plot_confusion_matrix, plot_roc_curve, plot_precision_recall_curve, roc_auc_score, average_precision_score
import scikitplot as skplt
import warnings

In [None]:
# Read the cleaned UNHCR dataset from its CSV export and preview the first rows.
csv_path = '/Users/dayosangowawa/Desktop/GA/DSI20-lessons/projects/project-capstone/My Capstone/Forcibly displaced persons - Capstone Dayo Sangowawa/cleaned_unhcrdf_final.csv'
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Remove the 'Unnamed: 0' column in place so it is not used as a predictor
# (presumably an index saved by an earlier to_csv call - confirm).
df.drop('Unnamed: 0', axis=1, inplace=True)

In [None]:
# Split off the target column; every remaining column becomes a predictor.
target_col = 'accommodation_type'
y = df.pop(target_col)
# One-hot encode the categorical predictors, dropping the first level of each
# so the dummy columns are not redundant.
X = pd.get_dummies(df, drop_first=True)

In [None]:
# Baseline accuracy: the share of the most frequent class, i.e. the score of
# a model that always predicts that class.
class_shares = y.value_counts(normalize=True)
print('Baseline prediction: ', class_shares.max())

In [None]:
# Replace the class labels with their numeric indicator encoding; the fitted
# binarizer is kept in `lb` so the original labels remain recoverable.
lb = preprocessing.LabelBinarizer()
lb.fit(y)
y = lb.transform(y)

In [None]:
y
# Encoding of the target: 0 if the accommodation type is 'I' and 1 if it is 'other'.

In [None]:
# Flatten the binarized labels into a 1-D vector before splitting and fitting.
y = np.ravel(y)

In [None]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both partitions; the seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
    stratify=y,
)

In [None]:
# Learn the scaling statistics from the training rows only, then apply that
# same transformation to both partitions so the test set does not influence
# the scaler.
scaler = StandardScaler().fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# First-pass network: one hidden unit with an identity activation, trained
# with the L-BFGS solver and an almost-zero L2 penalty (alpha).
nnc = MLPClassifier(
    hidden_layer_sizes=1,
    activation='identity',
    solver='lbfgs',
    alpha=10 ** -10,
    batch_size='auto',
    random_state=1,
)

nnc.fit(X_train, y_train)

In [None]:
# Accuracy of the first-pass model on the training rows, on the held-out
# test rows, and averaged over 5-fold cross-validation of the training rows.
first_train_acc = nnc.score(X_train, y_train)
first_test_acc = nnc.score(X_test, y_test)
first_cv_acc = cross_val_score(nnc, X_train, y_train, cv=5).mean()

print('Accuracy score - train: ', first_train_acc)
print('Accuracy score - test: ', first_test_acc)
print('Mean CV score - train: ', first_cv_acc)

### GridSearch

In [None]:
# Base estimator that the grid search clones and tunes. The seed is fixed,
# matching the train/test split and the first-pass model, so that repeated
# searches start from the same weights and select the same hyper-parameters.
nnc_blank = MLPClassifier(random_state=1)

In [None]:
# Hyper-parameter grid: two solvers, a single hidden layer of 1 to 19 units,
# four activation functions, and a fixed iteration cap of 300.
params = dict(
    solver=['adam', 'sgd'],
    hidden_layer_sizes=range(1, 20),
    activation=['identity', 'relu', 'logistic', 'tanh'],
    max_iter=[300],
)

In [None]:
# Exhaustive search over `params`, scoring each combination with 5-fold
# cross-validation.
nnc_gs = GridSearchCV(nnc_blank, params, cv=5)

In [None]:
# Run the grid search on the scaled training data.
nnc_gs.fit(X_train, y_train)

In [None]:
# Hyper-parameter combination with the highest mean cross-validated score.
nnc_gs.best_params_

In [None]:
# Mean cross-validated score achieved by that best combination.
nnc_gs.best_score_

In [None]:
# Final model with explicitly chosen hyper-parameters. The seed is fixed,
# matching the train/test split and the first-pass model, so the metrics
# reported for this model are reproducible from run to run.
# NOTE(review): solver='lbfgs' is not among the solvers in the `params` grid,
# so these settings cannot be the output of that search - confirm where they
# come from, or build the model from nnc_gs.best_estimator_ instead.
nnc_best = MLPClassifier(
    activation='logistic',
    hidden_layer_sizes=2,
    solver='lbfgs',
    random_state=1,
)

In [None]:
# Train the final model on the scaled training data.
nnc_best.fit(X_train, y_train)

In [None]:
# Accuracy of the final model on the training rows, on the held-out test
# rows, and averaged over 5-fold cross-validation of the training rows.
best_train_acc = nnc_best.score(X_train, y_train)
best_test_acc = nnc_best.score(X_test, y_test)
best_cv_acc = cross_val_score(nnc_best, X_train, y_train, cv=5).mean()

print('Accuracy score - train: ', best_train_acc)
print('Accuracy score - test: ', best_test_acc)
print('Mean CV score - train: ', best_cv_acc)

In [None]:
# Confusion matrix of the final model on the held-out test set, with class 1
# listed first and cell counts shown as whole numbers.
# NOTE(review): plot_confusion_matrix is no longer available in recent
# scikit-learn releases (ConfusionMatrixDisplay.from_estimator replaces it) -
# confirm the scikit-learn version this notebook is pinned to.
plot_confusion_matrix(
    nnc_best,
    X_test,
    y_test,
    labels=[1, 0],
    values_format='.0f',
    cmap='Blues',
)
plt.show()

In [None]:
# Hard class predictions of the final model for the held-out test rows.
y_test_pred = nnc_best.predict(X_test)

In [None]:
# Per-class precision, recall and F1 on the test set.
test_report = classification_report(y_test, y_test_pred)
print(test_report)

In [None]:
# Predicted class probabilities for the training rows (one column per class);
# the curves and scores that follow are therefore computed on training data.
probabilities_train = nnc_best.predict_proba(X_train)

In [None]:
from matplotlib.colors import ListedColormap

# Discrete colormap with one "husl" colour per class known to the final model.
n_classes = len(nnc_best.classes_)
husl_colours = sns.color_palette("husl", n_classes)
cmap = ListedColormap(husl_colours)

In [None]:
# Precision-recall curves for the training data, coloured with the class colormap.
skplt.metrics.plot_precision_recall(
    y_true=y_train,
    y_probas=probabilities_train,
    cmap=cmap,
)
plt.show()

In [None]:
# Average precision on the training data, using the probability of class 1.
positive_class_proba = probabilities_train[:, 1]
print('Average precision score: ', average_precision_score(y_train, positive_class_proba))

In [None]:
# ROC curves for the training data, coloured with the class colormap.
skplt.metrics.plot_roc(
    y_true=y_train,
    y_probas=probabilities_train,
    cmap=cmap,
)
plt.show()

In [None]:
# ROC AUC on the training data, using the probability of class 1.
positive_class_proba = probabilities_train[:, 1]
print('Area under the curve : ', roc_auc_score(y_train, positive_class_proba))