In [62]:
# Dependencies
import os
from collections import Counter
from pathlib import Path

import numpy as np
import pandas as pd

from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from imblearn.metrics import classification_report_imbalanced

# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func


In [63]:
# Shape of the connection URL, shown for reference only. The real password must
# never be typed into a notebook cell: both the cell and its echoed output get saved.
"postgresql://postgres:<password>@localhost/Capstone"

'postgresql://postgres:<password>@localhost/Capstone'

In [64]:
# Connection URL for the Capstone PostgreSQL database.
# It is read from the environment so that no password is stored in the notebook.
# Expected form: postgresql://<user>:<password>@127.0.0.1:5432/Capstone
db_string = os.environ.get("CAPSTONE_DB_URL")
if not db_string:
    raise RuntimeError(
        "Set the CAPSTONE_DB_URL environment variable to the PostgreSQL connection URL "
        "before running this notebook."
    )

In [65]:
# Build the SQLAlchemy engine from the connection URL held in db_string.
engine = create_engine(db_string)

In [66]:
# Create our session (link) from Python to the DB
# NOTE(review): no later cell in the visible notebook uses `session`, and it is never
# closed; the table load goes through `engine` directly. Confirm whether it is needed.
session = Session(engine)

In [67]:
# Load the entire world_happiness table into a DataFrame, then preview ten rows.
happiness_query = "select * from world_happiness"
happy_2015_df = pd.read_sql_query(happiness_query, con=engine)
happy_2015_df.head(10)

Unnamed: 0,region,happiness_rank,happiness_score,economy_gdp_per_capita,family,health_life_expectancy,freedom,trust_government_corruption,generosity
0,10,1,1.0,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678
1,10,2,1.0,1.30232,1.40223,0.94784,0.62877,0.14145,0.4363
2,10,3,1.0,1.32548,1.36058,0.87464,0.64938,0.48357,0.34139
3,10,4,1.0,1.459,1.33095,0.88521,0.66973,0.36503,0.34699
4,6,5,1.0,1.32629,1.32261,0.90563,0.63297,0.32957,0.45811
5,10,6,1.0,1.29025,1.31826,0.88911,0.64169,0.41372,0.23351
6,10,7,1.0,1.32944,1.28017,0.89284,0.61576,0.31814,0.4761
7,10,8,1.0,1.33171,1.28907,0.91087,0.6598,0.43844,0.36262
8,1,9,1.0,1.25018,1.31967,0.90837,0.63938,0.42922,0.47501
9,1,10,1.0,1.33358,1.30923,0.93156,0.65124,0.35637,0.43562


In [68]:
# Move "region" from a regular column into the index.
# The guard keeps the cell re-runnable: once "region" is already the index it is no
# longer a column, and calling set_index("region") again would raise KeyError.
if "region" in happy_2015_df.columns:
    happy_2015_df = happy_2015_df.set_index("region")

In [69]:
# Remove the rank column from the frame.
# `columns=` already selects the axis, so the redundant `axis=1` is dropped;
# errors="ignore" lets the cell be re-run after the column has been removed.
happy_2015_df = happy_2015_df.drop(columns=["happiness_rank"], errors="ignore")

In [70]:
# Preview the first five rows of the frame as it now stands.
happy_2015_df.head(5)

Unnamed: 0_level_0,happiness_score,economy_gdp_per_capita,family,health_life_expectancy,freedom,trust_government_corruption,generosity
region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
10,1.0,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678
10,1.0,1.30232,1.40223,0.94784,0.62877,0.14145,0.4363
10,1.0,1.32548,1.36058,0.87464,0.64938,0.48357,0.34139
10,1.0,1.459,1.33095,0.88521,0.66973,0.36503,0.34699
6,1.0,1.32629,1.32261,0.90563,0.63297,0.32957,0.45811


In [71]:
# Create our features
# Every column except the target is used as a model input.
X = happy_2015_df.drop(columns="happiness_score")

# Target vector: the happiness_score column.
y = happy_2015_df["happiness_score"]

In [72]:
# List the columns currently in the frame (target plus feature columns).
happy_2015_df.columns

Index(['happiness_score', 'economy_gdp_per_capita', 'family',
       'health_life_expectancy', 'freedom', 'trust_government_corruption',
       'generosity'],
      dtype='object')

In [73]:
# Count how many rows fall under each distinct target value.
y.value_counts()

1.0    93
0.0    65
Name: happiness_score, dtype: int64

In [74]:
from sklearn.model_selection import train_test_split
# Split features and target into train/test parts. No test_size is passed, so the
# library default applies; random_state=1 pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [75]:
# Print the per-class counts of the training split, then of the test split.
for split_labels in (y_train, y_test):
    print(Counter(split_labels))

Counter({1.0: 69, 0.0: 49})
Counter({1.0: 24, 0.0: 16})


In [76]:
# Random Forest Classifier

In [77]:
# Fit a BalancedRandomForestClassifier on the training split
from imblearn.ensemble import BalancedRandomForestClassifier

# 100 estimators; random_state=1 pins the estimator's randomness.
brfc = BalancedRandomForestClassifier(n_estimators=100, random_state=1)

brfc.fit(X_train, y_train)

BalancedRandomForestClassifier(random_state=1)

In [78]:
# Predict the test split with the fitted forest and display its balanced accuracy.
y_pred = brfc.predict(X_test)
balanced_accuracy_score(y_test, y_pred)

0.8333333333333333

In [79]:
# Confusion matrix for the current test-set predictions.
# The result is stored under its own name: assigning it to `confusion_matrix` would
# replace the imported function with an array, so re-running this cell (or any later
# call to confusion_matrix) would fail with "'numpy.ndarray' object is not callable".
brfc_cm = confusion_matrix(y_test, y_pred)
print(brfc_cm)

[[14  2]
 [ 5 19]]


In [80]:
# Print the imbalanced-learn classification report for the current predictions.
print(classification_report_imbalanced(y_test, y_pred))

                   pre       rec       spe        f1       geo       iba       sup

        0.0       0.74      0.88      0.79      0.80      0.83      0.70        16
        1.0       0.90      0.79      0.88      0.84      0.83      0.69        24

avg / total       0.84      0.82      0.84      0.83      0.83      0.69        40



In [81]:
# Easy Ensemble AdaBoost Classifier

In [82]:
from imblearn.ensemble import EasyEnsembleClassifier

# 100 estimators; random_state=1 pins the estimator's randomness.
eec = EasyEnsembleClassifier(n_estimators=100, random_state=1)


# Fit on the original (un-resampled) training split.
eec.fit(X_train, y_train)


EasyEnsembleClassifier(n_estimators=100, random_state=1)

In [83]:
# Predict the test split with the fitted ensemble and display its balanced accuracy.
y_pred = eec.predict(X_test)
balanced_accuracy_score(y_test, y_pred)

0.8125

In [84]:
from sklearn.metrics import confusion_matrix

# Bind the matrix to its own name; reusing `confusion_matrix` as the variable would
# overwrite the function imported on the line above and break the next call.
eec_cm = confusion_matrix(y_test, y_pred)
print(eec_cm)

[[14  2]
 [ 6 18]]


In [85]:
# Print the imbalanced-learn classification report for the current predictions.
print(classification_report_imbalanced(y_test, y_pred))

                   pre       rec       spe        f1       geo       iba       sup

        0.0       0.70      0.88      0.75      0.78      0.81      0.66        16
        1.0       0.90      0.75      0.88      0.82      0.81      0.65        24

avg / total       0.82      0.80      0.82      0.80      0.81      0.65        40



In [86]:
# Random Oversampling

In [87]:
from imblearn.over_sampling import RandomOverSampler

# random_state=1 pins which rows the sampler draws.
ros = RandomOverSampler(random_state=1)

# Resample the training split only; the test split is left untouched.
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)
# Display the class counts after resampling.
Counter(y_resampled)

Counter({1.0: 69, 0.0: 69})

In [88]:
from sklearn.linear_model import LogisticRegression
# Logistic regression with the lbfgs solver; random_state=1 is passed for repeatability.
logreg = LogisticRegression(solver='lbfgs', random_state=1)

# Train on the resampled training data produced by the sampler cell before this one.
logreg.fit(X_resampled, y_resampled)

LogisticRegression(random_state=1)

In [94]:
# Predict the test split with the fitted logistic regression and display its
# balanced accuracy. balanced_accuracy_score comes from the notebook's import cell,
# so it is not re-imported here.
y_pred = logreg.predict(X_test)
balanced_accuracy_score(y_test, y_pred)


0.875

In [96]:
# Balanced accuracy on the original training split, for comparison with the
# test-set score. balanced_accuracy_score comes from the notebook's import cell,
# so it is not re-imported here.
y_predict_train = logreg.predict(X_train)
balanced_accuracy_score(y_train, y_predict_train)


0.8778467908902692

In [90]:
from sklearn.metrics import confusion_matrix
# Bind the matrix to its own name; reusing `confusion_matrix` as the variable would
# overwrite the function imported on the line above and break the next call.
ros_cm = confusion_matrix(y_test, y_pred)
print(ros_cm)

[[14  2]
 [ 3 21]]


In [91]:
# Print the imbalanced-learn classification report for the current predictions.
# classification_report_imbalanced comes from the notebook's import cell, so it is
# not re-imported here.
print(classification_report_imbalanced(y_test, y_pred))

                   pre       rec       spe        f1       geo       iba       sup

        0.0       0.82      0.88      0.88      0.85      0.88      0.77        16
        1.0       0.91      0.88      0.88      0.89      0.88      0.77        24

avg / total       0.88      0.88      0.88      0.88      0.88      0.77        40



In [92]:
# Undersampling

In [97]:
from imblearn.under_sampling import ClusterCentroids

# Undersample the training split; random_state=1 pins the sampler's randomness.
cc = ClusterCentroids(random_state=1)
# X_resampled / y_resampled are overwritten with the undersampled data.
X_resampled, y_resampled = cc.fit_resample(X_train, y_train)
# Display the class counts after resampling.
Counter(y_resampled)

Counter({0.0: 49, 1.0: 49})

In [98]:
# Refit the same `logreg` instance on the current contents of X_resampled / y_resampled.
logreg.fit(X_resampled, y_resampled)

LogisticRegression(random_state=1)

In [99]:
# Predict the test split with the refitted model and display its balanced accuracy.
y_pred = logreg.predict(X_test)
balanced_accuracy_score(y_test, y_pred)

0.875

In [100]:
from sklearn.metrics import confusion_matrix
# Bind the matrix to its own name; reusing `confusion_matrix` as the variable would
# overwrite the function imported on the line above and break the next call.
cc_cm = confusion_matrix(y_test, y_pred)
print(cc_cm)

[[14  2]
 [ 3 21]]


In [101]:
# Print the imbalanced-learn classification report for the current predictions.
print(classification_report_imbalanced(y_test, y_pred))

                   pre       rec       spe        f1       geo       iba       sup

        0.0       0.82      0.88      0.88      0.85      0.88      0.77        16
        1.0       0.91      0.88      0.88      0.89      0.88      0.77        24

avg / total       0.88      0.88      0.88      0.88      0.88      0.77        40



In [102]:
# Combination (Over and Under) Sampling

In [103]:
from imblearn.combine import SMOTEENN
# Combined over- and under-sampling of the training split; random_state=1 pins it.
smoteenn = SMOTEENN(random_state=1)
# X_resampled / y_resampled are overwritten with the combined-sampling result.
X_resampled, y_resampled = smoteenn.fit_resample(X_train, y_train)
# Display the class counts after resampling.
Counter(y_resampled)

Counter({0.0: 51, 1.0: 46})

In [104]:
# Refit the same `logreg` instance on the current contents of X_resampled / y_resampled.
logreg.fit(X_resampled, y_resampled)

LogisticRegression(random_state=1)

In [105]:
# Predict the test split with the refitted model and display its balanced accuracy.
y_pred = logreg.predict(X_test)
balanced_accuracy_score(y_test, y_pred)

0.90625

In [106]:
# Re-import so the name refers to the sklearn function even if an earlier cell
# rebound `confusion_matrix` to a result array.
from sklearn.metrics import confusion_matrix

# Display the confusion matrix for the current predictions (no rebinding here).
confusion_matrix(y_test, y_pred)

array([[15,  1],
       [ 3, 21]])

In [107]:
# Print the imbalanced-learn classification report for the current predictions.
print(classification_report_imbalanced(y_test, y_pred))


                   pre       rec       spe        f1       geo       iba       sup

        0.0       0.83      0.94      0.88      0.88      0.91      0.83        16
        1.0       0.95      0.88      0.94      0.91      0.91      0.82        24

avg / total       0.91      0.90      0.91      0.90      0.91      0.82        40

