In [186]:
from configparser import ConfigParser
import psycopg2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from IPython.display import display
from sklearn.ensemble import RandomForestClassifier
from  sklearn import metrics
from sklearn.metrics import classification_report, recall_score, precision_score, accuracy_score
from imblearn.over_sampling import SMOTE


Import Data

In [187]:
x_train = pd.read_csv("x_train.csv")#read input data
y_train = pd.read_csv("y_train.csv")#read input data
x_test = pd.read_csv("x_test.csv")#read input data
y_test = pd.read_csv("y_test.csv")#read input data

In [188]:
x_train.drop(columns=x_train.columns[0], axis=1, inplace=True)
y_train.drop(columns=y_train.columns[0], axis=1, inplace=True)
x_test.drop(columns=x_test.columns[0], axis=1, inplace=True)
y_test.drop(columns=y_test.columns[0], axis=1, inplace=True)

In [189]:
RandomForest = RandomForestClassifier(n_estimators=1, max_depth=3)

In [190]:
RandomForest.fit(x_train, y_train.values.ravel())

RandomForestClassifier(max_depth=3, n_estimators=1)

In [191]:
prediction = RandomForest.predict(x_test)

In [192]:
print("Accuracy:", metrics.accuracy_score(y_test, prediction))
print("Recall - Macro:", metrics.recall_score(y_test, prediction, average="macro"))
print("Precision - Macro:", metrics.precision_score(y_test, prediction, average="macro"))
print("Recall - Micro:", metrics.recall_score(y_test, prediction, average="micro"))
print("Precision - Micro:", metrics.precision_score(y_test, prediction, average="micro"))

Accuracy: 0.9386973180076629
Recall - Macro: 0.8027472527472528
Precision - Macro: 0.9428571428571428
Recall - Micro: 0.9386973180076629
Precision - Micro: 0.9386973180076629


In [193]:
print(y_train.QOL_Measure.value_counts())
print(y_test.QOL_Measure.value_counts())

5    632
1    262
4    186
3    119
2     17
Name: QOL_Measure, dtype: int64
5    271
1    112
4     80
3     52
2      7
Name: QOL_Measure, dtype: int64


In [194]:
sm = SMOTE(random_state=42)
x_res, y_res = sm.fit_resample(x_train, y_train)

In [195]:
print(y_res.QOL_Measure.value_counts())
print(y_test.QOL_Measure.value_counts())

5    632
3    632
4    632
2    632
1    632
Name: QOL_Measure, dtype: int64
5    271
1    112
4     80
3     52
2      7
Name: QOL_Measure, dtype: int64


In [219]:
RandomForestSmote = RandomForestClassifier(n_estimators=3, max_depth=3)
RandomForestSmote.fit(x_res, y_res.values.ravel())

RandomForestClassifier(max_depth=3, n_estimators=3)

In [220]:
predictionSmote = RandomForestSmote.predict(x_test)

In [221]:
print("Accuracy:", metrics.accuracy_score(y_test, predictionSmote))
print("Recall - Macro:", metrics.recall_score(y_test, predictionSmote, average="macro"))
print("Precision - Macro:", metrics.precision_score(y_test, predictionSmote, average="macro"))
print("Recall - Micro:", metrics.recall_score(y_test, predictionSmote, average="micro"))
print("Precision - Micro:", metrics.precision_score(y_test, predictionSmote, average="micro"))

Accuracy: 0.9272030651340997
Recall - Macro: 0.9098914277604315
Precision - Macro: 0.7858571212154002
Recall - Micro: 0.9272030651340997
Precision - Micro: 0.9272030651340997


Import Reduced Data Set and Apply Smote

In [199]:
x_train_reduced = pd.read_csv("x_train_reduced.csv")#read input data
y_train_reduced = pd.read_csv("y_train_reduced.csv")#read input data
x_test_reduced = pd.read_csv("x_test_reduced.csv")#read input data
y_test_reduced = pd.read_csv("y_test_reduced.csv")#read input data

x_train_reduced.drop(columns=x_train_reduced.columns[0], axis=1, inplace=True)
y_train_reduced.drop(columns=y_train_reduced.columns[0], axis=1, inplace=True)
x_test_reduced.drop(columns=x_test_reduced.columns[0], axis=1, inplace=True)
y_test_reduced.drop(columns=y_test_reduced.columns[0], axis=1, inplace=True)

display(y_test.head())
sm_reduced = SMOTE(random_state=42)
x_res_reduced, y_res_reduced = sm_reduced.fit_resample(x_train_reduced, y_train_reduced)



Unnamed: 0,QOL_Measure
0,5
1,1
2,5
3,5
4,5


In [200]:
print(y_res_reduced.QOL_Measure.value_counts())
print(y_test_reduced.QOL_Measure.value_counts())

5    632
4    632
1    632
2    632
3    632
Name: QOL_Measure, dtype: int64
5    271
1    112
4     80
3     52
2      7
Name: QOL_Measure, dtype: int64


In [216]:
RandomForestSmoteReduced = RandomForestClassifier(n_estimators=3, max_depth=3)
RandomForestSmoteReduced.fit(x_res_reduced, y_res_reduced.values.ravel())

RandomForestClassifier(max_depth=3, n_estimators=3)

In [217]:
predictionSmoteReduced = RandomForestSmoteReduced.predict(x_test_reduced)

In [218]:
print("Accuracy:", metrics.accuracy_score(y_test_reduced, predictionSmoteReduced))
print("Recall - Macro:", metrics.recall_score(y_test_reduced, predictionSmoteReduced, average="macro"))
print("Precision - Macro:", metrics.precision_score(y_test_reduced, predictionSmoteReduced, average="macro"))
print("Recall - Micro:", metrics.recall_score(y_test_reduced, predictionSmoteReduced, average="micro"))
print("Precision - Micro:", metrics.precision_score(y_test_reduced, predictionSmoteReduced, average="micro"))

display(y_test_reduced.head())
print(predictionSmoteReduced)

Accuracy: 0.9176245210727969
Recall - Macro: 0.9221678966789668
Precision - Macro: 0.8181818181818181
Recall - Micro: 0.9176245210727969
Precision - Micro: 0.9176245210727969


Unnamed: 0,QOL_Measure
0,4
1,5
2,5
3,5
4,5


[4 5 5 5 5 4 5 5 5 1 1 3 1 1 4 3 5 3 2 1 3 3 3 5 5 5 5 1 5 1 5 5 3 4 3 5 5
 1 5 5 5 1 5 3 1 5 5 5 1 5 4 1 1 1 5 5 3 3 4 1 5 5 5 3 4 3 5 5 5 5 5 3 1 5
 5 5 5 5 2 3 5 5 3 1 3 3 3 1 4 5 3 5 5 1 3 1 5 5 5 5 4 5 5 5 4 3 5 4 3 3 5
 4 1 5 5 5 1 4 5 1 5 5 5 4 1 5 5 4 5 3 5 5 3 1 5 3 1 3 5 5 5 5 3 3 5 3 5 3
 5 5 5 5 1 5 5 4 5 5 1 3 5 5 5 3 5 1 1 5 3 5 3 5 4 4 4 1 5 1 5 1 4 1 3 4 3
 1 3 5 5 5 1 3 5 5 5 5 5 5 5 1 3 5 5 3 1 5 1 1 3 1 3 2 2 3 5 5 5 4 5 5 5 5
 5 5 5 1 5 5 4 5 5 1 3 3 5 5 5 5 1 5 1 1 4 5 1 4 3 4 5 3 1 5 5 5 4 4 5 4 5
 5 5 4 5 4 5 3 1 5 1 5 5 1 5 5 5 5 5 5 5 3 5 4 5 1 1 5 1 5 5 4 5 5 1 3 3 3
 5 1 5 5 5 2 5 5 4 4 2 5 3 1 5 3 1 5 5 1 1 5 4 5 1 1 4 5 5 5 5 1 5 5 5 1 2
 3 5 5 3 5 5 3 5 2 1 1 1 1 5 3 5 4 3 5 1 5 5 5 5 5 1 5 5 5 4 5 5 3 5 4 5 3
 5 1 5 2 5 5 5 1 1 5 3 5 3 5 5 5 3 5 3 3 1 5 5 1 5 4 4 5 5 5 3 3 3 5 5 5 3
 5 5 3 4 5 1 2 2 1 5 5 5 2 4 5 1 5 4 1 4 1 3 5 2 5 5 4 5 1 1 1 3 5 5 5 1 5
 1 5 5 1 1 5 1 5 5 1 5 4 5 5 5 5 1 1 3 1 5 1 4 5 1 3 5 5 1 1 3 4 3 4 1 1 5
 5 5 5 3 1 4 5 5 3 1 5 5 