In [1]:
# CML3014N Machine Learning Assignment 2
# Data Source:
#     1. Student Performance https://archive.ics.uci.edu/dataset/320/student+performance
#     2. Wine Quality https://archive.ics.uci.edu/dataset/186/wine+quality

# Dataset Description:
#    1. Student Performance: a mixed dataset with both numerical and categorical features.
#       G1, G2, G3 - numerical grades from 0 to 20 (the target variable; G3 is the final grade).
#    2. Wine Quality: a dataset with continuous numerical features.
#       quality - score between 0 and 10 (the target variable).

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler, StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Show every column when a DataFrame is displayed (None removes the column limit),
# so the wide student / one-hot frames are not truncated.
pd.options.display.max_columns = None

In [2]:
# Step 1: Load the datasets
# All four CSV files are read with ';' as the field delimiter.
wineRed_df, wineWhite_df, stdMath_df, stdPor_df = [
    pd.read_csv(csv_path, delimiter=';')
    for csv_path in (
        'data/winequality-red.csv',
        'data/winequality-white.csv',
        'data/student-mat.csv',
        'data/student-por.csv',
    )
]

# For each dataset: print its title and dimensions, then show the first rows.
for dataset_title, dataset_frame in (
    ("Wine Red Dataset", wineRed_df),
    ("Wine White Dataset", wineWhite_df),
    ("Student Math Dataset", stdMath_df),
    ("Student Portuguese Dataset", stdPor_df),
):
    n_rows, n_cols = dataset_frame.shape
    print(dataset_title)
    print("Total number of columns: ", n_cols, "Total number of rows: ", n_rows)
    display(dataset_frame.head())

Wine Red Dataset
Total number of columns:  12 Total number of rows:  1599


Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


Wine White Dataset
Total number of columns:  12 Total number of rows:  4898


Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


Student Math Dataset
Total number of columns:  33 Total number of rows:  395


Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,reason,guardian,traveltime,studytime,failures,schoolsup,famsup,paid,activities,nursery,higher,internet,romantic,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
0,GP,F,18,U,GT3,A,4,4,at_home,teacher,course,mother,2,2,0,yes,no,no,no,yes,yes,no,no,4,3,4,1,1,3,6,5,6,6
1,GP,F,17,U,GT3,T,1,1,at_home,other,course,father,1,2,0,no,yes,no,no,no,yes,yes,no,5,3,3,1,1,3,4,5,5,6
2,GP,F,15,U,LE3,T,1,1,at_home,other,other,mother,1,2,3,yes,no,yes,no,yes,yes,yes,no,4,3,2,2,3,3,10,7,8,10
3,GP,F,15,U,GT3,T,4,2,health,services,home,mother,1,3,0,no,yes,yes,yes,yes,yes,yes,yes,3,2,2,1,1,5,2,15,14,15
4,GP,F,16,U,GT3,T,3,3,other,other,home,father,1,2,0,no,yes,yes,no,yes,yes,no,no,4,3,2,1,2,5,4,6,10,10


Student Portuguese Dataset
Total number of columns:  33 Total number of rows:  649


Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,reason,guardian,traveltime,studytime,failures,schoolsup,famsup,paid,activities,nursery,higher,internet,romantic,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
0,GP,F,18,U,GT3,A,4,4,at_home,teacher,course,mother,2,2,0,yes,no,no,no,yes,yes,no,no,4,3,4,1,1,3,4,0,11,11
1,GP,F,17,U,GT3,T,1,1,at_home,other,course,father,1,2,0,no,yes,no,no,no,yes,yes,no,5,3,3,1,1,3,2,9,11,11
2,GP,F,15,U,LE3,T,1,1,at_home,other,other,mother,1,2,0,yes,no,no,no,yes,yes,yes,no,4,3,2,2,3,3,6,12,13,12
3,GP,F,15,U,GT3,T,4,2,health,services,home,mother,1,3,0,no,yes,no,yes,yes,yes,yes,yes,3,2,2,1,1,5,0,14,14,14
4,GP,F,16,U,GT3,T,3,3,other,other,home,father,1,2,0,no,yes,no,no,yes,yes,no,no,4,3,2,1,2,5,0,11,13,13


In [3]:
# Step 2: Preprocess the dataset
# * The model is an MLP, which only handles numerical data.
# * Wine data is purely numerical, but each feature is on a different scale, so it is rescaled.
# * Student data mixes numerical and categorical columns; the categorical columns are
#   converted to numbers in two alternative ways: label encoding and one-hot encoding.
#
# NOTE(review): every scaler below is fit on the complete frame, i.e. including the
# target columns (quality / G1-G3) and before any train/test split. Confirm that the
# modelling cells take their class labels from the unscaled frames, and that fitting
# the scalers on the full data is acceptable for this assignment.


def scale_frame(scaler, frame):
    """Fit `scaler` on `frame` and return the rescaled values as a DataFrame.

    The result keeps the column names and the index of `frame`. `scaler` is
    re-fitted in place, so after the call it holds the statistics of `frame`.
    """
    return pd.DataFrame(scaler.fit_transform(frame), columns=frame.columns, index=frame.index)


def label_encode_frame(encoder, frame, categorical):
    """Return a copy of `frame` whose `categorical` columns are replaced by integer codes.

    `encoder` is re-fitted once per column, so the codes of different columns
    are independent of each other. `frame` itself is not modified.
    """
    encoded = frame.copy()
    for name in categorical:
        encoded[name] = encoder.fit_transform(frame[name])
    return encoded


def one_hot_encode_frame(encoder, frame, categorical, numerical):
    """Return the one-hot expansion of `categorical` followed by the `numerical` columns.

    The one-hot part is built on the index of `frame` so that the column-wise
    concat aligns row by row even if `frame` does not have a default RangeIndex.
    """
    one_hot = pd.DataFrame(
        encoder.fit_transform(frame[categorical]),
        columns=encoder.get_feature_names_out(categorical),
        index=frame.index,
    )
    return pd.concat([one_hot, frame[numerical]], axis=1)


def show_frame(title, frame):
    """Print `title` and the dimensions of `frame`, then display its first rows."""
    print(title)
    print("Total number of columns: ", frame.shape[1], "Total number of rows: ", frame.shape[0])
    display(frame.head())


# Step 2.1 Preprocess the wine dataset
# Prepare both a normalized (MinMaxScaler) and a standardized (StandardScaler) version
scaler = MinMaxScaler()
wineRed_df_norm = scale_frame(scaler, wineRed_df)
wineWhite_df_norm = scale_frame(scaler, wineWhite_df)

scaler = StandardScaler()
wineRed_df_std = scale_frame(scaler, wineRed_df)
wineWhite_df_std = scale_frame(scaler, wineWhite_df)

show_frame("Wine Red Dataset Normalized", wineRed_df_norm)
show_frame("Wine Red Dataset Standardized", wineRed_df_std)

show_frame("Wine White Dataset Normalized", wineWhite_df_norm)
show_frame("Wine White Dataset Standardized", wineWhite_df_std)

# Step 2.2 Preprocess the student dataset
# Handle categorical data using a label encoder and, alternatively, a one-hot encoder.
# Only the string-valued columns listed in `columns` are encoded. Medu and Fedu are
# already integer-coded in the raw data and are therefore kept with the numerical columns.

# First encode the categorical data
columns = ['school', 'sex', 'address', 'famsize', 'Pstatus', 'Mjob', 'Fjob', 'reason', 'guardian', 'schoolsup', 'famsup', 'paid', 'activities', 'nursery', 'higher', 'internet', 'romantic']
numerical_columns = ['age', 'Medu', 'Fedu', 'traveltime', 'studytime', 'failures', 'famrel', 'freetime', 'goout', 'Dalc', 'Walc', 'health', 'absences', 'G1', 'G2', 'G3']

# Label encoding for categorical data (each dataset is encoded independently)
labelEncoder = LabelEncoder()
stdMath_df_le = label_encode_frame(labelEncoder, stdMath_df, columns)
stdPor_df_le = label_encode_frame(labelEncoder, stdPor_df, columns)

# One-hot encoding for categorical data; the numerical columns are appended unchanged
oneHotEncoder = OneHotEncoder(sparse_output=False)
stdMath_df_ohe = one_hot_encode_frame(oneHotEncoder, stdMath_df, columns, numerical_columns)
stdPor_df_ohe = one_hot_encode_frame(oneHotEncoder, stdPor_df, columns, numerical_columns)

# Second normalize and standardize the dataset
scaler = MinMaxScaler()
stdMath_df_le_norm = scale_frame(scaler, stdMath_df_le)
stdMath_df_ohe_norm = scale_frame(scaler, stdMath_df_ohe)
stdPor_df_le_norm = scale_frame(scaler, stdPor_df_le)
stdPor_df_ohe_norm = scale_frame(scaler, stdPor_df_ohe)

scaler = StandardScaler()
stdMath_df_le_std = scale_frame(scaler, stdMath_df_le)
stdMath_df_ohe_std = scale_frame(scaler, stdMath_df_ohe)
stdPor_df_le_std = scale_frame(scaler, stdPor_df_le)
stdPor_df_ohe_std = scale_frame(scaler, stdPor_df_ohe)

# For student Math dataset
show_frame("Student Math Dataset Label Encoded", stdMath_df_le)
show_frame("Student Math Dataset Label Encoded Normalized", stdMath_df_le_norm)
show_frame("Student Math Dataset Label Encoded Standardized", stdMath_df_le_std)

show_frame("Student Math Dataset One Hot Encoded", stdMath_df_ohe)
show_frame("Student Math Dataset One Hot Encoded Normalized", stdMath_df_ohe_norm)
show_frame("Student Math Dataset One Hot Encoded Standardized", stdMath_df_ohe_std)

# For student Portuguese dataset
show_frame("Student Portuguese Dataset Label Encoded", stdPor_df_le)
show_frame("Student Portuguese Dataset Label Encoded Normalized", stdPor_df_le_norm)
show_frame("Student Portuguese Dataset Label Encoded Standardized", stdPor_df_le_std)

show_frame("Student Portuguese Dataset One Hot Encoded", stdPor_df_ohe)
show_frame("Student Portuguese Dataset One Hot Encoded Normalized", stdPor_df_ohe_norm)
show_frame("Student Portuguese Dataset One Hot Encoded Standardized", stdPor_df_ohe_std)

Wine Red Dataset Normalized
Total number of columns:  12 Total number of rows:  1599


Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,0.247788,0.39726,0.0,0.068493,0.106845,0.140845,0.09894,0.567548,0.606299,0.137725,0.153846,0.4
1,0.283186,0.520548,0.0,0.116438,0.143573,0.338028,0.215548,0.494126,0.362205,0.209581,0.215385,0.4
2,0.283186,0.438356,0.04,0.09589,0.133556,0.197183,0.169611,0.508811,0.409449,0.191617,0.215385,0.4
3,0.584071,0.109589,0.56,0.068493,0.105175,0.225352,0.190813,0.582232,0.330709,0.149701,0.215385,0.6
4,0.247788,0.39726,0.0,0.068493,0.106845,0.140845,0.09894,0.567548,0.606299,0.137725,0.153846,0.4


Wine Red Dataset Standardized
Total number of columns:  12 Total number of rows:  1599


Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,-0.52836,0.961877,-1.391472,-0.453218,-0.243707,-0.466193,-0.379133,0.558274,1.288643,-0.579207,-0.960246,-0.787823
1,-0.298547,1.967442,-1.391472,0.043416,0.223875,0.872638,0.624363,0.028261,-0.719933,0.12895,-0.584777,-0.787823
2,-0.298547,1.297065,-1.18607,-0.169427,0.096353,-0.083669,0.229047,0.134264,-0.331177,-0.048089,-0.584777,-0.787823
3,1.654856,-1.384443,1.484154,-0.453218,-0.26496,0.107592,0.4115,0.664277,-0.979104,-0.46118,-0.584777,0.450848
4,-0.52836,0.961877,-1.391472,-0.453218,-0.243707,-0.466193,-0.379133,0.558274,1.288643,-0.579207,-0.960246,-0.787823


Wine White Dataset Normalized
Total number of columns:  12 Total number of rows:  4898


Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,0.307692,0.186275,0.216867,0.308282,0.106825,0.149826,0.37355,0.267785,0.254545,0.267442,0.129032,0.5
1,0.240385,0.215686,0.204819,0.015337,0.118694,0.041812,0.285383,0.132832,0.527273,0.313953,0.241935,0.5
2,0.413462,0.196078,0.240964,0.096626,0.121662,0.097561,0.204176,0.154039,0.490909,0.255814,0.33871,0.5
3,0.326923,0.147059,0.192771,0.121166,0.145401,0.156794,0.410673,0.163678,0.427273,0.209302,0.306452,0.5
4,0.326923,0.147059,0.192771,0.121166,0.145401,0.156794,0.410673,0.163678,0.427273,0.209302,0.306452,0.5


Wine White Dataset Standardized
Total number of columns:  12 Total number of rows:  4898


Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,0.172097,-0.08177,0.21328,2.821349,-0.035355,0.569932,0.744565,2.331512,-1.246921,-0.349184,-1.393152,0.13787
1,-0.657501,0.215896,0.048001,-0.944765,0.147747,-1.253019,-0.149685,-0.009154,0.740029,0.001342,-0.824276,0.13787
2,1.475751,0.017452,0.543838,0.100282,0.193523,-0.312141,-0.973336,0.358665,0.475102,-0.436816,-0.336667,0.13787
3,0.409125,-0.478657,-0.117278,0.415768,0.559727,0.687541,1.121091,0.525855,0.01148,-0.787342,-0.499203,0.13787
4,0.409125,-0.478657,-0.117278,0.415768,0.559727,0.687541,1.121091,0.525855,0.01148,-0.787342,-0.499203,0.13787


Student Math Dataset Label Encoded
Total number of columns:  33 Total number of rows:  395


Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,reason,guardian,traveltime,studytime,failures,schoolsup,famsup,paid,activities,nursery,higher,internet,romantic,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
0,0,0,18,1,0,0,4,4,0,4,0,1,2,2,0,1,0,0,0,1,1,0,0,4,3,4,1,1,3,6,5,6,6
1,0,0,17,1,0,1,1,1,0,2,0,0,1,2,0,0,1,0,0,0,1,1,0,5,3,3,1,1,3,4,5,5,6
2,0,0,15,1,1,1,1,1,0,2,2,1,1,2,3,1,0,1,0,1,1,1,0,4,3,2,2,3,3,10,7,8,10
3,0,0,15,1,0,1,4,2,1,3,1,1,1,3,0,0,1,1,1,1,1,1,1,3,2,2,1,1,5,2,15,14,15
4,0,0,16,1,0,1,3,3,2,2,1,0,1,2,0,0,1,1,0,1,1,0,0,4,3,2,1,2,5,4,6,10,10


Student Math Dataset Label Encoded Normalized
Total number of columns:  33 Total number of rows:  395


Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,reason,guardian,traveltime,studytime,failures,schoolsup,famsup,paid,activities,nursery,higher,internet,romantic,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
0,0.0,0.0,0.428571,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.5,0.333333,0.333333,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.75,0.5,0.75,0.0,0.0,0.5,0.08,0.125,0.315789,0.3
1,0.0,0.0,0.285714,1.0,0.0,1.0,0.25,0.25,0.0,0.5,0.0,0.0,0.0,0.333333,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.5,0.5,0.0,0.0,0.5,0.053333,0.125,0.263158,0.3
2,0.0,0.0,0.0,1.0,1.0,1.0,0.25,0.25,0.0,0.5,0.666667,0.5,0.0,0.333333,1.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.75,0.5,0.25,0.25,0.5,0.5,0.133333,0.25,0.421053,0.5
3,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.5,0.25,0.75,0.333333,0.5,0.0,0.666667,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.5,0.25,0.25,0.0,0.0,1.0,0.026667,0.75,0.736842,0.75
4,0.0,0.0,0.142857,1.0,0.0,1.0,0.75,0.75,0.5,0.5,0.333333,0.0,0.0,0.333333,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.75,0.5,0.25,0.0,0.25,1.0,0.053333,0.1875,0.526316,0.5


Student Math Dataset Label Encoded Standardized
Total number of columns:  33 Total number of rows:  395


Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,reason,guardian,traveltime,studytime,failures,schoolsup,famsup,paid,activities,nursery,higher,internet,romantic,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
0,-0.36305,-0.948176,1.023046,0.535392,-0.636941,-2.938392,1.143856,1.360371,-1.769793,1.993149,-1.040599,0.273945,0.792251,-0.042286,-0.449944,2.597133,-1.257656,-0.919671,-1.017881,0.507899,0.23094,-2.232677,-0.70845,0.062194,-0.23601,0.801479,-0.540699,-1.003789,-0.399289,0.036424,-1.782467,-1.254791,-0.964934
1,-0.36305,-0.948176,0.23838,0.535392,-0.636941,0.340322,-1.600009,-1.39997,-1.769793,-0.325831,-1.040599,-1.591714,-0.643249,-0.042286,-0.449944,-0.38504,0.79513,-0.919671,-1.017881,-1.968894,0.23094,0.447893,-0.70845,1.17886,-0.23601,-0.097908,-0.540699,-1.003789,-0.399289,-0.213796,-1.782467,-1.520979,-0.964934
2,-0.36305,-0.948176,-1.330954,0.535392,1.570004,0.340322,-1.600009,-1.39997,-1.769793,-0.325831,0.616806,0.273945,-0.643249,-0.042286,3.589323,2.597133,-1.257656,1.087346,-1.017881,0.507899,0.23094,0.447893,-0.70845,0.062194,-0.23601,-0.997295,0.583385,0.5511,-0.399289,0.536865,-1.179147,-0.722415,-0.090739
3,-0.36305,-0.948176,-1.330954,0.535392,-0.636941,0.340322,1.143856,-0.479857,-0.954077,0.833659,-0.211896,0.273945,-0.643249,1.150779,-0.449944,-0.38504,0.79513,1.087346,0.982433,0.507899,0.23094,0.447893,1.411533,-1.054472,-1.238419,-0.997295,-0.540699,-1.003789,1.04107,-0.464016,1.234133,0.874715,1.002004
4,-0.36305,-0.948176,-0.546287,0.535392,-0.636941,0.340322,0.229234,0.440257,-0.138362,-0.325831,-0.211896,-1.591714,-0.643249,-0.042286,-0.449944,-0.38504,0.79513,1.087346,-1.017881,0.507899,0.23094,-2.232677,-0.70845,0.062194,-0.23601,-0.997295,-0.540699,-0.226345,1.04107,-0.213796,-1.480807,-0.190038,-0.090739


Student Math Dataset One Hot Encoded
Total number of columns:  59 Total number of rows:  395


Unnamed: 0,school_GP,school_MS,sex_F,sex_M,address_R,address_U,famsize_GT3,famsize_LE3,Pstatus_A,Pstatus_T,Mjob_at_home,Mjob_health,Mjob_other,Mjob_services,Mjob_teacher,Fjob_at_home,Fjob_health,Fjob_other,Fjob_services,Fjob_teacher,reason_course,reason_home,reason_other,reason_reputation,guardian_father,guardian_mother,guardian_other,schoolsup_no,schoolsup_yes,famsup_no,famsup_yes,paid_no,paid_yes,activities_no,activities_yes,nursery_no,nursery_yes,higher_no,higher_yes,internet_no,internet_yes,romantic_no,romantic_yes,age,Medu,Fedu,traveltime,studytime,failures,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,18,4,4,2,2,0,4,3,4,1,1,3,6,5,6,6
1,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,17,1,1,1,2,0,5,3,3,1,1,3,4,5,5,6
2,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,15,1,1,1,2,3,4,3,2,2,3,3,10,7,8,10
3,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,15,4,2,1,3,0,3,2,2,1,1,5,2,15,14,15
4,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,16,3,3,1,2,0,4,3,2,1,2,5,4,6,10,10


Student Math Dataset One Hot Encoded Normalized
Total number of columns:  59 Total number of rows:  395


Unnamed: 0,school_GP,school_MS,sex_F,sex_M,address_R,address_U,famsize_GT3,famsize_LE3,Pstatus_A,Pstatus_T,Mjob_at_home,Mjob_health,Mjob_other,Mjob_services,Mjob_teacher,Fjob_at_home,Fjob_health,Fjob_other,Fjob_services,Fjob_teacher,reason_course,reason_home,reason_other,reason_reputation,guardian_father,guardian_mother,guardian_other,schoolsup_no,schoolsup_yes,famsup_no,famsup_yes,paid_no,paid_yes,activities_no,activities_yes,nursery_no,nursery_yes,higher_no,higher_yes,internet_no,internet_yes,romantic_no,romantic_yes,age,Medu,Fedu,traveltime,studytime,failures,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.428571,1.0,1.0,0.333333,0.333333,0.0,0.75,0.5,0.75,0.0,0.0,0.5,0.08,0.125,0.315789,0.3
1,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.285714,0.25,0.25,0.0,0.333333,0.0,1.0,0.5,0.5,0.0,0.0,0.5,0.053333,0.125,0.263158,0.3
2,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.25,0.25,0.0,0.333333,1.0,0.75,0.5,0.25,0.25,0.5,0.5,0.133333,0.25,0.421053,0.5
3,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.5,0.0,0.666667,0.0,0.5,0.25,0.25,0.0,0.0,1.0,0.026667,0.75,0.736842,0.75
4,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.142857,0.75,0.75,0.0,0.333333,0.0,0.75,0.5,0.25,0.0,0.25,1.0,0.053333,0.1875,0.526316,0.5


Student Math Dataset One Hot Encoded Standardized
Total number of columns:  59 Total number of rows:  395


Unnamed: 0,school_GP,school_MS,sex_F,sex_M,address_R,address_U,famsize_GT3,famsize_LE3,Pstatus_A,Pstatus_T,Mjob_at_home,Mjob_health,Mjob_other,Mjob_services,Mjob_teacher,Fjob_at_home,Fjob_health,Fjob_other,Fjob_services,Fjob_teacher,reason_course,reason_home,reason_other,reason_reputation,guardian_father,guardian_mother,guardian_other,schoolsup_no,schoolsup_yes,famsup_no,famsup_yes,paid_no,paid_yes,activities_no,activities_yes,nursery_no,nursery_yes,higher_no,higher_yes,internet_no,internet_yes,romantic_no,romantic_yes,age,Medu,Fedu,traveltime,studytime,failures,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
0,0.36305,-0.36305,0.948176,-0.948176,-0.535392,0.535392,0.636941,-0.636941,2.938392,-2.938392,2.386402,-0.306892,-0.745062,-0.593919,-0.414858,-0.23094,-0.218507,-1.104129,-0.625176,3.552561,1.313064,-0.617348,-0.316668,-0.601722,-0.543214,0.668496,-0.296908,-2.597133,2.597133,1.257656,-1.257656,0.919671,-0.919671,1.017881,-1.017881,-0.507899,0.507899,-0.23094,0.23094,2.232677,-2.232677,0.70845,-0.70845,1.023046,1.143856,1.360371,0.792251,-0.042286,-0.449944,0.062194,-0.23601,0.801479,-0.540699,-1.003789,-0.399289,0.036424,-1.782467,-1.254791,-0.964934
1,0.36305,-0.36305,0.948176,-0.948176,-0.535392,0.535392,0.636941,-0.636941,-0.340322,0.340322,2.386402,-0.306892,-0.745062,-0.593919,-0.414858,-0.23094,-0.218507,0.905691,-0.625176,-0.281487,1.313064,-0.617348,-0.316668,-0.601722,1.840894,-1.495896,-0.296908,0.38504,-0.38504,-0.79513,0.79513,0.919671,-0.919671,1.017881,-1.017881,1.968894,-1.968894,-0.23094,0.23094,-0.447893,0.447893,0.70845,-0.70845,0.23838,-1.600009,-1.39997,-0.643249,-0.042286,-0.449944,1.17886,-0.23601,-0.097908,-0.540699,-1.003789,-0.399289,-0.213796,-1.782467,-1.520979,-0.964934
2,0.36305,-0.36305,0.948176,-0.948176,-0.535392,0.535392,-1.570004,1.570004,-0.340322,0.340322,2.386402,-0.306892,-0.745062,-0.593919,-0.414858,-0.23094,-0.218507,0.905691,-0.625176,-0.281487,-0.761577,-0.617348,3.157883,-0.601722,-0.543214,0.668496,-0.296908,-2.597133,2.597133,1.257656,-1.257656,-1.087346,1.087346,1.017881,-1.017881,-0.507899,0.507899,-0.23094,0.23094,-0.447893,0.447893,0.70845,-0.70845,-1.330954,-1.600009,-1.39997,-0.643249,-0.042286,3.589323,0.062194,-0.23601,-0.997295,0.583385,0.5511,-0.399289,0.536865,-1.179147,-0.722415,-0.090739
3,0.36305,-0.36305,0.948176,-0.948176,-0.535392,0.535392,0.636941,-0.636941,-0.340322,0.340322,-0.419041,3.258473,-0.745062,-0.593919,-0.414858,-0.23094,-0.218507,-1.104129,1.599549,-0.281487,-0.761577,1.619831,-0.316668,-0.601722,-0.543214,0.668496,-0.296908,0.38504,-0.38504,-0.79513,0.79513,-1.087346,1.087346,-0.982433,0.982433,-0.507899,0.507899,-0.23094,0.23094,-0.447893,0.447893,-1.411533,1.411533,-1.330954,1.143856,-0.479857,-0.643249,1.150779,-0.449944,-1.054472,-1.238419,-0.997295,-0.540699,-1.003789,1.04107,-0.464016,1.234133,0.874715,1.002004
4,0.36305,-0.36305,0.948176,-0.948176,-0.535392,0.535392,0.636941,-0.636941,-0.340322,0.340322,-0.419041,-0.306892,1.342169,-0.593919,-0.414858,-0.23094,-0.218507,0.905691,-0.625176,-0.281487,-0.761577,1.619831,-0.316668,-0.601722,1.840894,-1.495896,-0.296908,0.38504,-0.38504,-0.79513,0.79513,-1.087346,1.087346,1.017881,-1.017881,-0.507899,0.507899,-0.23094,0.23094,2.232677,-2.232677,0.70845,-0.70845,-0.546287,0.229234,0.440257,-0.643249,-0.042286,-0.449944,0.062194,-0.23601,-0.997295,-0.540699,-0.226345,1.04107,-0.213796,-1.480807,-0.190038,-0.090739


Student Portuguese Dataset Label Encoded
Total number of columns:  33 Total number of rows:  649


Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,reason,guardian,traveltime,studytime,failures,schoolsup,famsup,paid,activities,nursery,higher,internet,romantic,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
0,0,0,18,1,0,0,4,4,0,4,0,1,2,2,0,1,0,0,0,1,1,0,0,4,3,4,1,1,3,4,0,11,11
1,0,0,17,1,0,1,1,1,0,2,0,0,1,2,0,0,1,0,0,0,1,1,0,5,3,3,1,1,3,2,9,11,11
2,0,0,15,1,1,1,1,1,0,2,2,1,1,2,0,1,0,0,0,1,1,1,0,4,3,2,2,3,3,6,12,13,12
3,0,0,15,1,0,1,4,2,1,3,1,1,1,3,0,0,1,0,1,1,1,1,1,3,2,2,1,1,5,0,14,14,14
4,0,0,16,1,0,1,3,3,2,2,1,0,1,2,0,0,1,0,0,1,1,0,0,4,3,2,1,2,5,0,11,13,13


Student Portuguese Dataset Label Encoded Normalized
Total number of columns:  33 Total number of rows:  649


Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,reason,guardian,traveltime,studytime,failures,schoolsup,famsup,paid,activities,nursery,higher,internet,romantic,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
0,0.0,0.0,0.428571,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.5,0.333333,0.333333,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.75,0.5,0.75,0.0,0.0,0.5,0.125,0.0,0.578947,0.578947
1,0.0,0.0,0.285714,1.0,0.0,1.0,0.25,0.25,0.0,0.5,0.0,0.0,0.0,0.333333,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.5,0.5,0.0,0.0,0.5,0.0625,0.473684,0.578947,0.578947
2,0.0,0.0,0.0,1.0,1.0,1.0,0.25,0.25,0.0,0.5,0.666667,0.5,0.0,0.333333,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.75,0.5,0.25,0.25,0.5,0.5,0.1875,0.631579,0.684211,0.631579
3,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.5,0.25,0.75,0.333333,0.5,0.0,0.666667,0.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.5,0.25,0.25,0.0,0.0,1.0,0.0,0.736842,0.736842,0.736842
4,0.0,0.0,0.142857,1.0,0.0,1.0,0.75,0.75,0.5,0.5,0.333333,0.0,0.0,0.333333,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.75,0.5,0.25,0.0,0.25,1.0,0.0,0.578947,0.684211,0.684211


Student Portuguese Dataset Label Encoded Standardized
Total number of columns:  33 Total number of rows:  649


Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,reason,guardian,traveltime,studytime,failures,schoolsup,famsup,paid,activities,nursery,higher,internet,romantic,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
0,-0.730944,-0.833377,1.031695,0.660182,-0.648175,-2.666927,1.310216,1.540715,-1.556453,2.057248,-0.933974,0.332648,0.576718,0.083653,-0.374305,2.923032,-1.259229,-0.252853,-0.97114,0.495663,0.344914,-1.816043,-0.763496,0.072606,-0.171647,0.693785,-0.543555,-0.997695,-0.371042,0.073433,-4.15547,-0.19582,-0.280658
1,-0.730944,-0.833377,0.210137,0.660182,-0.648175,0.374963,-1.336039,-1.188832,-1.556453,-0.260728,-0.933974,-1.594926,-0.760032,0.083653,-0.374305,-0.34211,0.794137,-0.252853,-0.97114,-2.017502,0.344914,0.550648,-0.763496,1.119748,-0.171647,-0.15738,-0.543555,-0.997695,-0.371042,-0.357863,-0.87457,-0.19582,-0.280658
2,-0.730944,-0.833377,-1.43298,0.660182,1.542792,0.374963,-1.336039,-1.188832,-1.556453,-0.260728,0.745109,0.332648,-0.760032,0.083653,-0.374305,2.923032,-1.259229,-0.252853,-0.97114,0.495663,0.344914,0.550648,-0.763496,0.072606,-0.171647,-1.008546,0.538553,0.560678,-0.371042,0.50473,0.219064,0.491137,0.029116
3,-0.730944,-0.833377,-1.43298,0.660182,-0.648175,0.374963,1.310216,-0.278983,-0.754756,0.89826,-0.094432,0.332648,-0.760032,1.290114,-0.374305,-0.34211,0.794137,-0.252853,1.029717,0.495663,0.344914,0.550648,1.309764,-0.974536,-1.123771,-1.008546,-0.543555,-0.997695,1.012903,-0.789159,0.948153,0.834615,0.648663
4,-0.730944,-0.833377,-0.611422,0.660182,-0.648175,0.374963,0.428131,0.630866,0.046941,-0.260728,-0.094432,-1.594926,-0.760032,0.083653,-0.374305,-0.34211,0.794137,-0.252853,-0.97114,0.495663,0.344914,-1.816043,-0.763496,0.072606,-0.171647,-1.008546,-0.543555,-0.218508,1.012903,-0.789159,-0.145481,0.491137,0.338889


Student Portuguese Dataset One Hot Encoded
Total number of columns:  59 Total number of rows:  649


Unnamed: 0,school_GP,school_MS,sex_F,sex_M,address_R,address_U,famsize_GT3,famsize_LE3,Pstatus_A,Pstatus_T,Mjob_at_home,Mjob_health,Mjob_other,Mjob_services,Mjob_teacher,Fjob_at_home,Fjob_health,Fjob_other,Fjob_services,Fjob_teacher,reason_course,reason_home,reason_other,reason_reputation,guardian_father,guardian_mother,guardian_other,schoolsup_no,schoolsup_yes,famsup_no,famsup_yes,paid_no,paid_yes,activities_no,activities_yes,nursery_no,nursery_yes,higher_no,higher_yes,internet_no,internet_yes,romantic_no,romantic_yes,age,Medu,Fedu,traveltime,studytime,failures,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,18,4,4,2,2,0,4,3,4,1,1,3,4,0,11,11
1,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,17,1,1,1,2,0,5,3,3,1,1,3,2,9,11,11
2,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,15,1,1,1,2,0,4,3,2,2,3,3,6,12,13,12
3,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,15,4,2,1,3,0,3,2,2,1,1,5,0,14,14,14
4,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,16,3,3,1,2,0,4,3,2,1,2,5,0,11,13,13


Student Portuguese Dataset One Hot Encoded Normalized
Total number of columns:  59 Total number of rows:  649


Unnamed: 0,school_GP,school_MS,sex_F,sex_M,address_R,address_U,famsize_GT3,famsize_LE3,Pstatus_A,Pstatus_T,Mjob_at_home,Mjob_health,Mjob_other,Mjob_services,Mjob_teacher,Fjob_at_home,Fjob_health,Fjob_other,Fjob_services,Fjob_teacher,reason_course,reason_home,reason_other,reason_reputation,guardian_father,guardian_mother,guardian_other,schoolsup_no,schoolsup_yes,famsup_no,famsup_yes,paid_no,paid_yes,activities_no,activities_yes,nursery_no,nursery_yes,higher_no,higher_yes,internet_no,internet_yes,romantic_no,romantic_yes,age,Medu,Fedu,traveltime,studytime,failures,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.428571,1.0,1.0,0.333333,0.333333,0.0,0.75,0.5,0.75,0.0,0.0,0.5,0.125,0.0,0.578947,0.578947
1,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.285714,0.25,0.25,0.0,0.333333,0.0,1.0,0.5,0.5,0.0,0.0,0.5,0.0625,0.473684,0.578947,0.578947
2,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.25,0.25,0.0,0.333333,0.0,0.75,0.5,0.25,0.25,0.5,0.5,0.1875,0.631579,0.684211,0.631579
3,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.5,0.0,0.666667,0.0,0.5,0.25,0.25,0.0,0.0,1.0,0.0,0.736842,0.736842,0.736842
4,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.142857,0.75,0.75,0.0,0.333333,0.0,0.75,0.5,0.25,0.0,0.25,1.0,0.0,0.578947,0.684211,0.684211


Student Portuguese Dataset One Hot Encoded Standardized
Total number of columns:  59 Total number of rows:  649


Unnamed: 0,school_GP,school_MS,sex_F,sex_M,address_R,address_U,famsize_GT3,famsize_LE3,Pstatus_A,Pstatus_T,Mjob_at_home,Mjob_health,Mjob_other,Mjob_services,Mjob_teacher,Fjob_at_home,Fjob_health,Fjob_other,Fjob_services,Fjob_teacher,reason_course,reason_home,reason_other,reason_reputation,guardian_father,guardian_mother,guardian_other,schoolsup_no,schoolsup_yes,famsup_no,famsup_yes,paid_no,paid_yes,activities_no,activities_yes,nursery_no,nursery_yes,higher_no,higher_yes,internet_no,internet_yes,romantic_no,romantic_yes,age,Medu,Fedu,traveltime,studytime,failures,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
0,0.730944,-0.730944,0.833377,-0.833377,-0.660182,0.660182,0.648175,-0.648175,2.666927,-2.666927,1.951258,-0.282607,-0.812309,-0.514886,-0.353247,-0.263045,-0.19168,-1.140797,-0.621894,4.126473,1.13013,-0.545894,-0.353247,-0.53161,-0.555399,0.652973,-0.259681,-2.923032,2.923032,1.259229,-1.259229,0.252853,-0.252853,0.97114,-0.97114,-0.495663,0.495663,-0.344914,0.344914,1.816043,-1.816043,0.763496,-0.763496,1.031695,1.310216,1.540715,0.576718,0.083653,-0.374305,0.072606,-0.171647,0.693785,-0.543555,-0.997695,-0.371042,0.073433,-4.15547,-0.19582,-0.280658
1,0.730944,-0.730944,0.833377,-0.833377,-0.660182,0.660182,0.648175,-0.648175,-0.374963,0.374963,1.951258,-0.282607,-0.812309,-0.514886,-0.353247,-0.263045,-0.19168,0.87658,-0.621894,-0.242338,1.13013,-0.545894,-0.353247,-0.53161,1.800508,-1.531457,-0.259681,0.34211,-0.34211,-0.794137,0.794137,0.252853,-0.252853,0.97114,-0.97114,2.017502,-2.017502,-0.344914,0.344914,-0.550648,0.550648,0.763496,-0.763496,0.210137,-1.336039,-1.188832,-0.760032,0.083653,-0.374305,1.119748,-0.171647,-0.15738,-0.543555,-0.997695,-0.371042,-0.357863,-0.87457,-0.19582,-0.280658
2,0.730944,-0.730944,0.833377,-0.833377,-0.660182,0.660182,-1.542792,1.542792,-0.374963,0.374963,1.951258,-0.282607,-0.812309,-0.514886,-0.353247,-0.263045,-0.19168,0.87658,-0.621894,-0.242338,-0.884854,-0.545894,2.830881,-0.53161,-0.555399,0.652973,-0.259681,-2.923032,2.923032,1.259229,-1.259229,0.252853,-0.252853,0.97114,-0.97114,-0.495663,0.495663,-0.344914,0.344914,-0.550648,0.550648,0.763496,-0.763496,-1.43298,-1.336039,-1.188832,-0.760032,0.083653,-0.374305,0.072606,-0.171647,-1.008546,0.538553,0.560678,-0.371042,0.50473,0.219064,0.491137,0.029116
3,0.730944,-0.730944,0.833377,-0.833377,-0.660182,0.660182,0.648175,-0.648175,-0.374963,0.374963,-0.51249,3.538479,-0.812309,-0.514886,-0.353247,-0.263045,-0.19168,-1.140797,1.607991,-0.242338,-0.884854,1.831858,-0.353247,-0.53161,-0.555399,0.652973,-0.259681,0.34211,-0.34211,-0.794137,0.794137,0.252853,-0.252853,-1.029717,1.029717,-0.495663,0.495663,-0.344914,0.344914,-0.550648,0.550648,-1.309764,1.309764,-1.43298,1.310216,-0.278983,-0.760032,1.290114,-0.374305,-0.974536,-1.123771,-1.008546,-0.543555,-0.997695,1.012903,-0.789159,0.948153,0.834615,0.648663
4,0.730944,-0.730944,0.833377,-0.833377,-0.660182,0.660182,0.648175,-0.648175,-0.374963,0.374963,-0.51249,-0.282607,1.231058,-0.514886,-0.353247,-0.263045,-0.19168,0.87658,-0.621894,-0.242338,-0.884854,1.831858,-0.353247,-0.53161,1.800508,-1.531457,-0.259681,0.34211,-0.34211,-0.794137,0.794137,0.252853,-0.252853,0.97114,-0.97114,-0.495663,0.495663,-0.344914,0.344914,1.816043,-1.816043,0.763496,-0.763496,-0.611422,0.428131,0.630866,-0.760032,0.083653,-0.374305,0.072606,-0.171647,-1.008546,-0.543555,-0.218508,1.012903,-0.789159,-0.145481,0.491137,0.338889


In [4]:
# Step 3: Split the dataset into training and testing
# * Use 80% of the data for training and 20% for testing
# * For each dataset, split into input features and target
# * For wine dataset, the target is quality
# * For student dataset, the target is G3


def split_features_target(feature_df, target_df, target_col, test_size=0.2, random_state=42):
    """Separate the features from the target and make one train/test split.

    Parameters
    ----------
    feature_df : pandas.DataFrame
        Frame that supplies the input features; ``target_col`` is dropped from it.
    target_df : pandas.DataFrame
        Frame that supplies the target column. It can be a different
        preprocessing variant than ``feature_df`` (e.g. scaled features paired
        with the unscaled target).
        NOTE(review): both frames are expected to hold the same rows in the
        same order - confirm against the preprocessing step.
    target_col : str
        Name of the target column.
    test_size : float, default 0.2
        Fraction of rows kept for testing.
    random_state : int, default 42
        Seed handed to ``train_test_split`` so every variant is split with the same seed.

    Returns
    -------
    tuple
        ``(x, y, x_train, x_test, y_train, y_test)``
    """
    x = feature_df.drop(columns=[target_col])
    y = target_df[target_col]  # Target variable
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=test_size, random_state=random_state)
    return x, y, x_train, x_test, y_train, y_test


# Step 3.1 Split the wine dataset with train_test_split, x is features, y is target
# ! Possible Combination of feature and target for wine dataset
# ! 1. ori feature - ori target
# ! 2. norm feature - norm target
# ! 3. std feature - std target
# ! 4. norm feature - ori target
# ! 5. std feature - ori target
# Wine Red dataset
# 1. ori feature - ori target
(wineRed_x_ori, wineRed_y_ori,
 wineRed_x_train_ori, wineRed_x_test_ori,
 wineRed_y_train_ori, wineRed_y_test_ori) = split_features_target(wineRed_df, wineRed_df, 'quality')
# The features were previously stored under the misspelled name `wodeRed_x_ori`;
# keep it as an alias so any cell still using the old name keeps working.
wodeRed_x_ori = wineRed_x_ori
# 2. norm feature - norm target
(wineRed_x_norm, wineRed_y_norm,
 wineRed_x_train_norm, wineRed_x_test_norm,
 wineRed_y_train_norm, wineRed_y_test_norm) = split_features_target(wineRed_df_norm, wineRed_df_norm, 'quality')
# 3. std feature - std target
(wineRed_x_std, wineRed_y_std,
 wineRed_x_train_std, wineRed_x_test_std,
 wineRed_y_train_std, wineRed_y_test_std) = split_features_target(wineRed_df_std, wineRed_df_std, 'quality')
# 4. norm feature - ori target
(wineRed_x_norm_ori, wineRed_y_norm_ori,
 wineRed_x_train_norm_ori, wineRed_x_test_norm_ori,
 wineRed_y_train_norm_ori, wineRed_y_test_norm_ori) = split_features_target(wineRed_df_norm, wineRed_df, 'quality')
# 5. std feature - ori target
(wineRed_x_std_ori, wineRed_y_std_ori,
 wineRed_x_train_std_ori, wineRed_x_test_std_ori,
 wineRed_y_train_std_ori, wineRed_y_test_std_ori) = split_features_target(wineRed_df_std, wineRed_df, 'quality')

# Wine White dataset
# 1. ori feature - ori target
(wineWhite_x_ori, wineWhite_y_ori,
 wineWhite_x_train_ori, wineWhite_x_test_ori,
 wineWhite_y_train_ori, wineWhite_y_test_ori) = split_features_target(wineWhite_df, wineWhite_df, 'quality')
# 2. norm feature - norm target
(wineWhite_x_norm, wineWhite_y_norm,
 wineWhite_x_train_norm, wineWhite_x_test_norm,
 wineWhite_y_train_norm, wineWhite_y_test_norm) = split_features_target(wineWhite_df_norm, wineWhite_df_norm, 'quality')
# 3. std feature - std target
(wineWhite_x_std, wineWhite_y_std,
 wineWhite_x_train_std, wineWhite_x_test_std,
 wineWhite_y_train_std, wineWhite_y_test_std) = split_features_target(wineWhite_df_std, wineWhite_df_std, 'quality')
# 4. norm feature - ori target
(wineWhite_x_norm_ori, wineWhite_y_norm_ori,
 wineWhite_x_train_norm_ori, wineWhite_x_test_norm_ori,
 wineWhite_y_train_norm_ori, wineWhite_y_test_norm_ori) = split_features_target(wineWhite_df_norm, wineWhite_df, 'quality')
# 5. std feature - ori target
(wineWhite_x_std_ori, wineWhite_y_std_ori,
 wineWhite_x_train_std_ori, wineWhite_x_test_std_ori,
 wineWhite_y_train_std_ori, wineWhite_y_test_std_ori) = split_features_target(wineWhite_df_std, wineWhite_df, 'quality')

# Step 3.2 Split the student dataset with train_test_split, x is features, y is target
# ! Possible Combination of feature and target for student dataset
# ! 1. le feature - ori target
# ! 2. le-norm feature - ori target
# ! 3. le-std feature - ori target
# ! 4. ohe feature - ori target
# ! 5. ohe-norm feature - ori target
# ! 6. ohe-std feature - ori target
# ! 7. le feature - le target
# ! 8. le-norm feature - le target
# ! 9. le-std feature - le target
# ! 10. ohe feature - ohe target
# ! 11. ohe-norm feature - ohe target
# ! 12. ohe-std feature - ohe target
# Combinations 7-12 read G3 from the very same `*_df_le` / `*_df_ohe` frames as
# combinations 1-6 and fill the very same variables, so each split below covers
# both its "ori target" and its "le/ohe target" variant and is computed only once.

# Student Math dataset
# 1. le feature - ori target  (also 7. le feature - le target)
(stdMath_x_le, stdMath_y_le,
 stdMath_x_train_le, stdMath_x_test_le,
 stdMath_y_train_le, stdMath_y_test_le) = split_features_target(stdMath_df_le, stdMath_df_le, 'G3')
# 2. le-norm feature - ori target  (also 8. le-norm feature - le target)
(stdMath_x_le_norm, stdMath_y_le_norm,
 stdMath_x_train_le_norm, stdMath_x_test_le_norm,
 stdMath_y_train_le_norm, stdMath_y_test_le_norm) = split_features_target(stdMath_df_le_norm, stdMath_df_le, 'G3')
# 3. le-std feature - ori target  (also 9. le-std feature - le target)
(stdMath_x_le_std, stdMath_y_le_std,
 stdMath_x_train_le_std, stdMath_x_test_le_std,
 stdMath_y_train_le_std, stdMath_y_test_le_std) = split_features_target(stdMath_df_le_std, stdMath_df_le, 'G3')
# 4. ohe feature - ori target  (also 10. ohe feature - ohe target)
(stdMath_x_ohe, stdMath_y_ohe,
 stdMath_x_train_ohe, stdMath_x_test_ohe,
 stdMath_y_train_ohe, stdMath_y_test_ohe) = split_features_target(stdMath_df_ohe, stdMath_df_ohe, 'G3')
# 5. ohe-norm feature - ori target  (also 11. ohe-norm feature - ohe target)
(stdMath_x_ohe_norm, stdMath_y_ohe_norm,
 stdMath_x_train_ohe_norm, stdMath_x_test_ohe_norm,
 stdMath_y_train_ohe_norm, stdMath_y_test_ohe_norm) = split_features_target(stdMath_df_ohe_norm, stdMath_df_ohe, 'G3')
# 6. ohe-std feature - ori target  (also 12. ohe-std feature - ohe target)
(stdMath_x_ohe_std, stdMath_y_ohe_std,
 stdMath_x_train_ohe_std, stdMath_x_test_ohe_std,
 stdMath_y_train_ohe_std, stdMath_y_test_ohe_std) = split_features_target(stdMath_df_ohe_std, stdMath_df_ohe, 'G3')

# Student Portuguese dataset
# 1. le feature - ori target  (also 7. le feature - le target)
(stdPor_x_le, stdPor_y_le,
 stdPor_x_train_le, stdPor_x_test_le,
 stdPor_y_train_le, stdPor_y_test_le) = split_features_target(stdPor_df_le, stdPor_df_le, 'G3')
# 2. le-norm feature - ori target  (also 8. le-norm feature - le target)
(stdPor_x_le_norm, stdPor_y_le_norm,
 stdPor_x_train_le_norm, stdPor_x_test_le_norm,
 stdPor_y_train_le_norm, stdPor_y_test_le_norm) = split_features_target(stdPor_df_le_norm, stdPor_df_le, 'G3')
# 3. le-std feature - ori target  (also 9. le-std feature - le target)
(stdPor_x_le_std, stdPor_y_le_std,
 stdPor_x_train_le_std, stdPor_x_test_le_std,
 stdPor_y_train_le_std, stdPor_y_test_le_std) = split_features_target(stdPor_df_le_std, stdPor_df_le, 'G3')
# 4. ohe feature - ori target  (also 10. ohe feature - ohe target)
(stdPor_x_ohe, stdPor_y_ohe,
 stdPor_x_train_ohe, stdPor_x_test_ohe,
 stdPor_y_train_ohe, stdPor_y_test_ohe) = split_features_target(stdPor_df_ohe, stdPor_df_ohe, 'G3')
# 5. ohe-norm feature - ori target  (also 11. ohe-norm feature - ohe target)
(stdPor_x_ohe_norm, stdPor_y_ohe_norm,
 stdPor_x_train_ohe_norm, stdPor_x_test_ohe_norm,
 stdPor_y_train_ohe_norm, stdPor_y_test_ohe_norm) = split_features_target(stdPor_df_ohe_norm, stdPor_df_ohe, 'G3')
# 6. ohe-std feature - ori target  (also 12. ohe-std feature - ohe target)
(stdPor_x_ohe_std, stdPor_y_ohe_std,
 stdPor_x_train_ohe_std, stdPor_x_test_ohe_std,
 stdPor_y_train_ohe_std, stdPor_y_test_ohe_std) = split_features_target(stdPor_df_ohe_std, stdPor_df_ohe, 'G3')

In [None]:
# Step 4: Build , Train, Evaluate the model
# * Both dataset targets are numeric scores (quality, G3); MLPClassifier treats every distinct score as a class
# * Evaluate the model with Classification report and Confusion matrix
# * Plot the confusion matrix

# One row per evaluated run (indexed by the run title), filled by train_and_evaluate
result = pd.DataFrame(columns=["Accuracy"])


# A function to train and evaluate the model
def train_and_evaluate(model, x_train, y_train, x_test, y_test, tittle):
    """Fit ``model``, report its test-set metrics and save a confusion-matrix plot.

    Parameters
    ----------
    model : estimator exposing ``fit`` and ``predict``
        Fitted in place on the training data.
    x_train, y_train : training features and target
    x_test, y_test : held-out features and target used for every metric
    tittle : str
        Run label. It is printed, used as the row key in the module-level
        ``result`` frame, used as the plot title and embedded in the file name.

    Side effects
    ------------
    * prints the accuracy and the classification report
    * writes the accuracy into ``result.loc[tittle]``
    * saves ``output/1. MLPClassifier <tittle>.png`` (the folder is created if missing)
    """
    import os  # local import: only needed to make sure the output folder exists

    # * Train the model
    model.fit(x_train, y_train)

    # * Evaluate the model with error based metrics (Accuracy, Precision, Recall, F1 Score)
    y_pred = model.predict(x_test)
    # Classification report
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy: ", accuracy)
    print("Classification Report for ", tittle)
    # zero_division=0 reports 0.00 for classes that are never predicted (the
    # value already shown in the reports) without an UndefinedMetricWarning
    # being raised for every such class.
    print(classification_report(y_test, y_pred, zero_division=0))
    result.loc[tittle] = [accuracy]

    # * Plot the prediction vs actual target (Confusion Matrix)
    # Confusion matrix
    # Fix the class order explicitly so the heatmap axes can show the real class
    # values instead of the positional indices 0..k-1.
    class_labels = sorted(set(y_test) | set(y_pred))
    cm = confusion_matrix(y_test, y_pred, labels=class_labels)
    plt.figure(figsize=(10, 7))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Reds', xticklabels=class_labels, yticklabels=class_labels)
    plt.title(tittle)
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    # savefig does not create missing folders, so make sure the target exists
    os.makedirs('output', exist_ok=True)
    plt.savefig('output/1. MLPClassifier ' + tittle + '.png', dpi=600)
    plt.close()


# Step 4.1 Build the model
# Hyper-parameters shared by all three networks; only the hidden layer layout differs.
# NOTE(review): scikit-learn documents `learning_rate` as only used by solver='sgd',
# so 'adaptive' presumably has no effect with 'adam' - confirm this is intended.
mlp_shared_params = dict(
    activation='relu',
    solver='adam',
    learning_rate='adaptive',
    learning_rate_init=0.025,
    max_iter=1000,
    random_state=42,
    early_stopping=True,
    validation_fraction=0.2,
    # verbose=True,
)

# For wine dataset
# 11 features with 4898 samples
wineModel = MLPClassifier(hidden_layer_sizes=(16, 8), **mlp_shared_params)

# For student dataset
# 30 features with 649 samples
stdModel = MLPClassifier(hidden_layer_sizes=(32, 16, 8), **mlp_shared_params)

# Standard MLPClassifier Model
standardModel = MLPClassifier(hidden_layer_sizes=(16, 8, 8), **mlp_shared_params)

In [6]:
# Step 4.2 Train and Evaluate the model with custom model
# Every run is described as (x_train, y_train, x_test, y_test, title) and the
# tables are walked in order, so the runs execute in the sequence listed.

# ! Possible Combination of feature and target for wine dataset
# ! 1. ori feature - ori target
# ! 2. norm feature - ori target
# ! 3. std feature - ori target
wine_runs = [
    # For Red Wine Dataset
    (wineRed_x_train_ori, wineRed_y_train_ori, wineRed_x_test_ori, wineRed_y_test_ori,
     "Wine Red [wineModel] - [ori feature - ori target]"),
    (wineRed_x_train_norm_ori, wineRed_y_train_norm_ori, wineRed_x_test_norm_ori, wineRed_y_test_norm_ori,
     "Wine Red [wineModel] - [norm feature - ori target]"),
    (wineRed_x_train_std_ori, wineRed_y_train_std_ori, wineRed_x_test_std_ori, wineRed_y_test_std_ori,
     "Wine Red [wineModel] - [std feature - ori target]"),
    # For White Wine Dataset
    (wineWhite_x_train_ori, wineWhite_y_train_ori, wineWhite_x_test_ori, wineWhite_y_test_ori,
     "Wine White [wineModel] - [ori feature - ori target]"),
    (wineWhite_x_train_norm_ori, wineWhite_y_train_norm_ori, wineWhite_x_test_norm_ori, wineWhite_y_test_norm_ori,
     "Wine White [wineModel] - [norm feature - ori target]"),
    (wineWhite_x_train_std_ori, wineWhite_y_train_std_ori, wineWhite_x_test_std_ori, wineWhite_y_test_std_ori,
     "Wine White [wineModel] - [std feature - ori target]"),
]
for wine_run in wine_runs:
    train_and_evaluate(wineModel, *wine_run)


# ! Possible Combination of feature and target for student dataset
# ! 1. le feature - ori target
# ! 2. le-norm feature - ori target
# ! 3. le-std feature - ori target
# ! 4. ohe feature - ori target
# ! 5. ohe-norm feature - ori target
# ! 6. ohe-std feature - ori target
student_runs = [
    # For Student Math Dataset
    (stdMath_x_train_le, stdMath_y_train_le, stdMath_x_test_le, stdMath_y_test_le,
     "Student Math [stdModel] - [le feature - ori target]"),
    (stdMath_x_train_le_norm, stdMath_y_train_le_norm, stdMath_x_test_le_norm, stdMath_y_test_le_norm,
     "Student Math [stdModel] - [le-norm feature - ori target]"),
    (stdMath_x_train_le_std, stdMath_y_train_le_std, stdMath_x_test_le_std, stdMath_y_test_le_std,
     "Student Math [stdModel] - [le-std feature - ori target]"),
    (stdMath_x_train_ohe, stdMath_y_train_ohe, stdMath_x_test_ohe, stdMath_y_test_ohe,
     "Student Math [stdModel] - [ohe feature - ori target]"),
    (stdMath_x_train_ohe_norm, stdMath_y_train_ohe_norm, stdMath_x_test_ohe_norm, stdMath_y_test_ohe_norm,
     "Student Math [stdModel] - [ohe-norm feature - ori target]"),
    (stdMath_x_train_ohe_std, stdMath_y_train_ohe_std, stdMath_x_test_ohe_std, stdMath_y_test_ohe_std,
     "Student Math [stdModel] - [ohe-std feature - ori target]"),
    # For Student Portuguese Dataset
    (stdPor_x_train_le, stdPor_y_train_le, stdPor_x_test_le, stdPor_y_test_le,
     "Student Portuguese [stdModel] - [le feature - ori target]"),
    (stdPor_x_train_le_norm, stdPor_y_train_le_norm, stdPor_x_test_le_norm, stdPor_y_test_le_norm,
     "Student Portuguese [stdModel] - [le-norm feature - ori target]"),
    (stdPor_x_train_le_std, stdPor_y_train_le_std, stdPor_x_test_le_std, stdPor_y_test_le_std,
     "Student Portuguese [stdModel] - [le-std feature - ori target]"),
    (stdPor_x_train_ohe, stdPor_y_train_ohe, stdPor_x_test_ohe, stdPor_y_test_ohe,
     "Student Portuguese [stdModel] - [ohe feature - ori target]"),
    (stdPor_x_train_ohe_norm, stdPor_y_train_ohe_norm, stdPor_x_test_ohe_norm, stdPor_y_test_ohe_norm,
     "Student Portuguese [stdModel] - [ohe-norm feature - ori target]"),
    (stdPor_x_train_ohe_std, stdPor_y_train_ohe_std, stdPor_x_test_ohe_std, stdPor_y_test_ohe_std,
     "Student Portuguese [stdModel] - [ohe-std feature - ori target]"),
]
for student_run in student_runs:
    train_and_evaluate(stdModel, *student_run)

Accuracy:  0.51875
Classification Report for  Wine Red [wineModel] - [ori feature - ori target]
              precision    recall  f1-score   support

           3       0.00      0.00      0.00         1
           4       0.00      0.00      0.00        10
           5       0.55      0.66      0.60       130
           6       0.49      0.61      0.54       132
           7       0.00      0.00      0.00        42
           8       0.00      0.00      0.00         5

    accuracy                           0.52       320
   macro avg       0.17      0.21      0.19       320
weighted avg       0.43      0.52      0.47       320



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy:  0.578125
Classification Report for  Wine Red [wineModel] - [norm feature - ori target]
              precision    recall  f1-score   support

           3       0.00      0.00      0.00         1
           4       0.00      0.00      0.00        10
           5       0.67      0.65      0.66       130
           6       0.53      0.70      0.60       132
           7       0.47      0.19      0.27        42
           8       0.00      0.00      0.00         5

    accuracy                           0.58       320
   macro avg       0.28      0.26      0.25       320
weighted avg       0.55      0.58      0.55       320



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy:  0.6
Classification Report for  Wine Red [wineModel] - [std feature - ori target]
              precision    recall  f1-score   support

           3       0.00      0.00      0.00         1
           4       0.00      0.00      0.00        10
           5       0.65      0.74      0.69       130
           6       0.57      0.58      0.57       132
           7       0.51      0.48      0.49        42
           8       0.00      0.00      0.00         5

    accuracy                           0.60       320
   macro avg       0.29      0.30      0.29       320
weighted avg       0.57      0.60      0.58       320



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy:  0.48673469387755103
Classification Report for  Wine White [wineModel] - [ori feature - ori target]
              precision    recall  f1-score   support

           3       0.00      0.00      0.00         5
           4       0.00      0.00      0.00        25
           5       0.57      0.30      0.40       291
           6       0.48      0.78      0.59       432
           7       0.44      0.27      0.33       192
           8       0.00      0.00      0.00        35

    accuracy                           0.49       980
   macro avg       0.25      0.23      0.22       980
weighted avg       0.47      0.49      0.44       980



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy:  0.5040816326530613
Classification Report for  Wine White [wineModel] - [norm feature - ori target]
              precision    recall  f1-score   support

           3       0.00      0.00      0.00         5
           4       0.00      0.00      0.00        25
           5       0.57      0.51      0.54       291
           6       0.49      0.73      0.58       432
           7       0.44      0.16      0.24       192
           8       0.00      0.00      0.00        35

    accuracy                           0.50       980
   macro avg       0.25      0.23      0.23       980
weighted avg       0.47      0.50      0.46       980



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy:  0.5673469387755102
Classification Report for  Wine White [wineModel] - [std feature - ori target]
              precision    recall  f1-score   support

           3       0.25      0.20      0.22         5
           4       0.00      0.00      0.00        25
           5       0.62      0.63      0.62       291
           6       0.56      0.70      0.62       432
           7       0.52      0.35      0.42       192
           8       0.00      0.00      0.00        35

    accuracy                           0.57       980
   macro avg       0.32      0.31      0.32       980
weighted avg       0.53      0.57      0.54       980

Accuracy:  0.0759493670886076
Classification Report for  Student Math [stdModel] - [le feature - ori target]
              precision    recall  f1-score   support

           0       0.50      0.20      0.29         5
           5       0.00      0.00      0.00         4
           6       0.00      0.00      0.00         6
           7       0.0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy:  0.189873417721519
Classification Report for  Student Math [stdModel] - [le-norm feature - ori target]
              precision    recall  f1-score   support

           0       0.25      0.20      0.22         5
           5       0.00      0.00      0.00         4
           6       1.00      0.17      0.29         6
           7       0.00      0.00      0.00         1
           8       0.25      0.50      0.33         6
           9       0.22      0.40      0.29         5
          10       0.14      0.09      0.11        11
          11       0.11      0.40      0.17         5
          12       0.00      0.00      0.00         5
          13       0.00      0.00      0.00         5
          14       0.25      0.33      0.29         6
          15       0.40      0.20      0.27        10
          16       0.00      0.00      0.00         4
          17       0.00      0.00      0.00         3
          18       0.20      1.00      0.33         1
          19       0.0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy:  0.17721518987341772
Classification Report for  Student Math [stdModel] - [le-std feature - ori target]
              precision    recall  f1-score   support

           0       0.14      0.20      0.17         5
           5       0.00      0.00      0.00         4
           6       1.00      0.33      0.50         6
           7       0.00      0.00      0.00         1
           8       0.18      0.33      0.24         6
           9       0.17      0.20      0.18         5
          10       0.22      0.18      0.20        11
          11       0.12      0.40      0.19         5
          12       0.00      0.00      0.00         5
          13       0.00      0.00      0.00         5
          14       0.00      0.00      0.00         6
          15       0.27      0.40      0.32        10
          16       0.00      0.00      0.00         4
          17       0.00      0.00      0.00         3
          18       0.00      0.00      0.00         1
          19       0.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy:  0.06329113924050633
Classification Report for  Student Math [stdModel] - [ohe feature - ori target]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         5
           5       0.00      0.00      0.00         4
           6       0.00      0.00      0.00         6
           7       0.00      0.00      0.00         1
           8       0.00      0.00      0.00         6
           9       0.00      0.00      0.00         5
          10       0.00      0.00      0.00        11
          11       0.06      1.00      0.12         5
          12       0.00      0.00      0.00         5
          13       0.00      0.00      0.00         5
          14       0.00      0.00      0.00         6
          15       0.00      0.00      0.00        10
          16       0.00      0.00      0.00         4
          17       0.00      0.00      0.00         3
          18       0.00      0.00      0.00         1
          19       0.00 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy:  0.02531645569620253
Classification Report for  Student Math [stdModel] - [ohe-norm feature - ori target]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         5
           5       0.00      0.00      0.00         4
           6       0.00      0.00      0.00         6
           7       0.00      0.00      0.00         1
           8       0.00      0.00      0.00         6
           9       0.00      0.00      0.00         5
          10       0.00      0.00      0.00        11
          11       0.02      0.20      0.03         5
          12       0.00      0.00      0.00         5
          13       0.00      0.00      0.00         5
          14       0.00      0.00      0.00         6
          15       0.00      0.00      0.00        10
          16       0.00      0.00      0.00         4
          17       0.08      0.33      0.12         3
          18       0.00      0.00      0.00         1
          19       

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy:  0.12658227848101267
Classification Report for  Student Math [stdModel] - [ohe-std feature - ori target]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         5
           5       0.00      0.00      0.00         4
           6       0.00      0.00      0.00         6
           7       0.00      0.00      0.00         1
           8       0.00      0.00      0.00         6
           9       0.00      0.00      0.00         5
          10       0.00      0.00      0.00        11
          11       0.04      0.40      0.07         5
          12       0.00      0.00      0.00         5
          13       0.00      0.00      0.00         5
          14       0.00      0.00      0.00         6
          15       0.32      0.80      0.46        10
          16       0.00      0.00      0.00         4
          17       0.00      0.00      0.00         3
          18       0.00      0.00      0.00         1
          19       0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy:  0.13076923076923078
Classification Report for  Student Portuguese [stdModel] - [le feature - ori target]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           7       0.00      0.00      0.00         1
           8       0.00      0.00      0.00         7
           9       0.00      0.00      0.00         5
          10       0.13      1.00      0.23        17
          11       0.00      0.00      0.00        25
          12       0.00      0.00      0.00        16
          13       0.00      0.00      0.00        13
          14       0.00      0.00      0.00        12
          15       0.00      0.00      0.00        10
          16       0.00      0.00      0.00         9
          17       0.00      0.00      0.00         5
          18       0.00      0.00      0.00         7
          19       0.00      0.00      0.00         1

    accuracy                           0.13       130
   macro avg      

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy:  0.2076923076923077
Classification Report for  Student Portuguese [stdModel] - [le-norm feature - ori target]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           7       0.00      0.00      0.00         1
           8       0.00      0.00      0.00         7
           9       0.00      0.00      0.00         5
          10       0.28      0.65      0.39        17
          11       0.00      0.00      0.00        25
          12       0.17      0.25      0.21        16
          13       0.19      0.31      0.24        13
          14       0.00      0.00      0.00        12
          15       0.18      0.80      0.29        10
          16       0.00      0.00      0.00         9
          17       0.00      0.00      0.00         5
          18       0.00      0.00      0.00         7
          19       0.00      0.00      0.00         1

    accuracy                           0.21       130
   macro avg  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy:  0.36923076923076925
Classification Report for  Student Portuguese [stdModel] - [le-std feature - ori target]
              precision    recall  f1-score   support

           0       0.33      0.50      0.40         2
           7       0.00      0.00      0.00         1
           8       0.33      0.14      0.20         7
           9       0.29      0.40      0.33         5
          10       0.37      0.41      0.39        17
          11       0.54      0.52      0.53        25
          12       0.29      0.38      0.32        16
          13       0.42      0.38      0.40        13
          14       0.33      0.25      0.29        12
          15       0.67      0.20      0.31        10
          16       0.33      0.44      0.38         9
          17       0.27      0.80      0.40         5
          18       0.00      0.00      0.00         7
          19       0.00      0.00      0.00         1

    accuracy                           0.37       130
   macro avg  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy:  0.09230769230769231
Classification Report for  Student Portuguese [stdModel] - [ohe feature - ori target]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           7       0.00      0.00      0.00         1
           8       0.00      0.00      0.00         7
           9       0.00      0.00      0.00         5
          10       0.00      0.00      0.00        17
          11       0.00      0.00      0.00        25
          12       0.00      0.00      0.00        16
          13       0.00      0.00      0.00        13
          14       0.09      1.00      0.17        12
          15       0.00      0.00      0.00        10
          16       0.00      0.00      0.00         9
          17       0.00      0.00      0.00         5
          18       0.00      0.00      0.00         7
          19       0.00      0.00      0.00         1

    accuracy                           0.09       130
   macro avg     

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy:  0.2076923076923077
Classification Report for  Student Portuguese [stdModel] - [ohe-norm feature - ori target]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           7       0.00      0.00      0.00         1
           8       0.00      0.00      0.00         7
           9       0.00      0.00      0.00         5
          10       0.23      0.18      0.20        17
          11       0.22      0.96      0.35        25
          12       0.00      0.00      0.00        16
          13       0.00      0.00      0.00        13
          14       0.00      0.00      0.00        12
          15       0.00      0.00      0.00        10
          16       0.00      0.00      0.00         9
          17       0.00      0.00      0.00         5
          18       0.00      0.00      0.00         7
          19       0.00      0.00      0.00         1

    accuracy                           0.21       130
   macro avg 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy:  0.11538461538461539
Classification Report for  Student Portuguese [stdModel] - [ohe-std feature - ori target]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           7       0.00      0.00      0.00         1
           8       0.00      0.00      0.00         7
           9       0.00      0.00      0.00         5
          10       0.15      0.35      0.21        17
          11       0.10      0.08      0.09        25
          12       0.18      0.25      0.21        16
          13       0.06      0.08      0.07        13
          14       0.07      0.17      0.10        12
          15       0.00      0.00      0.00        10
          16       0.00      0.00      0.00         9
          17       0.00      0.00      0.00         5
          18       0.00      0.00      0.00         7
          19       0.00      0.00      0.00         1

    accuracy                           0.12       130
   macro avg 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [7]:
# Step 4.2 Train and Evaluate the model with custom model
#
# Every run below passes the same `standardModel` object to
# `train_and_evaluate`; only the dataset and the feature preprocessing vary.
# The target is the "ori" target in every run.
#
# Feature variants for the wine datasets (red and white):
#   1. ori  feature - ori target
#   2. norm feature - ori target
#   3. std  feature - ori target
#
# Feature variants for the student datasets (math and Portuguese):
#   1. le       feature - ori target
#   2. le-norm  feature - ori target
#   3. le-std   feature - ori target
#   4. ohe      feature - ori target
#   5. ohe-norm feature - ori target
#   6. ohe-std  feature - ori target
#
# NOTE(review): one estimator instance is reused for all runs. This presumably
# relies on `train_and_evaluate` refitting it from scratch on each call --
# confirm against its definition (e.g. that warm_start is not enabled).

# One row per run: (x_train, y_train, x_test, y_test, dataset name, feature variant).
# Rows are listed in execution order; the report label is derived from the
# last two fields so it cannot drift from the data actually used.
standard_model_runs = [
    # For Red Wine Dataset
    (wineRed_x_train_ori, wineRed_y_train_ori, wineRed_x_test_ori, wineRed_y_test_ori, "Wine Red", "ori"),
    (wineRed_x_train_norm_ori, wineRed_y_train_norm_ori, wineRed_x_test_norm_ori, wineRed_y_test_norm_ori, "Wine Red", "norm"),
    (wineRed_x_train_std_ori, wineRed_y_train_std_ori, wineRed_x_test_std_ori, wineRed_y_test_std_ori, "Wine Red", "std"),

    # For White Wine Dataset
    (wineWhite_x_train_ori, wineWhite_y_train_ori, wineWhite_x_test_ori, wineWhite_y_test_ori, "Wine White", "ori"),
    (wineWhite_x_train_norm_ori, wineWhite_y_train_norm_ori, wineWhite_x_test_norm_ori, wineWhite_y_test_norm_ori, "Wine White", "norm"),
    (wineWhite_x_train_std_ori, wineWhite_y_train_std_ori, wineWhite_x_test_std_ori, wineWhite_y_test_std_ori, "Wine White", "std"),

    # For Student Math Dataset
    (stdMath_x_train_le, stdMath_y_train_le, stdMath_x_test_le, stdMath_y_test_le, "Student Math", "le"),
    (stdMath_x_train_le_norm, stdMath_y_train_le_norm, stdMath_x_test_le_norm, stdMath_y_test_le_norm, "Student Math", "le-norm"),
    (stdMath_x_train_le_std, stdMath_y_train_le_std, stdMath_x_test_le_std, stdMath_y_test_le_std, "Student Math", "le-std"),
    (stdMath_x_train_ohe, stdMath_y_train_ohe, stdMath_x_test_ohe, stdMath_y_test_ohe, "Student Math", "ohe"),
    (stdMath_x_train_ohe_norm, stdMath_y_train_ohe_norm, stdMath_x_test_ohe_norm, stdMath_y_test_ohe_norm, "Student Math", "ohe-norm"),
    (stdMath_x_train_ohe_std, stdMath_y_train_ohe_std, stdMath_x_test_ohe_std, stdMath_y_test_ohe_std, "Student Math", "ohe-std"),

    # For Student Portuguese Dataset
    (stdPor_x_train_le, stdPor_y_train_le, stdPor_x_test_le, stdPor_y_test_le, "Student Portuguese", "le"),
    (stdPor_x_train_le_norm, stdPor_y_train_le_norm, stdPor_x_test_le_norm, stdPor_y_test_le_norm, "Student Portuguese", "le-norm"),
    (stdPor_x_train_le_std, stdPor_y_train_le_std, stdPor_x_test_le_std, stdPor_y_test_le_std, "Student Portuguese", "le-std"),
    (stdPor_x_train_ohe, stdPor_y_train_ohe, stdPor_x_test_ohe, stdPor_y_test_ohe, "Student Portuguese", "ohe"),
    (stdPor_x_train_ohe_norm, stdPor_y_train_ohe_norm, stdPor_x_test_ohe_norm, stdPor_y_test_ohe_norm, "Student Portuguese", "ohe-norm"),
    (stdPor_x_train_ohe_std, stdPor_y_train_ohe_std, stdPor_x_test_ohe_std, stdPor_y_test_ohe_std, "Student Portuguese", "ohe-std"),
]

# `run_splits` collects the four leading fields (x_train, y_train, x_test,
# y_test) and is forwarded positionally, matching the original call shape.
for *run_splits, run_dataset, run_variant in standard_model_runs:
    train_and_evaluate(
        standardModel,
        *run_splits,
        f"{run_dataset} [standardModel] - [{run_variant} feature - ori target]",
    )

Accuracy:  0.5
Classification Report for  Wine Red [standardModel] - [ori feature - ori target]
              precision    recall  f1-score   support

           3       0.00      0.00      0.00         1
           4       0.00      0.00      0.00        10
           5       0.52      0.74      0.61       130
           6       0.47      0.42      0.44       132
           7       0.43      0.21      0.29        42
           8       0.00      0.00      0.00         5

    accuracy                           0.50       320
   macro avg       0.24      0.23      0.22       320
weighted avg       0.46      0.50      0.47       320



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy:  0.56875
Classification Report for  Wine Red [standardModel] - [norm feature - ori target]
              precision    recall  f1-score   support

           3       0.00      0.00      0.00         1
           4       0.00      0.00      0.00        10
           5       0.63      0.75      0.68       130
           6       0.55      0.57      0.56       132
           7       0.34      0.24      0.28        42
           8       0.00      0.00      0.00         5

    accuracy                           0.57       320
   macro avg       0.25      0.26      0.25       320
weighted avg       0.53      0.57      0.54       320



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy:  0.6
Classification Report for  Wine Red [standardModel] - [std feature - ori target]
              precision    recall  f1-score   support

           3       0.00      0.00      0.00         1
           4       0.00      0.00      0.00        10
           5       0.63      0.75      0.69       130
           6       0.58      0.57      0.57       132
           7       0.54      0.48      0.51        42
           8       0.00      0.00      0.00         5

    accuracy                           0.60       320
   macro avg       0.29      0.30      0.29       320
weighted avg       0.57      0.60      0.58       320



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy:  0.4448979591836735
Classification Report for  Wine White [standardModel] - [ori feature - ori target]
              precision    recall  f1-score   support

           3       0.00      0.00      0.00         5
           4       0.00      0.00      0.00        25
           5       0.43      0.48      0.45       291
           6       0.45      0.68      0.55       432
           7       0.00      0.00      0.00       192
           8       0.00      0.00      0.00        35

    accuracy                           0.44       980
   macro avg       0.15      0.19      0.17       980
weighted avg       0.33      0.44      0.38       980



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy:  0.5112244897959184
Classification Report for  Wine White [standardModel] - [norm feature - ori target]
              precision    recall  f1-score   support

           3       0.00      0.00      0.00         5
           4       0.00      0.00      0.00        25
           5       0.54      0.58      0.56       291
           6       0.50      0.68      0.58       432
           7       0.46      0.21      0.29       192
           8       0.00      0.00      0.00        35

    accuracy                           0.51       980
   macro avg       0.25      0.24      0.24       980
weighted avg       0.47      0.51      0.48       980



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy:  0.5479591836734694
Classification Report for  Wine White [standardModel] - [std feature - ori target]
              precision    recall  f1-score   support

           3       0.00      0.00      0.00         5
           4       1.00      0.08      0.15        25
           5       0.60      0.64      0.62       291
           6       0.54      0.62      0.58       432
           7       0.47      0.42      0.44       192
           8       0.00      0.00      0.00        35

    accuracy                           0.55       980
   macro avg       0.44      0.29      0.30       980
weighted avg       0.53      0.55      0.53       980



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy:  0.25316455696202533
Classification Report for  Student Math [standardModel] - [le feature - ori target]
              precision    recall  f1-score   support

           0       0.28      1.00      0.43         5
           5       0.00      0.00      0.00         4
           6       0.00      0.00      0.00         6
           7       0.00      0.00      0.00         1
           8       0.00      0.00      0.00         6
           9       0.00      0.00      0.00         5
          10       0.38      0.45      0.42        11
          11       0.05      0.20      0.08         5
          12       0.00      0.00      0.00         5
          13       0.00      0.00      0.00         5
          14       0.00      0.00      0.00         6
          15       0.33      0.90      0.49        10
          16       0.00      0.00      0.00         4
          17       0.00      0.00      0.00         3
          18       0.00      0.00      0.00         1
          19       0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy:  0.12658227848101267
Classification Report for  Student Math [standardModel] - [le-norm feature - ori target]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         5
           5       0.00      0.00      0.00         4
           6       0.00      0.00      0.00         6
           7       0.00      0.00      0.00         1
           8       0.00      0.00      0.00         6
           9       0.00      0.00      0.00         5
          10       0.24      0.55      0.33        11
          11       0.07      0.80      0.14         5
          12       0.00      0.00      0.00         5
          13       0.00      0.00      0.00         5
          14       0.00      0.00      0.00         6
          15       0.00      0.00      0.00        10
          16       0.00      0.00      0.00         4
          17       0.00      0.00      0.00         3
          18       0.00      0.00      0.00         1
          19   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy:  0.17721518987341772
Classification Report for  Student Math [standardModel] - [le-std feature - ori target]
              precision    recall  f1-score   support

           0       0.11      0.20      0.14         5
           5       0.00      0.00      0.00         4
           6       0.00      0.00      0.00         6
           7       0.00      0.00      0.00         1
           8       0.00      0.00      0.00         6
           9       0.17      0.20      0.18         5
          10       0.21      0.36      0.27        11
          11       0.33      0.60      0.43         5
          12       1.00      0.20      0.33         5
          13       0.00      0.00      0.00         5
          14       0.06      0.17      0.09         6
          15       0.67      0.20      0.31        10
          16       0.33      0.25      0.29         4
          17       0.00      0.00      0.00         3
          18       0.00      0.00      0.00         1
          19    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy:  0.22784810126582278
Classification Report for  Student Math [standardModel] - [ohe feature - ori target]
              precision    recall  f1-score   support

           0       0.40      0.40      0.40         5
           5       0.00      0.00      0.00         4
           6       0.00      0.00      0.00         6
           7       0.00      0.00      0.00         1
           8       0.11      0.33      0.17         6
           9       0.00      0.00      0.00         5
          10       0.25      0.27      0.26        11
          11       0.12      0.20      0.15         5
          12       0.00      0.00      0.00         5
          13       0.15      0.40      0.22         5
          14       0.00      0.00      0.00         6
          15       0.44      0.70      0.54        10
          16       0.00      0.00      0.00         4
          17       0.00      0.00      0.00         3
          18       0.14      1.00      0.25         1
          19       

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy:  0.11392405063291139
Classification Report for  Student Math [standardModel] - [ohe-norm feature - ori target]
              precision    recall  f1-score   support

           0       0.20      0.40      0.27         5
           5       0.00      0.00      0.00         4
           6       0.00      0.00      0.00         6
           7       0.00      0.00      0.00         1
           8       0.00      0.00      0.00         6
           9       0.00      0.00      0.00         5
          10       0.12      0.36      0.19        11
          11       0.09      0.60      0.15         5
          12       0.00      0.00      0.00         5
          13       0.00      0.00      0.00         5
          14       0.00      0.00      0.00         6
          15       0.00      0.00      0.00        10
          16       0.00      0.00      0.00         4
          17       0.00      0.00      0.00         3
          18       0.00      0.00      0.00         1
          19  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy:  0.11392405063291139
Classification Report for  Student Math [standardModel] - [ohe-std feature - ori target]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         5
           5       0.00      0.00      0.00         4
           6       0.50      0.17      0.25         6
           7       0.00      0.00      0.00         1
           8       0.20      0.33      0.25         6
           9       0.00      0.00      0.00         5
          10       0.25      0.27      0.26        11
          11       0.00      0.00      0.00         5
          12       0.00      0.00      0.00         5
          13       0.00      0.00      0.00         5
          14       0.00      0.00      0.00         6
          15       0.38      0.30      0.33        10
          16       0.00      0.00      0.00         4
          17       0.00      0.00      0.00         3
          18       0.00      0.00      0.00         1
          19   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy:  0.2692307692307692
Classification Report for  Student Portuguese [standardModel] - [le feature - ori target]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           7       0.00      0.00      0.00         1
           8       0.00      0.00      0.00         7
           9       0.00      0.00      0.00         5
          10       0.34      0.82      0.48        17
          11       0.55      0.24      0.33        25
          12       0.00      0.00      0.00        16
          13       0.23      0.85      0.36        13
          14       0.00      0.00      0.00        12
          15       0.13      0.40      0.20        10
          16       0.00      0.00      0.00         9
          17       0.00      0.00      0.00         5
          18       0.00      0.00      0.00         7
          19       0.00      0.00      0.00         1

    accuracy                           0.27       130
   macro avg  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy:  0.16153846153846155
Classification Report for  Student Portuguese [standardModel] - [le-norm feature - ori target]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           7       0.00      0.00      0.00         1
           8       1.00      0.14      0.25         7
           9       0.00      0.00      0.00         5
          10       0.17      0.53      0.26        17
          11       0.33      0.08      0.13        25
          12       0.00      0.00      0.00        16
          13       0.14      0.69      0.23        13
          14       0.00      0.00      0.00        12
          15       0.00      0.00      0.00        10
          16       0.00      0.00      0.00         9
          17       0.00      0.00      0.00         5
          18       0.00      0.00      0.00         7
          19       0.00      0.00      0.00         1

    accuracy                           0.16       130
   macro

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy:  0.3384615384615385
Classification Report for  Student Portuguese [standardModel] - [le-std feature - ori target]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           7       0.00      0.00      0.00         1
           8       0.40      0.86      0.55         7
           9       0.00      0.00      0.00         5
          10       0.28      0.29      0.29        17
          11       0.46      0.64      0.53        25
          12       0.00      0.00      0.00        16
          13       0.00      0.00      0.00        13
          14       0.23      0.25      0.24        12
          15       0.29      0.40      0.33        10
          16       0.30      0.33      0.32         9
          17       0.25      0.60      0.35         5
          18       0.80      0.57      0.67         7
          19       0.00      0.00      0.00         1

    accuracy                           0.34       130
   macro a

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy:  0.27692307692307694
Classification Report for  Student Portuguese [standardModel] - [ohe feature - ori target]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           7       0.00      0.00      0.00         1
           8       0.00      0.00      0.00         7
           9       0.00      0.00      0.00         5
          10       0.00      0.00      0.00        17
          11       0.32      0.64      0.43        25
          12       0.45      0.31      0.37        16
          13       0.28      0.62      0.38        13
          14       0.00      0.00      0.00        12
          15       0.21      0.70      0.32        10
          16       0.00      0.00      0.00         9
          17       0.00      0.00      0.00         5
          18       0.00      0.00      0.00         7
          19       0.00      0.00      0.00         1

    accuracy                           0.28       130
   macro avg

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy:  0.2
Classification Report for  Student Portuguese [standardModel] - [ohe-norm feature - ori target]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           7       0.00      0.00      0.00         1
           8       0.00      0.00      0.00         7
           9       0.00      0.00      0.00         5
          10       0.23      0.47      0.31        17
          11       0.32      0.28      0.30        25
          12       0.00      0.00      0.00        16
          13       0.15      0.77      0.25        13
          14       0.17      0.08      0.11        12
          15       0.00      0.00      0.00        10
          16       0.00      0.00      0.00         9
          17       0.00      0.00      0.00         5
          18       0.00      0.00      0.00         7
          19       0.00      0.00      0.00         1

    accuracy                           0.20       130
   macro avg       0.06

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy:  0.16153846153846155
Classification Report for  Student Portuguese [standardModel] - [ohe-std feature - ori target]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           7       0.00      0.00      0.00         1
           8       0.00      0.00      0.00         7
           9       0.00      0.00      0.00         5
          10       0.18      0.35      0.24        17
          11       0.31      0.36      0.33        25
          12       0.33      0.06      0.11        16
          13       0.09      0.31      0.14        13
          14       0.06      0.08      0.07        12
          15       0.00      0.00      0.00        10
          16       0.00      0.00      0.00         9
          17       0.00      0.00      0.00         5
          18       0.00      0.00      0.00         7
          19       0.00      0.00      0.00         1

    accuracy                           0.16       130
   macro

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# Step 5: Compare the result
# Lift the row limit so the whole accuracy summary (one row per
# dataset / model / preprocessing combination) is shown without truncation.
pd.set_option("display.max_rows", None)
display(result)

# Step 6: Save the result
from pathlib import Path

result_path = Path("output/1. MLP Evaluation Result.csv")
# DataFrame.to_csv does not create missing parent directories; without this,
# a fresh checkout with no "output/" folder fails on Restart & Run All.
result_path.parent.mkdir(parents=True, exist_ok=True)
result.to_csv(result_path)

Unnamed: 0,Accuracy
Wine Red [wineModel] - [ori feature - ori target],0.51875
Wine Red [wineModel] - [norm feature - ori target],0.578125
Wine Red [wineModel] - [std feature - ori target],0.6
Wine White [wineModel] - [ori feature - ori target],0.486735
Wine White [wineModel] - [norm feature - ori target],0.504082
Wine White [wineModel] - [std feature - ori target],0.567347
Student Math [stdModel] - [le feature - ori target],0.075949
Student Math [stdModel] - [le-norm feature - ori target],0.189873
Student Math [stdModel] - [le-std feature - ori target],0.177215
Student Math [stdModel] - [ohe feature - ori target],0.063291
