# Step 1 - Loading the Required Libraries and Modules 

In [2]:
# Import required libraries
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import sklearn
from sklearn.neural_network import MLPClassifier
from sklearn.neural_network import MLPRegressor

# Import necessary modules
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from math import sqrt
from sklearn.metrics import r2_score

# Step 2 - Reading the Data and Creating the Train and Test Sets

In [3]:
# Load the prepared dataset; the path is relative to the notebook's working directory.
DATA_PATH = '../result_data/final_v2.csv'
df = pd.read_csv(DATA_PATH)

In [4]:
# Show the loaded frame as this cell's output.
df

Unnamed: 0,mssv,gioitinh,hedt_ CLC,hedt_ CNTN,hedt_ CQUI,hedt_ CTTT,hedt_ KSTN,khoa_CNPM,khoa_HTTT,khoa_KHMT,...,namhoc,sotchk,hocky_sx,dtbtl,drltl,dtbhk_truoc,drlhk_truoc,sotctl,somon_khongdat_hktruoc,xeploai
0,BE375BAAXPvAibaEXe9JDlHA4z2GHJ3/PVStCxR2,1.0,0,0,1,0,0,1,0,0,...,2013,18,2,6.020000,5.0,6.02,5.0,22.0,1.0,2
1,BE375BAAXPvAibaEXe9JDlHA4z2GHJ3/PVStCxR2,1.0,0,0,1,0,0,1,0,0,...,2014,26,3,6.525000,4.0,7.03,3.0,40.0,0.0,1
2,BE375BAAXPvAibaEXe9JDlHA4z2GHJ3/PVStCxR2,1.0,0,0,1,0,0,1,0,0,...,2014,19,4,6.483333,4.0,6.40,3.0,66.0,1.0,2
3,BE375BAAXPvAibaEXe9JDlHA4z2GHJ3/PVStCxR2,1.0,0,0,1,0,0,1,0,0,...,2015,20,5,6.615000,4.0,7.01,4.0,85.0,0.0,1
4,BE375BAAXPvAibaEXe9JDlHA4z2GHJ3/PVStCxR2,1.0,0,0,1,0,0,1,0,0,...,2015,19,6,6.562000,4.0,6.35,5.0,105.0,1.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56180,75AD7B4AXPvAibaEXe8xMCTZ03/BewoGrWSt0ZXM,1.0,0,0,1,0,0,0,0,1,...,2020,21,3,7.250000,3.0,6.95,3.0,28.0,0.0,2
56181,75AD7B4AXPvAibaEXe8xMCTZ03/BewoGrWSt0ZXM,1.0,0,0,1,0,0,0,0,1,...,2020,22,4,7.320000,3.0,7.46,3.0,49.0,0.0,3
56182,75AD7B4AXPvAibaEXe8xMCTZ03/BewoGrWSt0ZXM,1.0,0,0,1,0,0,0,0,1,...,2021,22,5,7.535000,3.0,8.18,4.0,71.0,0.0,2
56183,75AD7B4AXPvAibaEXe8xMCTZ03/BewoGrWSt0ZXM,1.0,0,0,1,0,0,0,0,1,...,2021,14,6,7.548000,4.0,7.60,5.0,93.0,0.0,2


In [5]:
# Split by cohort ("khoahoc"): rows with khoahoc < 14 are used for training
# (the original note lists these as cohorts 8, 9, 10, 11, 12 and 13).
data_train = df.loc[df["khoahoc"] < 14]

# Rows with khoahoc >= 14 form the test set (the original note calls this cohort 14).
data_test = df.loc[df["khoahoc"] >= 14]

# Report both split sizes and the share of all rows that landed in the test set.
n_train, n_test = len(data_train), len(data_test)
print("Số mẫu train:", n_train)
print("Số mẫu test:", n_test)
print("Tỉ lệ tập test: ", n_test / (n_test + n_train))

Số mẫu train: 46501
Số mẫu test: 9684
Tỉ lệ tập test:  0.17235917059713446


In [6]:
# Groups of related columns, selected by column-name prefix.

# Region columns ("khuvuc...")
khuvuc = [col for col in df.columns if col.startswith("khuvuc")]

# Faculty columns ("khoa_..."). The trailing underscore matters: the bare prefix
# "khoa" also matches the cohort column "khoahoc", which is not a faculty indicator.
khoa = [col for col in df.columns if col.startswith("khoa_")]

# Training-programme columns ("hedt...")
hedt = [col for col in df.columns if col.startswith("hedt")]

# Input features.
# Wider alternative feature set, kept for reference:
# input = ["gioitinh", "xl_tt", "xl_av", "khoahoc"] + khuvuc + khoa + hedt + ["dtbtl", "drltl"]
# NOTE: `input` shadows the Python builtin of the same name; the name is kept
# because other cells index the data frames with it.
input = ["gioitinh", "xl_tt", "xl_av"] + ["hocky_sx", "dtbtl", "drltl"]

# Target column
output = "xeploai"

# Step 3 - Building, Predicting, and Evaluating the Neural Network Model

In [7]:
# Feature matrix and target vector taken from the training split.
X_train, y_train = data_train[input], data_train[output]

In [8]:
# Multi-layer perceptron with two hidden layers of 32 ReLU units, optimised with Adam.
# random_state is fixed so that the fitted model (and every score derived from it)
# is repeatable across kernel restarts; without it each run trains a different network.
mlp = MLPClassifier(
    hidden_layer_sizes=(32, 32),
    activation='relu',
    solver='adam',
    max_iter=500,
    random_state=42,
)
mlp.fit(X_train, y_train)


In [9]:
# Report test accuracy separately for each value of "hocky_sx" from 2 to 7.
# X_test / y_test stay bound to the last subset once the loop finishes.
for i in range(2, 8):
    semester_mask = data_test["hocky_sx"] == i
    X_test = data_test.loc[semester_mask, input]
    y_test = data_test.loc[semester_mask, output]
    print(f"HK{i}", ":", mlp.score(X_test, y_test))

HK2 : 0.6054664289958408
HK3 : 0.556299452221546
HK4 : 0.5985176034589252
HK5 : 0.6144121365360303
HK6 : 0.5662188099808061
HK7 : 0.5300802139037433


In [10]:
# Predict on the training split and on one semester of the test split.
# X_test and y_test are re-bound together: an earlier cell's per-semester loop leaves
# both pointing at its last semester, so updating only X_test would leave y_test
# misaligned with predict_test.
EVAL_SEMESTER = 2
eval_mask = data_test["hocky_sx"] == EVAL_SEMESTER
X_test = data_test.loc[eval_mask, input]
y_test = data_test.loc[eval_mask, output]

predict_train = mlp.predict(X_train)
predict_test = mlp.predict(X_test)


In [11]:
from sklearn.metrics import classification_report, confusion_matrix

# Training-set diagnostics: the confusion matrix first, then the per-class
# precision / recall / f1 report.
train_confusion = confusion_matrix(y_train, predict_train)
train_report = classification_report(y_train, predict_train)
print(train_confusion)
print(train_report)

[[4068 2876  474  385]
 [1767 8433 3641  620]
 [ 368 3619 6716 2821]
 [ 167  662 3096 6788]]
              precision    recall  f1-score   support

           0       0.64      0.52      0.57      7803
           1       0.54      0.58      0.56     14461
           2       0.48      0.50      0.49     13524
           3       0.64      0.63      0.64     10713

    accuracy                           0.56     46501
   macro avg       0.58      0.56      0.57     46501
weighted avg       0.56      0.56      0.56     46501

