# 3.0 Support Vector Machine (Split)

### Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import classification_report, confusion_matrix

# Dataset used

In [2]:
### Dataset from FYP1
df1_B_imb = pd.read_csv('CSurvey1_Trans_Binary_imb.csv')
df1_5L_imb = pd.read_csv('CSurvey1_Trans_5level_imb.csv')

df1_B_ROS = pd.read_csv('CSurvey1_Trans_Binary_ROS.csv')
df1_5L_ROS = pd.read_csv('CSurvey1_Trans_5level_ROS.csv')

df1_B_SMOTE = pd.read_csv('CSurvey1_Trans_Binary_SMOTE.csv')
df1_5L_SMOTE = pd.read_csv('CSurvey1_Trans_5level_SMOTE.csv')

### Dataset from FYP2
df2_B_imb = pd.read_csv('CSurvey2_Trans_Binary_imb.csv')
df2_5L_imb = pd.read_csv('CSurvey2_Trans_5level_imb.csv')

df2_B_ROS = pd.read_csv('CSurvey2_Trans_Binary_ROS.csv')
df2_5L_ROS = pd.read_csv('CSurvey2_Trans_5level_ROS.csv')

df2_B_SMOTE = pd.read_csv('CSurvey2_Trans_Binary_SMOTE.csv')
df2_5L_SMOTE = pd.read_csv('CSurvey2_Trans_5level_SMOTE.csv')

### Dataset shape
print("df1_B_imb:", df1_B_imb.shape)
print("df1_5L_imb:", df1_5L_imb.shape)
print("df1_B_ROS:", df1_B_ROS.shape)
print("df1_5L_ROS:", df1_5L_ROS.shape)
print("df1_B_SMOTE:", df1_B_SMOTE.shape)
print("df1_5L_SMOTE:", df1_5L_SMOTE.shape)

print("df2_B_imb:", df2_B_imb.shape)
print("df2_5L_imb:", df2_5L_imb.shape)
print("df2_B_ROS:", df2_B_ROS.shape)
print("df2_5L_ROS:", df2_5L_ROS.shape)
print("df2_B_SMOTE:", df2_B_SMOTE.shape)
print("df2_5L_SMOTE:", df2_5L_SMOTE.shape)

df1_B_imb: (673, 41)
df1_5L_imb: (673, 41)
df1_B_ROS: (1338, 41)
df1_5L_ROS: (1695, 41)
df1_B_SMOTE: (1338, 41)
df1_5L_SMOTE: (1695, 41)
df2_B_imb: (614, 41)
df2_5L_imb: (614, 41)
df2_B_ROS: (1202, 41)
df2_5L_ROS: (1390, 41)
df2_B_SMOTE: (1202, 41)
df2_5L_SMOTE: (1390, 41)


In [3]:
rand_states = [7,69,101]

# Binary Classification

## Student Background

### SB before Data Balancing

In [4]:
tempB_SB_imb = []

y_train = df1_B_imb[['CGPA']]
X_train = df1_B_imb[['Age','Gender','Nationality', 'Living Status', 'Home Town', 'Faculty', 'Education Level',
                     'Years of Study', 'Completed Credit Hours']]

y_test = df2_B_imb[['CGPA']]
X_test = df2_B_imb[['Age','Gender','Nationality', 'Living Status', 'Home Town', 'Faculty', 'Education Level',
                     'Years of Study', 'Completed Credit Hours']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_SB_imb.append([100,j,acc,pre,recall,f1,param])
        
dfB_SB_imb = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_SB_imb)
dfB_SB_imb

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.978827,0.958103,0.978827,0.968354,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.978827,0.958103,0.978827,0.968354,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.978827,0.958103,0.978827,0.968354,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"


In [5]:
dfB_SB_imb.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.978827,0.958103,0.978827,0.968354


### SB after RandomOverSampler

In [6]:
tempB_SB_ROS = []

y_train = df1_B_ROS[['CGPA']]
X_train = df1_B_ROS[['Age','Gender','Nationality', 'Living Status', 'Home Town', 'Faculty', 'Education Level',
                     'Years of Study', 'Completed Credit Hours']]

y_test = df2_B_ROS[['CGPA']]
X_test = df2_B_ROS[['Age','Gender','Nationality', 'Living Status', 'Home Town', 'Faculty', 'Education Level',
                     'Years of Study', 'Completed Credit Hours']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_SB_ROS.append([100,j,acc,pre,recall,f1,param])
        
dfB_SB_ROS = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_SB_ROS)
dfB_SB_ROS

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.5,0.25,0.5,0.333333,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.5,0.25,0.5,0.333333,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.5,0.25,0.5,0.333333,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"


In [7]:
dfB_SB_ROS.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.5,0.25,0.5,0.333333


### SB after SMOTE

In [8]:
tempB_SB_SMOTE = []

y_train = df1_B_SMOTE[['CGPA']]
X_train = df1_B_SMOTE[['Age','Gender','Nationality', 'Living Status', 'Home Town', 'Faculty', 'Education Level',
                     'Years of Study', 'Completed Credit Hours']]

y_test = df2_B_SMOTE[['CGPA']]
X_test = df2_B_SMOTE[['Age','Gender','Nationality', 'Living Status', 'Home Town', 'Faculty', 'Education Level',
                     'Years of Study', 'Completed Credit Hours']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_SB_SMOTE.append([100,j,acc,pre,recall,f1,param])
        
dfB_SB_SMOTE = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_SB_SMOTE)
dfB_SB_SMOTE

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.496672,0.249165,0.496672,0.331851,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.496672,0.249165,0.496672,0.331851,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.496672,0.249165,0.496672,0.331851,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"


In [9]:
dfB_SB_SMOTE.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.496672,0.249165,0.496672,0.331851


## Student History Grades

### SHG before Data Balancing

In [10]:
tempB_SHG_imb = []

y_train = df1_B_imb[['CGPA']]
X_train = df1_B_imb[['Number of Subjects Failed','SPM Result (A)','SPM Result (B)','SPM Result (C)']]

y_test = df2_B_imb[['CGPA']]
X_test = df2_B_imb[['Number of Subjects Failed','SPM Result (A)','SPM Result (B)','SPM Result (C)']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_SHG_imb.append([100,j,acc,pre,recall,f1,param])
        
dfB_SHG_imb = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_SHG_imb)
dfB_SHG_imb

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.978827,0.958103,0.978827,0.968354,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.978827,0.958103,0.978827,0.968354,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.978827,0.958103,0.978827,0.968354,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"


In [11]:
dfB_SHG_imb.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.978827,0.958103,0.978827,0.968354


### SHG after RandomOverSampler

In [12]:
tempB_SHG_ROS = []

y_train = df1_B_ROS[['CGPA']]
X_train = df1_B_ROS[['Number of Subjects Failed','SPM Result (A)','SPM Result (B)','SPM Result (C)']]

y_test = df2_B_ROS[['CGPA']]
X_test = df2_B_ROS[['Number of Subjects Failed','SPM Result (A)','SPM Result (B)','SPM Result (C)']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_SHG_ROS.append([100,j,acc,pre,recall,f1,param])
        
dfB_SHG_ROS = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_SHG_ROS)
dfB_SHG_ROS

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.528286,0.644553,0.528286,0.409561,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.528286,0.644553,0.528286,0.409561,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.528286,0.644553,0.528286,0.409561,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"


In [13]:
dfB_SHG_ROS.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.528286,0.644553,0.528286,0.409561


### SHG after SMOTE

In [14]:
tempB_SHG_SMOTE = []

y_train = df1_B_SMOTE[['CGPA']]
X_train = df1_B_SMOTE[['Number of Subjects Failed','SPM Result (A)','SPM Result (B)','SPM Result (C)']]

y_test = df2_B_SMOTE[['CGPA']]
X_test = df2_B_SMOTE[['Number of Subjects Failed','SPM Result (A)','SPM Result (B)','SPM Result (C)']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_SHG_SMOTE.append([100,j,acc,pre,recall,f1,param])
        
dfB_SHG_SMOTE = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_SHG_SMOTE)
dfB_SHG_SMOTE

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.611481,0.727292,0.611481,0.554766,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.611481,0.727292,0.611481,0.554766,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.611481,0.727292,0.611481,0.554766,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"


In [15]:
dfB_SHG_SMOTE.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.611481,0.727292,0.611481,0.554766


## Student Opinion Towards MMU Environment

### SOTME before Data Balancing

In [16]:
tempB_SOTME_imb = []

y_train = df1_B_imb[['CGPA']]
X_train = df1_B_imb[['Reason Study at Multimedia University', 'Lecture Class Capacity','Tutorial Class Capacity', 
                     'Classroom Facilities', 'Campus Environment']]

y_test = df2_B_imb[['CGPA']]
X_test = df2_B_imb[['Reason Study at Multimedia University', 'Lecture Class Capacity','Tutorial Class Capacity', 
                     'Classroom Facilities', 'Campus Environment']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_SOTME_imb.append([100,j,acc,pre,recall,f1,param])
        
dfB_SOTME_imb = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_SOTME_imb)
dfB_SOTME_imb



Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.978827,0.958103,0.978827,0.968354,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.978827,0.958103,0.978827,0.968354,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.978827,0.958103,0.978827,0.968354,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"


In [17]:
dfB_SOTME_imb.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.978827,0.958103,0.978827,0.968354


### SOTME after RandomOverSampler

In [18]:
tempB_SOTME_ROS = []

y_train = df1_B_ROS[['CGPA']]
X_train = df1_B_ROS[['Reason Study at Multimedia University', 'Lecture Class Capacity','Tutorial Class Capacity', 
                     'Classroom Facilities', 'Campus Environment']]

y_test = df2_B_ROS[['CGPA']]
X_test = df2_B_ROS[['Reason Study at Multimedia University', 'Lecture Class Capacity','Tutorial Class Capacity', 
                     'Classroom Facilities', 'Campus Environment']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_SOTME_ROS.append([100,j,acc,pre,recall,f1,param])
        
dfB_SOTME_ROS = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_SOTME_ROS)
dfB_SOTME_ROS

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.499168,0.249792,0.499168,0.332963,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.499168,0.249792,0.499168,0.332963,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.499168,0.249792,0.499168,0.332963,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"


In [19]:
dfB_SOTME_ROS.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.499168,0.249792,0.499168,0.332963


### SOTME after SMOTE

In [20]:
tempB_SOTME_SMOTE = []

y_train = df1_B_SMOTE[['CGPA']]
X_train = df1_B_SMOTE[['Reason Study at Multimedia University', 'Lecture Class Capacity','Tutorial Class Capacity', 
                     'Classroom Facilities', 'Campus Environment']]

y_test = df2_B_SMOTE[['CGPA']]
X_test = df2_B_SMOTE[['Reason Study at Multimedia University', 'Lecture Class Capacity','Tutorial Class Capacity', 
                     'Classroom Facilities', 'Campus Environment']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_SOTME_SMOTE.append([100,j,acc,pre,recall,f1,param])
        
dfB_SOTME_SMOTE = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_SOTME_SMOTE)
dfB_SOTME_SMOTE

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.52995,0.566297,0.52995,0.455292,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.52995,0.566297,0.52995,0.455292,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.52995,0.566297,0.52995,0.455292,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"


In [21]:
dfB_SOTME_SMOTE.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.52995,0.566297,0.52995,0.455292


## Student Family Background

### SFB before Data Balancing

In [22]:
tempB_SFB_imb = []

y_train = df1_B_imb[['CGPA']]
X_train = df1_B_imb[['Family Size', "Parent's Education Level.Father", "Parent's Education Level.Mother", 
                     "Father's Occupation", "Mother's Occupation", 'Financial Difficulties', 'Financial Support']]

y_test = df2_B_imb[['CGPA']]
X_test = df2_B_imb[['Family Size', "Parent's Education Level.Father", "Parent's Education Level.Mother", 
                     "Father's Occupation", "Mother's Occupation", 'Financial Difficulties', 'Financial Support']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_SFB_imb.append([100,j,acc,pre,recall,f1,param])
        
dfB_SFB_imb = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_SFB_imb)
dfB_SFB_imb



Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.978827,0.958103,0.978827,0.968354,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.978827,0.958103,0.978827,0.968354,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.978827,0.958103,0.978827,0.968354,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"


In [23]:
dfB_SFB_imb.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.978827,0.958103,0.978827,0.968354


### SFB after RandomOverSampler

In [24]:
tempB_SFB_ROS = []

y_train = df1_B_ROS[['CGPA']]
X_train = df1_B_ROS[['Family Size', "Parent's Education Level.Father", "Parent's Education Level.Mother", 
                     "Father's Occupation", "Mother's Occupation", 'Financial Difficulties', 'Financial Support']]

y_test = df2_B_ROS[['CGPA']]
X_test = df2_B_ROS[['Family Size', "Parent's Education Level.Father", "Parent's Education Level.Mother", 
                     "Father's Occupation", "Mother's Occupation", 'Financial Difficulties', 'Financial Support']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_SFB_ROS.append([100,j,acc,pre,recall,f1,param])
        
dfB_SFB_ROS = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_SFB_ROS)
dfB_SFB_ROS

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.5,0.25,0.5,0.333333,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.5,0.25,0.5,0.333333,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.5,0.25,0.5,0.333333,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"


In [25]:
dfB_SFB_ROS.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.5,0.25,0.5,0.333333


### SFB after SMOTE

In [26]:
tempB_SFB_SMOTE = []

y_train = df1_B_SMOTE[['CGPA']]
X_train = df1_B_SMOTE[['Family Size', "Parent's Education Level.Father", "Parent's Education Level.Mother", 
                     "Father's Occupation", "Mother's Occupation", 'Financial Difficulties', 'Financial Support']]

y_test = df2_B_SMOTE[['CGPA']]
X_test = df2_B_SMOTE[['Family Size', "Parent's Education Level.Father", "Parent's Education Level.Mother", 
                     "Father's Occupation", "Mother's Occupation", 'Financial Difficulties', 'Financial Support']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_SFB_SMOTE.append([100,j,acc,pre,recall,f1,param])
        
dfB_SFB_SMOTE = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_SFB_SMOTE)
dfB_SFB_SMOTE

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.485025,0.246199,0.485025,0.326611,"{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}"
1,100,69,0.485025,0.246199,0.485025,0.326611,"{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}"
2,100,101,0.485025,0.246199,0.485025,0.326611,"{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}"


In [27]:
dfB_SFB_SMOTE.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.485025,0.246199,0.485025,0.326611


## Student Lifestyle

### SL before Data Balancing

In [28]:
tempB_SL_imb = []

y_train = df1_B_imb[['CGPA']]
X_train = df1_B_imb[['Relationship Status', 'Scholarship', 'PTPTN', 'Skipping Class', 'Additional Course',
                    'Study Behavior', 'Study Time', 'Extra Curricular Activities', 'Part-time Job', 'Commute Time',
                    'Gaming', 'Alcohol Consumption', 'I have enough free time after class', 'Social Interaction',
                    'I hang out with friends']]

y_test = df2_B_imb[['CGPA']]
X_test = df2_B_imb[['Relationship Status', 'Scholarship', 'PTPTN', 'Skipping Class', 'Additional Course',
                    'Study Behavior', 'Study Time', 'Extra Curricular Activities', 'Part-time Job', 'Commute Time',
                    'Gaming', 'Alcohol Consumption', 'I have enough free time after class', 'Social Interaction',
                    'I hang out with friends']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_SL_imb.append([100,j,acc,pre,recall,f1,param])
        
dfB_SL_imb = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_SL_imb)
dfB_SL_imb

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.978827,0.958103,0.978827,0.968354,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.978827,0.958103,0.978827,0.968354,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.978827,0.958103,0.978827,0.968354,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"


In [29]:
dfB_SL_imb.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.978827,0.958103,0.978827,0.968354


### SL after RandomOverSampler

In [30]:
tempB_SL_ROS = []

y_train = df1_B_ROS[['CGPA']]
X_train = df1_B_ROS[['Relationship Status', 'Scholarship', 'PTPTN', 'Skipping Class', 'Additional Course',
                    'Study Behavior', 'Study Time', 'Extra Curricular Activities', 'Part-time Job', 'Commute Time',
                    'Gaming', 'Alcohol Consumption', 'I have enough free time after class', 'Social Interaction',
                    'I hang out with friends']]

y_test = df2_B_ROS[['CGPA']]
X_test = df2_B_ROS[['Relationship Status', 'Scholarship', 'PTPTN', 'Skipping Class', 'Additional Course',
                    'Study Behavior', 'Study Time', 'Extra Curricular Activities', 'Part-time Job', 'Commute Time',
                    'Gaming', 'Alcohol Consumption', 'I have enough free time after class', 'Social Interaction',
                    'I hang out with friends']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_SL_ROS.append([100,j,acc,pre,recall,f1,param])
        
dfB_SL_ROS = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_SL_ROS)
dfB_SL_ROS

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.5,0.25,0.5,0.333333,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.5,0.25,0.5,0.333333,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.5,0.25,0.5,0.333333,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"


In [31]:
dfB_SL_ROS.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.5,0.25,0.5,0.333333


### SL after SMOTE

In [32]:
tempB_SL_SMOTE = []

y_train = df1_B_SMOTE[['CGPA']]
X_train = df1_B_SMOTE[['Relationship Status', 'Scholarship', 'PTPTN', 'Skipping Class', 'Additional Course',
                    'Study Behavior', 'Study Time', 'Extra Curricular Activities', 'Part-time Job', 'Commute Time',
                    'Gaming', 'Alcohol Consumption', 'I have enough free time after class', 'Social Interaction',
                    'I hang out with friends']]

y_test = df2_B_SMOTE[['CGPA']]
X_test = df2_B_SMOTE[['Relationship Status', 'Scholarship', 'PTPTN', 'Skipping Class', 'Additional Course',
                    'Study Behavior', 'Study Time', 'Extra Curricular Activities', 'Part-time Job', 'Commute Time',
                    'Gaming', 'Alcohol Consumption', 'I have enough free time after class', 'Social Interaction',
                    'I hang out with friends']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_SL_SMOTE.append([100,j,acc,pre,recall,f1,param])
        
dfB_SL_SMOTE = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_SL_SMOTE)
dfB_SL_SMOTE

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.5,0.25,0.5,0.333333,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.5,0.25,0.5,0.333333,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.5,0.25,0.5,0.333333,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"


In [33]:
dfB_SL_SMOTE.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.5,0.25,0.5,0.333333


## All Attributes

### AA before Data Balancing

In [34]:
tempB_imb = []

y_train = df1_B_imb[['CGPA']]
X_train = df1_B_imb.drop(columns=['CGPA'])

y_test = df2_B_imb[['CGPA']]
X_test = df2_B_imb.drop(columns=['CGPA'])

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_imb.append([100,j,acc,pre,recall,f1,param])
        
dfB_imb = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_imb)
dfB_imb



Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.978827,0.958103,0.978827,0.968354,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.978827,0.958103,0.978827,0.968354,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.978827,0.958103,0.978827,0.968354,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"


In [35]:
dfB_imb.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.978827,0.958103,0.978827,0.968354


### AA after RandomOverSampler

In [36]:
tempB_ROS = []

y_train = df1_B_ROS[['CGPA']]
X_train = df1_B_ROS.drop(columns=['CGPA'])

y_test = df2_B_ROS[['CGPA']]
X_test = df2_B_ROS.drop(columns=['CGPA'])

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_ROS.append([100,j,acc,pre,recall,f1,param])
        
dfB_ROS = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_ROS)
dfB_ROS

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.5,0.25,0.5,0.333333,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.5,0.25,0.5,0.333333,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.5,0.25,0.5,0.333333,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"


In [37]:
dfB_ROS.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.5,0.25,0.5,0.333333


### AA after SMOTE

In [38]:
tempB_SMOTE = []

y_train = df1_B_SMOTE[['CGPA']]
X_train = df1_B_SMOTE.drop(columns=['CGPA'])

y_test = df2_B_SMOTE[['CGPA']]
X_test = df2_B_SMOTE.drop(columns=['CGPA'])

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_SMOTE.append([100,j,acc,pre,recall,f1,param])
        
dfB_SMOTE = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_SMOTE)
dfB_SMOTE

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.5,0.25,0.5,0.333333,"{'C': 10, 'gamma': 0.1, 'kernel': 'rbf'}"
1,100,69,0.5,0.25,0.5,0.333333,"{'C': 10, 'gamma': 0.1, 'kernel': 'rbf'}"
2,100,101,0.5,0.25,0.5,0.333333,"{'C': 10, 'gamma': 0.1, 'kernel': 'rbf'}"


In [39]:
dfB_SMOTE.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.5,0.25,0.5,0.333333


# 5-Level Classification

## Student Background

### SB before Data Balancing

In [40]:
temp5L_SB_imb = []

y_train = df1_5L_imb[['CGPA']]
X_train = df1_5L_imb[['Age','Gender','Nationality', 'Living Status', 'Home Town', 'Faculty', 'Education Level',
                     'Years of Study', 'Completed Credit Hours']]

y_test = df2_5L_imb[['CGPA']]
X_test = df2_5L_imb[['Age','Gender','Nationality', 'Living Status', 'Home Town', 'Faculty', 'Education Level',
                     'Years of Study', 'Completed Credit Hours']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_SB_imb.append([100,j,acc,pre,recall,f1,param])
        
df5L_SB_imb = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_SB_imb)
df5L_SB_imb



Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.452769,0.205,0.452769,0.282219,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.452769,0.205,0.452769,0.282219,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.452769,0.205,0.452769,0.282219,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"


In [41]:
df5L_SB_imb.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.452769,0.205,0.452769,0.282219


### SB after RandomOverSampler

In [42]:
temp5L_SB_ROS = []

y_train = df1_5L_ROS[['CGPA']]
X_train = df1_5L_ROS[['Age','Gender','Nationality', 'Living Status', 'Home Town', 'Faculty', 'Education Level',
                     'Years of Study', 'Completed Credit Hours']]

y_test = df2_5L_ROS[['CGPA']]
X_test = df2_5L_ROS[['Age','Gender','Nationality', 'Living Status', 'Home Town', 'Faculty', 'Education Level',
                     'Years of Study', 'Completed Credit Hours']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_SB_ROS.append([100,j,acc,pre,recall,f1,param])
        
df5L_SB_ROS = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_SB_ROS)
df5L_SB_ROS

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.215108,0.133688,0.215108,0.142418,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.215108,0.133688,0.215108,0.142418,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.215108,0.133688,0.215108,0.142418,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"


In [43]:
df5L_SB_ROS.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.215108,0.133688,0.215108,0.142418


### SB after SMOTE

In [44]:
temp5L_SB_SMOTE = []

y_train = df1_5L_SMOTE[['CGPA']]
X_train = df1_5L_SMOTE[['Age','Gender','Nationality', 'Living Status', 'Home Town', 'Faculty', 'Education Level',
                     'Years of Study', 'Completed Credit Hours']]

y_test = df2_5L_SMOTE[['CGPA']]
X_test = df2_5L_SMOTE[['Age','Gender','Nationality', 'Living Status', 'Home Town', 'Faculty', 'Education Level',
                     'Years of Study', 'Completed Credit Hours']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_SB_SMOTE.append([100,j,acc,pre,recall,f1,param])
        
df5L_SB_SMOTE = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_SB_SMOTE)
df5L_SB_SMOTE

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.223022,0.180274,0.223022,0.184512,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.223022,0.180274,0.223022,0.184512,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.223022,0.180274,0.223022,0.184512,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"


In [45]:
df5L_SB_SMOTE.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.223022,0.180274,0.223022,0.184512


## Student History Grades

### SHG before Data Balancing

In [46]:
temp5L_SHG_imb = []

y_train = df1_5L_imb[['CGPA']]
X_train = df1_5L_imb[['Number of Subjects Failed','SPM Result (A)','SPM Result (B)','SPM Result (C)']]

y_test = df2_5L_imb[['CGPA']]
X_test = df2_5L_imb[['Number of Subjects Failed','SPM Result (A)','SPM Result (B)','SPM Result (C)']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_SHG_imb.append([100,j,acc,pre,recall,f1,param])
        
df5L_SHG_imb = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_SHG_imb)
df5L_SHG_imb

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.530945,0.465565,0.530945,0.463492,"{'C': 100, 'gamma': 0.01, 'kernel': 'rbf'}"
1,100,69,0.530945,0.465565,0.530945,0.463492,"{'C': 100, 'gamma': 0.01, 'kernel': 'rbf'}"
2,100,101,0.530945,0.465565,0.530945,0.463492,"{'C': 100, 'gamma': 0.01, 'kernel': 'rbf'}"


In [47]:
df5L_SHG_imb.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.530945,0.465565,0.530945,0.463492


### SHG after RandomOverSampler

In [48]:
temp5L_SHG_ROS = []

y_train = df1_5L_ROS[['CGPA']]
X_train = df1_5L_ROS[['Number of Subjects Failed','SPM Result (A)','SPM Result (B)','SPM Result (C)']]

y_test = df2_5L_ROS[['CGPA']]
X_test = df2_5L_ROS[['Number of Subjects Failed','SPM Result (A)','SPM Result (B)','SPM Result (C)']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_SHG_ROS.append([100,j,acc,pre,recall,f1,param])
        
df5L_SHG_ROS = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_SHG_ROS)
df5L_SHG_ROS

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.286331,0.273632,0.286331,0.260274,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.286331,0.273632,0.286331,0.260274,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.286331,0.273632,0.286331,0.260274,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"


In [49]:
df5L_SHG_ROS.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.286331,0.273632,0.286331,0.260274


### SHG after SMOTE

In [50]:
temp5L_SHG_SMOTE = []

y_train = df1_5L_SMOTE[['CGPA']]
X_train = df1_5L_SMOTE[['Number of Subjects Failed','SPM Result (A)','SPM Result (B)','SPM Result (C)']]

y_test = df2_5L_SMOTE[['CGPA']]
X_test = df2_5L_SMOTE[['Number of Subjects Failed','SPM Result (A)','SPM Result (B)','SPM Result (C)']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_SHG_SMOTE.append([100,j,acc,pre,recall,f1,param])
        
df5L_SHG_SMOTE = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_SHG_SMOTE)
df5L_SHG_SMOTE

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.372662,0.369998,0.372662,0.360954,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.372662,0.369998,0.372662,0.360954,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.372662,0.369998,0.372662,0.360954,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"


In [51]:
df5L_SHG_SMOTE.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.372662,0.369998,0.372662,0.360954


## Student Opinion Towards MMU Environment

### SOTME before Data Balancing

In [52]:
temp5L_SOTME_imb = []

y_train = df1_5L_imb[['CGPA']]
X_train = df1_5L_imb[['Reason Study at Multimedia University', 'Lecture Class Capacity','Tutorial Class Capacity', 
                     'Classroom Facilities', 'Campus Environment']]

y_test = df2_5L_imb[['CGPA']]
X_test = df2_5L_imb[['Reason Study at Multimedia University', 'Lecture Class Capacity','Tutorial Class Capacity', 
                     'Classroom Facilities', 'Campus Environment']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_SOTME_imb.append([100,j,acc,pre,recall,f1,param])
        
df5L_SOTME_imb = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_SOTME_imb)
df5L_SOTME_imb

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.452769,0.205,0.452769,0.282219,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.452769,0.205,0.452769,0.282219,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.452769,0.205,0.452769,0.282219,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"


In [53]:
df5L_SOTME_imb.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.452769,0.205,0.452769,0.282219


### SOTME after RandomOverSampler

In [54]:
temp5L_SOTME_ROS = []

y_train = df1_5L_ROS[['CGPA']]
X_train = df1_5L_ROS[['Reason Study at Multimedia University', 'Lecture Class Capacity','Tutorial Class Capacity', 
                     'Classroom Facilities', 'Campus Environment']]

y_test = df2_5L_ROS[['CGPA']]
X_test = df2_5L_ROS[['Reason Study at Multimedia University', 'Lecture Class Capacity','Tutorial Class Capacity', 
                     'Classroom Facilities', 'Campus Environment']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_SOTME_ROS.append([100,j,acc,pre,recall,f1,param])
        
df5L_SOTME_ROS = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_SOTME_ROS)
df5L_SOTME_ROS

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.204317,0.133956,0.204317,0.102208,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.204317,0.133956,0.204317,0.102208,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.204317,0.133956,0.204317,0.102208,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"


In [55]:
df5L_SOTME_ROS.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.204317,0.133956,0.204317,0.102208


### SOTME after SMOTE

In [56]:
temp5L_SOTME_SMOTE = []

y_train = df1_5L_SMOTE[['CGPA']]
X_train = df1_5L_SMOTE[['Reason Study at Multimedia University', 'Lecture Class Capacity','Tutorial Class Capacity', 
                     'Classroom Facilities', 'Campus Environment']]

y_test = df2_5L_SMOTE[['CGPA']]
X_test = df2_5L_SMOTE[['Reason Study at Multimedia University', 'Lecture Class Capacity','Tutorial Class Capacity', 
                     'Classroom Facilities', 'Campus Environment']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_SOTME_SMOTE.append([100,j,acc,pre,recall,f1,param])
        
df5L_SOTME_SMOTE = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_SOTME_SMOTE)
df5L_SOTME_SMOTE

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.222302,0.265245,0.222302,0.209311,"{'C': 100, 'gamma': 0.1, 'kernel': 'rbf'}"
1,100,69,0.222302,0.265245,0.222302,0.209311,"{'C': 100, 'gamma': 0.1, 'kernel': 'rbf'}"
2,100,101,0.222302,0.265245,0.222302,0.209311,"{'C': 100, 'gamma': 0.1, 'kernel': 'rbf'}"


In [57]:
df5L_SOTME_SMOTE.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.222302,0.265245,0.222302,0.209311


## Student Family Background

### SFB before Data Balancing

In [58]:
temp5L_SFB_imb = []

y_train = df1_5L_imb[['CGPA']]
X_train = df1_5L_imb[['Family Size', "Parent's Education Level.Father", "Parent's Education Level.Mother", 
                     "Father's Occupation", "Mother's Occupation", 'Financial Difficulties', 'Financial Support']]

y_test = df2_5L_imb[['CGPA']]
X_test = df2_5L_imb[['Family Size', "Parent's Education Level.Father", "Parent's Education Level.Mother", 
                     "Father's Occupation", "Mother's Occupation", 'Financial Difficulties', 'Financial Support']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_SFB_imb.append([100,j,acc,pre,recall,f1,param])
        
df5L_SFB_imb = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_SFB_imb)
df5L_SFB_imb

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.442997,0.297805,0.442997,0.304247,"{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}"
1,100,69,0.442997,0.297805,0.442997,0.304247,"{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}"
2,100,101,0.442997,0.297805,0.442997,0.304247,"{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}"


In [59]:
df5L_SFB_imb.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.442997,0.297805,0.442997,0.304247


### SFB after RandomOverSampler

In [60]:
temp5L_SFB_ROS = []

y_train = df1_5L_ROS[['CGPA']]
X_train = df1_5L_ROS[['Family Size', "Parent's Education Level.Father", "Parent's Education Level.Mother", 
                     "Father's Occupation", "Mother's Occupation", 'Financial Difficulties', 'Financial Support']]

y_test = df2_5L_ROS[['CGPA']]
X_test = df2_5L_ROS[['Family Size', "Parent's Education Level.Father", "Parent's Education Level.Mother", 
                     "Father's Occupation", "Mother's Occupation", 'Financial Difficulties', 'Financial Support']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_SFB_ROS.append([100,j,acc,pre,recall,f1,param])
        
df5L_SFB_ROS = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_SFB_ROS)
df5L_SFB_ROS

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.200719,0.20194,0.200719,0.072399,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.200719,0.20194,0.200719,0.072399,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.200719,0.20194,0.200719,0.072399,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"


In [61]:
df5L_SFB_ROS.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.200719,0.20194,0.200719,0.072399


### SFB after SMOTE

In [62]:
temp5L_SFB_SMOTE = []

y_train = df1_5L_SMOTE[['CGPA']]
X_train = df1_5L_SMOTE[['Family Size', "Parent's Education Level.Father", "Parent's Education Level.Mother", 
                     "Father's Occupation", "Mother's Occupation", 'Financial Difficulties', 'Financial Support']]

y_test = df2_5L_SMOTE[['CGPA']]
X_test = df2_5L_SMOTE[['Family Size', "Parent's Education Level.Father", "Parent's Education Level.Mother", 
                     "Father's Occupation", "Mother's Occupation", 'Financial Difficulties', 'Financial Support']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_SFB_SMOTE.append([100,j,acc,pre,recall,f1,param])
        
df5L_SFB_SMOTE = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_SFB_SMOTE)
df5L_SFB_SMOTE

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.194964,0.196149,0.194964,0.165144,"{'C': 100, 'gamma': 0.01, 'kernel': 'rbf'}"
1,100,69,0.194964,0.196149,0.194964,0.165144,"{'C': 100, 'gamma': 0.01, 'kernel': 'rbf'}"
2,100,101,0.194964,0.196149,0.194964,0.165144,"{'C': 100, 'gamma': 0.01, 'kernel': 'rbf'}"


In [63]:
df5L_SFB_SMOTE.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.194964,0.196149,0.194964,0.165144


## Student Lifestyle

### SL before Data Balancing

In [64]:
temp5L_SL_imb = []

y_train = df1_5L_imb[['CGPA']]
X_train = df1_5L_imb[['Relationship Status', 'Scholarship', 'PTPTN', 'Skipping Class', 'Additional Course',
                    'Study Behavior', 'Study Time', 'Extra Curricular Activities', 'Part-time Job', 'Commute Time',
                    'Gaming', 'Alcohol Consumption', 'I have enough free time after class', 'Social Interaction',
                    'I hang out with friends']]


y_test = df2_5L_imb[['CGPA']]
X_test = df2_5L_imb[['Relationship Status', 'Scholarship', 'PTPTN', 'Skipping Class', 'Additional Course',
                    'Study Behavior', 'Study Time', 'Extra Curricular Activities', 'Part-time Job', 'Commute Time',
                    'Gaming', 'Alcohol Consumption', 'I have enough free time after class', 'Social Interaction',
                    'I hang out with friends']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_SL_imb.append([100,j,acc,pre,recall,f1,param])
        
df5L_SL_imb = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_SL_imb)
df5L_SL_imb

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.452769,0.205,0.452769,0.282219,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.452769,0.205,0.452769,0.282219,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.452769,0.205,0.452769,0.282219,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"


In [65]:
df5L_SL_imb.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.452769,0.205,0.452769,0.282219


### SL after RandomOverSampler

In [66]:
temp5L_SL_ROS = []

y_train = df1_5L_ROS[['CGPA']]
X_train = df1_5L_ROS[['Relationship Status', 'Scholarship', 'PTPTN', 'Skipping Class', 'Additional Course',
                    'Study Behavior', 'Study Time', 'Extra Curricular Activities', 'Part-time Job', 'Commute Time',
                    'Gaming', 'Alcohol Consumption', 'I have enough free time after class', 'Social Interaction',
                    'I hang out with friends']]


y_test = df2_5L_ROS[['CGPA']]
X_test = df2_5L_ROS[['Relationship Status', 'Scholarship', 'PTPTN', 'Skipping Class', 'Additional Course',
                    'Study Behavior', 'Study Time', 'Extra Curricular Activities', 'Part-time Job', 'Commute Time',
                    'Gaming', 'Alcohol Consumption', 'I have enough free time after class', 'Social Interaction',
                    'I hang out with friends']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_SL_ROS.append([100,j,acc,pre,recall,f1,param])
        
df5L_SL_ROS = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_SL_ROS)
df5L_SL_ROS

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.197122,0.070893,0.197122,0.076797,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.197122,0.070893,0.197122,0.076797,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.197122,0.070893,0.197122,0.076797,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"


In [67]:
df5L_SL_ROS.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.197122,0.070893,0.197122,0.076797


### SL after SMOTE

In [68]:
temp5L_SL_SMOTE = []

y_train = df1_5L_SMOTE[['CGPA']]
X_train = df1_5L_SMOTE[['Relationship Status', 'Scholarship', 'PTPTN', 'Skipping Class', 'Additional Course',
                    'Study Behavior', 'Study Time', 'Extra Curricular Activities', 'Part-time Job', 'Commute Time',
                    'Gaming', 'Alcohol Consumption', 'I have enough free time after class', 'Social Interaction',
                    'I hang out with friends']]


y_test = df2_5L_SMOTE[['CGPA']]
X_test = df2_5L_SMOTE[['Relationship Status', 'Scholarship', 'PTPTN', 'Skipping Class', 'Additional Course',
                    'Study Behavior', 'Study Time', 'Extra Curricular Activities', 'Part-time Job', 'Commute Time',
                    'Gaming', 'Alcohol Consumption', 'I have enough free time after class', 'Social Interaction',
                    'I hang out with friends']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_SL_SMOTE.append([100,j,acc,pre,recall,f1,param])
        
df5L_SL_SMOTE = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_SL_SMOTE)
df5L_SL_SMOTE

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.242446,0.154943,0.242446,0.174373,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.242446,0.154943,0.242446,0.174373,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.242446,0.154943,0.242446,0.174373,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"


In [69]:
df5L_SL_SMOTE.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.242446,0.154943,0.242446,0.174373


## All Attributes

### AA before Data Balancing

In [70]:
temp5L_imb = []

y_train = df1_5L_imb[['CGPA']]
X_train = df1_5L_imb.drop(columns=["CGPA"])

y_test = df2_5L_imb[['CGPA']]
X_test = df2_5L_imb.drop(columns=["CGPA"])

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_imb.append([100,j,acc,pre,recall,f1,param])
        
df5L_imb = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_imb)
df5L_imb



Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.456026,0.316445,0.456026,0.301494,"{'C': 0.1, 'gamma': 0.001, 'kernel': 'sigmoid'}"
1,100,69,0.456026,0.316445,0.456026,0.301494,"{'C': 0.1, 'gamma': 0.001, 'kernel': 'sigmoid'}"
2,100,101,0.456026,0.316445,0.456026,0.301494,"{'C': 0.1, 'gamma': 0.001, 'kernel': 'sigmoid'}"


In [71]:
df5L_imb.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.456026,0.316445,0.456026,0.301494


### AA after RandomOverSampler

In [72]:
temp5L_ROS = []

y_train = df1_5L_ROS[['CGPA']]
X_train = df1_5L_ROS.drop(columns=["CGPA"])

y_test = df2_5L_ROS[['CGPA']]
X_test = df2_5L_ROS.drop(columns=["CGPA"])

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_ROS.append([100,j,acc,pre,recall,f1,param])
        
df5L_ROS = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_ROS)
df5L_ROS

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.2,0.04,0.2,0.066667,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.2,0.04,0.2,0.066667,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.2,0.04,0.2,0.066667,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"


In [73]:
df5L_ROS.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.2,0.04,0.2,0.066667


### AA after SMOTE

In [74]:
temp5L_SMOTE = []

y_train = df1_5L_SMOTE[['CGPA']]
X_train = df1_5L_SMOTE.drop(columns=["CGPA"])

y_test = df2_5L_SMOTE[['CGPA']]
X_test = df2_5L_SMOTE.drop(columns=["CGPA"])

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_SMOTE.append([100,j,acc,pre,recall,f1,param])
        
df5L_SMOTE = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_SMOTE)
df5L_SMOTE

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.240288,0.277608,0.240288,0.187265,"{'C': 100, 'gamma': 0.01, 'kernel': 'rbf'}"
1,100,69,0.240288,0.277608,0.240288,0.187265,"{'C': 100, 'gamma': 0.01, 'kernel': 'rbf'}"
2,100,101,0.240288,0.277608,0.240288,0.187265,"{'C': 100, 'gamma': 0.01, 'kernel': 'rbf'}"


In [75]:
df5L_SMOTE.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.240288,0.277608,0.240288,0.187265
