# 3.2 Support Vector Machine (Cross Validation)

### Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import classification_report, confusion_matrix

# Dataset used

In [2]:
### Senior dataset
df3_B_imb = pd.read_csv('tmerged_imb.csv')
df3_5L_imb = pd.read_csv('fmerged_imb.csv')

df3_B_ROS = pd.read_csv('tros_merged.csv')
df3_5L_ROS = pd.read_csv('fros_merged.csv')

df3_B_SMOTE = pd.read_csv('tsmote_merged.csv')
df3_5L_SMOTE = pd.read_csv('fsmote_merged.csv')

### Merge dataset 
merge_B_imb = pd.read_csv('merge_B_imb.csv')
merge_5L_imb = pd.read_csv('merge_5L_imb.csv')

merge_B_ROS = pd.read_csv('merge_B_ROS.csv')
merge_5L_ROS = pd.read_csv('merge_5L_ROS.csv')

merge_B_SMOTE = pd.read_csv('merge_B_SMOTE.csv')
merge_5L_SMOTE = pd.read_csv('merge_5L_SMOTE.csv')

print("Senior Dataset:\n")
print("df3_B_imb:", df3_B_imb.shape)
print("df3_5L_imb:", df3_5L_imb.shape)
print("df3_B_ROS:", df3_B_ROS.shape)
print("df3_5L_ROS:", df3_5L_ROS.shape)
print("df3_B_SMOTE:", df3_B_SMOTE.shape)
print("df3_5L_SMOTE:", df3_5L_SMOTE.shape)
      
print("\nMerged Dataset:\n")
print("merge_B_imb:", merge_B_imb.shape)
print("merge_5L_imb:", merge_5L_imb.shape)
print("merge_B_ROS:", merge_B_ROS.shape)
print("merge_5L_ROS:", merge_5L_ROS.shape)
print("merge_B_SMOTE:", merge_B_SMOTE.shape)
print("merge_5L_SMOTE:", merge_5L_SMOTE.shape)

Senior Dataset:

df3_B_imb: (1109, 41)
df3_5L_imb: (1109, 41)
df3_B_ROS: (2194, 41)
df3_5L_ROS: (2755, 41)
df3_B_SMOTE: (2194, 41)
df3_5L_SMOTE: (2755, 41)

Merged Dataset:

merge_B_imb: (1287, 41)
merge_5L_imb: (1287, 41)
merge_B_ROS: (2540, 41)
merge_5L_ROS: (3085, 41)
merge_B_SMOTE: (2540, 41)
merge_5L_SMOTE: (3085, 41)


In [3]:
rand_states = [7,69,101]

# Binary Classification

## Student Background

### SB before Data Balancing

In [4]:
tempB_SB_imb = []

y_train = df3_B_imb[['CGPA']]
X_train = df3_B_imb[['Age','Gender','Nationality', 'Living Status', 'Home Town', 'Faculty', 'Education Level',
                     'Years of Study', 'Completed Credit Hours']]

y_test = merge_B_imb[['CGPA']]
X_test = merge_B_imb[['Age','Gender','Nationality', 'Living Status', 'Home Town', 'Faculty', 'Education Level',
                     'Years of Study', 'Completed Credit Hours']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_SB_imb.append([100,j,acc,pre,recall,f1,param])
        
dfB_SB_imb = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_SB_imb)
dfB_SB_imb

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.986791,0.973756,0.986791,0.98023,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.986791,0.973756,0.986791,0.98023,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.986791,0.973756,0.986791,0.98023,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"


In [5]:
dfB_SB_imb.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.986791,0.973756,0.986791,0.98023


### SB after RandomOverSampler

In [6]:
tempB_SB_ROS = []

y_train = df3_B_ROS[['CGPA']]
X_train = df3_B_ROS[['Age','Gender','Nationality', 'Living Status', 'Home Town', 'Faculty', 'Education Level',
                     'Years of Study', 'Completed Credit Hours']]

y_test = merge_B_ROS[['CGPA']]
X_test = merge_B_ROS[['Age','Gender','Nationality', 'Living Status', 'Home Town', 'Faculty', 'Education Level',
                     'Years of Study', 'Completed Credit Hours']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_SB_ROS.append([100,j,acc,pre,recall,f1,param])
        
dfB_SB_ROS = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_SB_ROS)
dfB_SB_ROS

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.49252,0.248116,0.49252,0.329992,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.49252,0.248116,0.49252,0.329992,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.49252,0.248116,0.49252,0.329992,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"


In [7]:
dfB_SB_ROS.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.49252,0.248116,0.49252,0.329992


### SB after SMOTE

In [8]:
tempB_SB_SMOTE = []

y_train = df3_B_SMOTE[['CGPA']]
X_train = df3_B_SMOTE[['Age','Gender','Nationality', 'Living Status', 'Home Town', 'Faculty', 'Education Level',
                     'Years of Study', 'Completed Credit Hours']]

y_test = merge_B_SMOTE[['CGPA']]
X_test = merge_B_SMOTE[['Age','Gender','Nationality', 'Living Status', 'Home Town', 'Faculty', 'Education Level',
                     'Years of Study', 'Completed Credit Hours']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_SB_SMOTE.append([100,j,acc,pre,recall,f1,param])
        
dfB_SB_SMOTE = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_SB_SMOTE)
dfB_SB_SMOTE

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.526378,0.583701,0.526378,0.428536,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.526378,0.583701,0.526378,0.428536,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.526378,0.583701,0.526378,0.428536,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"


In [9]:
dfB_SB_SMOTE.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.526378,0.583701,0.526378,0.428536


## Student History Grades

### SHG before Data Balancing

In [10]:
tempB_SHG_imb = []

y_train = df3_B_imb[['CGPA']]
X_train = df3_B_imb[['Number of Subjects Failed','SPM Result (A)','SPM Result (B)','SPM Result (C)']]

y_test = merge_B_imb[['CGPA']]
X_test = merge_B_imb[['Number of Subjects Failed','SPM Result (A)','SPM Result (B)','SPM Result (C)']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_SHG_imb.append([100,j,acc,pre,recall,f1,param])
        
dfB_SHG_imb = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_SHG_imb)
dfB_SHG_imb

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.986791,0.973756,0.986791,0.98023,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.986791,0.973756,0.986791,0.98023,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.986791,0.973756,0.986791,0.98023,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"


In [11]:
dfB_SHG_imb.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.986791,0.973756,0.986791,0.98023


### SHG after RandomOverSampler

In [12]:
tempB_SHG_ROS = []

y_train = df3_B_ROS[['CGPA']]
X_train = df3_B_ROS[['Number of Subjects Failed','SPM Result (A)','SPM Result (B)','SPM Result (C)']]

y_test = merge_B_ROS[['CGPA']]
X_test = merge_B_ROS[['Number of Subjects Failed','SPM Result (A)','SPM Result (B)','SPM Result (C)']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_SHG_ROS.append([100,j,acc,pre,recall,f1,param])
        
dfB_SHG_ROS = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_SHG_ROS)
dfB_SHG_ROS

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.538583,0.636732,0.538583,0.437669,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.538583,0.636732,0.538583,0.437669,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.538583,0.636732,0.538583,0.437669,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"


In [13]:
dfB_SHG_ROS.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.538583,0.636732,0.538583,0.437669


### SHG after SMOTE

In [14]:
tempB_SHG_SMOTE = []

y_train = df3_B_SMOTE[['CGPA']]
X_train = df3_B_SMOTE[['Number of Subjects Failed','SPM Result (A)','SPM Result (B)','SPM Result (C)']]

y_test = merge_B_SMOTE[['CGPA']]
X_test = merge_B_SMOTE[['Number of Subjects Failed','SPM Result (A)','SPM Result (B)','SPM Result (C)']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_SHG_SMOTE.append([100,j,acc,pre,recall,f1,param])
        
dfB_SHG_SMOTE = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_SHG_SMOTE)
dfB_SHG_SMOTE

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.672047,0.725201,0.672047,0.651482,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.672047,0.725201,0.672047,0.651482,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.672047,0.725201,0.672047,0.651482,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"


In [15]:
dfB_SHG_SMOTE.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.672047,0.725201,0.672047,0.651482


## Student Opinion Towards MMU Environment

### SOTME before Data Balancing

In [16]:
tempB_SOTME_imb = []

y_train = df3_B_imb[['CGPA']]
X_train = df3_B_imb[['Reason Study at Multimedia University', 'Lecture Class Capacity','Tutorial Class Capacity', 
                     'Classroom Facilities', 'Campus Environment']]

y_test = merge_B_imb[['CGPA']]
X_test = merge_B_imb[['Reason Study at Multimedia University', 'Lecture Class Capacity','Tutorial Class Capacity', 
                     'Classroom Facilities', 'Campus Environment']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_SOTME_imb.append([100,j,acc,pre,recall,f1,param])
        
dfB_SOTME_imb = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_SOTME_imb)
dfB_SOTME_imb

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.986791,0.973756,0.986791,0.98023,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.986791,0.973756,0.986791,0.98023,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.986791,0.973756,0.986791,0.98023,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"


In [17]:
dfB_SOTME_imb.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.986791,0.973756,0.986791,0.98023


### SOTME after RandomOverSampler

In [18]:
tempB_SOTME_ROS = []

y_train = df3_B_ROS[['CGPA']]
X_train = df3_B_ROS[['Reason Study at Multimedia University', 'Lecture Class Capacity','Tutorial Class Capacity', 
                     'Classroom Facilities', 'Campus Environment']]

y_test = merge_B_ROS[['CGPA']]
X_test = merge_B_ROS[['Reason Study at Multimedia University', 'Lecture Class Capacity','Tutorial Class Capacity', 
                     'Classroom Facilities', 'Campus Environment']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_SOTME_ROS.append([100,j,acc,pre,recall,f1,param])
        
dfB_SOTME_ROS = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_SOTME_ROS)
dfB_SOTME_ROS

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.499213,0.249803,0.499213,0.332983,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.499213,0.249803,0.499213,0.332983,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.499213,0.249803,0.499213,0.332983,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"


In [19]:
dfB_SOTME_ROS.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.499213,0.249803,0.499213,0.332983


### SOTME after SMOTE

In [20]:
tempB_SOTME_SMOTE = []

y_train = df3_B_SMOTE[['CGPA']]
X_train = df3_B_SMOTE[['Reason Study at Multimedia University', 'Lecture Class Capacity','Tutorial Class Capacity', 
                     'Classroom Facilities', 'Campus Environment']]

y_test = merge_B_SMOTE[['CGPA']]
X_test = merge_B_SMOTE[['Reason Study at Multimedia University', 'Lecture Class Capacity','Tutorial Class Capacity', 
                     'Classroom Facilities', 'Campus Environment']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_SOTME_SMOTE.append([100,j,acc,pre,recall,f1,param])
        
dfB_SOTME_SMOTE = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_SOTME_SMOTE)
dfB_SOTME_SMOTE

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.499606,0.249902,0.499606,0.333158,"{'C': 100, 'gamma': 0.1, 'kernel': 'rbf'}"
1,100,69,0.499606,0.249902,0.499606,0.333158,"{'C': 100, 'gamma': 0.1, 'kernel': 'rbf'}"
2,100,101,0.499606,0.249902,0.499606,0.333158,"{'C': 100, 'gamma': 0.1, 'kernel': 'rbf'}"


In [21]:
dfB_SOTME_SMOTE.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.499606,0.249902,0.499606,0.333158


## Student Family Background

### SFB before Data Balancing

In [22]:
df3_B_imb.rename(columns={"Parent's Education Level [Father]": "Parent's Education Level.Father"}, inplace=True)
df3_B_imb.rename(columns={"Parent's Education Level [Mother]": "Parent's Education Level.Mother"}, inplace=True)
df3_B_ROS.rename(columns={"Parent's Education Level [Father]": "Parent's Education Level.Father"}, inplace=True)
df3_B_ROS.rename(columns={"Parent's Education Level [Mother]": "Parent's Education Level.Mother"}, inplace=True)
df3_B_SMOTE.rename(columns={"Parent's Education Level [Father]": "Parent's Education Level.Father"}, inplace=True)
df3_B_SMOTE.rename(columns={"Parent's Education Level [Mother]": "Parent's Education Level.Mother"}, inplace=True)

In [23]:
tempB_SFB_imb = []

y_train = df3_B_imb[['CGPA']]
X_train = df3_B_imb[['Family Size', "Parent's Education Level.Father", "Parent's Education Level.Mother", 
                     "Father's Occupation", "Mother's Occupation", 'Financial Difficulties', 'Financial Support']]

y_test = merge_B_imb[['CGPA']]
X_test = merge_B_imb[['Family Size', "Parent's Education Level.Father", "Parent's Education Level.Mother", 
                     "Father's Occupation", "Mother's Occupation", 'Financial Difficulties', 'Financial Support']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_SFB_imb.append([100,j,acc,pre,recall,f1,param])
        
dfB_SFB_imb = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_SFB_imb)
dfB_SFB_imb

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.986791,0.973756,0.986791,0.98023,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.986791,0.973756,0.986791,0.98023,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.986791,0.973756,0.986791,0.98023,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"


In [24]:
dfB_SFB_imb.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.986791,0.973756,0.986791,0.98023


### SFB after RandomOverSampler

In [25]:
tempB_SFB_ROS = []

y_train = df3_B_ROS[['CGPA']]
X_train = df3_B_ROS[['Family Size', "Parent's Education Level.Father", "Parent's Education Level.Mother", 
                     "Father's Occupation", "Mother's Occupation", 'Financial Difficulties', 'Financial Support']]

y_test = merge_B_ROS[['CGPA']]
X_test = merge_B_ROS[['Family Size', "Parent's Education Level.Father", "Parent's Education Level.Mother", 
                     "Father's Occupation", "Mother's Occupation", 'Financial Difficulties', 'Financial Support']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_SFB_ROS.append([100,j,acc,pre,recall,f1,param])
        
dfB_SFB_ROS = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_SFB_ROS)
dfB_SFB_ROS

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.5,0.25,0.5,0.333333,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.5,0.25,0.5,0.333333,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.5,0.25,0.5,0.333333,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"


In [26]:
dfB_SFB_ROS.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.5,0.25,0.5,0.333333


### SFB after SMOTE

In [27]:
tempB_SFB_SMOTE = []

y_train = df3_B_SMOTE[['CGPA']]
X_train = df3_B_SMOTE[['Family Size', "Parent's Education Level.Father", "Parent's Education Level.Mother", 
                     "Father's Occupation", "Mother's Occupation", 'Financial Difficulties', 'Financial Support']]

y_test = merge_B_SMOTE[['CGPA']]
X_test = merge_B_SMOTE[['Family Size', "Parent's Education Level.Father", "Parent's Education Level.Mother", 
                     "Father's Occupation", "Mother's Occupation", 'Financial Difficulties', 'Financial Support']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_SFB_SMOTE.append([100,j,acc,pre,recall,f1,param])
        
dfB_SFB_SMOTE = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_SFB_SMOTE)
dfB_SFB_SMOTE

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.5,0.25,0.5,0.333333,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.5,0.25,0.5,0.333333,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.5,0.25,0.5,0.333333,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"


In [28]:
dfB_SFB_SMOTE.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.5,0.25,0.5,0.333333


## Student Lifestyle

### SL before Data Balancing

In [29]:
df3_B_imb.rename(columns={"Dating": "Relationship Status"}, inplace=True)
df3_B_imb.rename(columns={"Study Behaviour": "Study Behavior"}, inplace=True)
df3_B_imb.rename(columns={"I have enough free time after class.": "I have enough free time after class"}, inplace=True)
df3_B_imb.rename(columns={"I hang out with friends.": "I hang out with friends"}, inplace=True)

df3_B_ROS.rename(columns={"Dating": "Relationship Status"}, inplace=True)
df3_B_ROS.rename(columns={"Study Behaviour": "Study Behavior"}, inplace=True)
df3_B_ROS.rename(columns={"I have enough free time after class.": "I have enough free time after class"}, inplace=True)
df3_B_ROS.rename(columns={"I hang out with friends.": "I hang out with friends"}, inplace=True)

df3_B_SMOTE.rename(columns={"Dating": "Relationship Status"}, inplace=True)
df3_B_SMOTE.rename(columns={"Study Behaviour": "Study Behavior"}, inplace=True)
df3_B_SMOTE.rename(columns={"I have enough free time after class.": "I have enough free time after class"}, inplace=True)
df3_B_SMOTE.rename(columns={"I hang out with friends.": "I hang out with friends"}, inplace=True)

In [30]:
tempB_SL_imb = []

y_train = df3_B_imb[['CGPA']]
X_train = df3_B_imb[['Relationship Status', 'Scholarship', 'PTPTN', 'Skipping Class', 'Additional Course',
                    'Study Behavior', 'Study Time', 'Extra Curricular Activities', 'Part-time Job', 'Commute Time',
                    'Gaming', 'Alcohol Consumption', 'I have enough free time after class', 'Social Interaction',
                    'I hang out with friends']]

y_test = merge_B_imb[['CGPA']]
X_test = merge_B_imb[['Relationship Status', 'Scholarship', 'PTPTN', 'Skipping Class', 'Additional Course',
                    'Study Behavior', 'Study Time', 'Extra Curricular Activities', 'Part-time Job', 'Commute Time',
                    'Gaming', 'Alcohol Consumption', 'I have enough free time after class', 'Social Interaction',
                    'I hang out with friends']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_SL_imb.append([100,j,acc,pre,recall,f1,param])
        
dfB_SL_imb = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_SL_imb)
dfB_SL_imb

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.986791,0.973756,0.986791,0.98023,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.986791,0.973756,0.986791,0.98023,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.986791,0.973756,0.986791,0.98023,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"


In [31]:
dfB_SL_imb.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.986791,0.973756,0.986791,0.98023


### SL after RandomOverSampler

In [32]:
tempB_SL_ROS = []

y_train = df3_B_ROS[['CGPA']]
X_train = df3_B_ROS[['Relationship Status', 'Scholarship', 'PTPTN', 'Skipping Class', 'Additional Course',
                    'Study Behavior', 'Study Time', 'Extra Curricular Activities', 'Part-time Job', 'Commute Time',
                    'Gaming', 'Alcohol Consumption', 'I have enough free time after class', 'Social Interaction',
                    'I hang out with friends']]

y_test = merge_B_ROS[['CGPA']]
X_test = merge_B_ROS[['Relationship Status', 'Scholarship', 'PTPTN', 'Skipping Class', 'Additional Course',
                    'Study Behavior', 'Study Time', 'Extra Curricular Activities', 'Part-time Job', 'Commute Time',
                    'Gaming', 'Alcohol Consumption', 'I have enough free time after class', 'Social Interaction',
                    'I hang out with friends']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_SL_ROS.append([100,j,acc,pre,recall,f1,param])
        
dfB_SL_ROS = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_SL_ROS)
dfB_SL_ROS

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.5,0.25,0.5,0.333333,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.5,0.25,0.5,0.333333,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.5,0.25,0.5,0.333333,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"


In [33]:
dfB_SL_ROS.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.5,0.25,0.5,0.333333


### SL after SMOTE

In [34]:
tempB_SL_SMOTE = []

y_train = df3_B_SMOTE[['CGPA']]
X_train = df3_B_SMOTE[['Relationship Status', 'Scholarship', 'PTPTN', 'Skipping Class', 'Additional Course',
                    'Study Behavior', 'Study Time', 'Extra Curricular Activities', 'Part-time Job', 'Commute Time',
                    'Gaming', 'Alcohol Consumption', 'I have enough free time after class', 'Social Interaction',
                    'I hang out with friends']]

y_test = merge_B_SMOTE[['CGPA']]
X_test = merge_B_SMOTE[['Relationship Status', 'Scholarship', 'PTPTN', 'Skipping Class', 'Additional Course',
                    'Study Behavior', 'Study Time', 'Extra Curricular Activities', 'Part-time Job', 'Commute Time',
                    'Gaming', 'Alcohol Consumption', 'I have enough free time after class', 'Social Interaction',
                    'I hang out with friends']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_SL_SMOTE.append([100,j,acc,pre,recall,f1,param])
        
dfB_SL_SMOTE = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_SL_SMOTE)
dfB_SL_SMOTE

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.499213,0.249803,0.499213,0.332983,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.499213,0.249803,0.499213,0.332983,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.499213,0.249803,0.499213,0.332983,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"


In [35]:
dfB_SL_SMOTE.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.499213,0.249803,0.499213,0.332983


## All Attributes

### AA before Data Balancing

In [36]:
tempB_imb = []

y_train = df3_B_imb[['CGPA']]
X_train = df3_B_imb.drop(columns=['CGPA'])

y_test = merge_B_imb[['CGPA']]
X_test = merge_B_imb.drop(columns=['CGPA'])

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_imb.append([100,j,acc,pre,recall,f1,param])
        
dfB_imb = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_imb)
dfB_imb

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.986791,0.973756,0.986791,0.98023,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.986791,0.973756,0.986791,0.98023,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.986791,0.973756,0.986791,0.98023,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"


In [37]:
dfB_imb.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.986791,0.973756,0.986791,0.98023


### AA after RandomOverSampler

In [38]:
tempB_ROS = []

y_train = df3_B_ROS[['CGPA']]
X_train = df3_B_ROS.drop(columns=['CGPA'])

y_test = merge_B_ROS[['CGPA']]
X_test = merge_B_ROS.drop(columns=['CGPA'])

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_ROS.append([100,j,acc,pre,recall,f1,param])
        
dfB_ROS = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_ROS)
dfB_ROS

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.5,0.25,0.5,0.333333,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.5,0.25,0.5,0.333333,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.5,0.25,0.5,0.333333,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"


In [39]:
dfB_ROS.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.5,0.25,0.5,0.333333


### AA after SMOTE

In [40]:
tempB_SMOTE = []

y_train = df3_B_SMOTE[['CGPA']]
X_train = df3_B_SMOTE.drop(columns=['CGPA'])

y_test = merge_B_SMOTE[['CGPA']]
X_test = merge_B_SMOTE.drop(columns=['CGPA'])

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    tempB_SMOTE.append([100,j,acc,pre,recall,f1,param])
        
dfB_SMOTE = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=tempB_SMOTE)
dfB_SMOTE

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.5,0.25,0.5,0.333333,"{'C': 0.1, 'gamma': 0.1, 'kernel': 'rbf'}"
1,100,69,0.5,0.25,0.5,0.333333,"{'C': 0.1, 'gamma': 0.1, 'kernel': 'rbf'}"
2,100,101,0.5,0.25,0.5,0.333333,"{'C': 0.1, 'gamma': 0.1, 'kernel': 'rbf'}"


In [41]:
dfB_SMOTE.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.5,0.25,0.5,0.333333


# 5-Level Classification

## Student Background

### SB before Data Balancing

In [42]:
temp5L_SB_imb = []

y_train = df3_5L_imb[['CGPA']]
X_train = df3_5L_imb[['Age','Gender','Nationality', 'Living Status', 'Home Town', 'Faculty', 'Education Level',
                     'Years of Study', 'Completed Credit Hours']]

y_test = merge_5L_imb[['CGPA']]
X_test = merge_5L_imb[['Age','Gender','Nationality', 'Living Status', 'Home Town', 'Faculty', 'Education Level',
                     'Years of Study', 'Completed Credit Hours']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_SB_imb.append([100,j,acc,pre,recall,f1,param])
        
df5L_SB_imb = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_SB_imb)
df5L_SB_imb

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.469308,0.317056,0.469308,0.342438,"{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}"
1,100,69,0.469308,0.317056,0.469308,0.342438,"{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}"
2,100,101,0.469308,0.317056,0.469308,0.342438,"{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}"


In [43]:
df5L_SB_imb.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.469308,0.317056,0.469308,0.342438


### SB after RandomOverSampler

In [44]:
temp5L_SB_ROS = []

y_train = df3_5L_ROS[['CGPA']]
X_train = df3_5L_ROS[['Age','Gender','Nationality', 'Living Status', 'Home Town', 'Faculty', 'Education Level',
                     'Years of Study', 'Completed Credit Hours']]

y_test = merge_5L_ROS[['CGPA']]
X_test = merge_5L_ROS[['Age','Gender','Nationality', 'Living Status', 'Home Town', 'Faculty', 'Education Level',
                     'Years of Study', 'Completed Credit Hours']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_SB_ROS.append([100,j,acc,pre,recall,f1,param])
        
df5L_SB_ROS = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_SB_ROS)
df5L_SB_ROS

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.201621,0.159021,0.201621,0.149114,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.201621,0.159021,0.201621,0.149114,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.201621,0.159021,0.201621,0.149114,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"


In [45]:
df5L_SB_ROS.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.201621,0.159021,0.201621,0.149114


### SB after SMOTE

In [46]:
temp5L_SB_SMOTE = []

y_train = df3_5L_SMOTE[['CGPA']]
X_train = df3_5L_SMOTE[['Age','Gender','Nationality', 'Living Status', 'Home Town', 'Faculty', 'Education Level',
                     'Years of Study', 'Completed Credit Hours']]

y_test = merge_5L_SMOTE[['CGPA']]
X_test = merge_5L_SMOTE[['Age','Gender','Nationality', 'Living Status', 'Home Town', 'Faculty', 'Education Level',
                     'Years of Study', 'Completed Credit Hours']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_SB_SMOTE.append([100,j,acc,pre,recall,f1,param])
        
df5L_SB_SMOTE = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_SB_SMOTE)
df5L_SB_SMOTE

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.270989,0.281939,0.270989,0.254415,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.270989,0.281939,0.270989,0.254415,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.270989,0.281939,0.270989,0.254415,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"


In [47]:
df5L_SB_SMOTE.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.270989,0.281939,0.270989,0.254415


## Student History Grades

### SHG before Data Balancing

In [48]:
temp5L_SHG_imb = []

y_train = df3_5L_imb[['CGPA']]
X_train = df3_5L_imb[['Number of Subjects Failed','SPM Result (A)','SPM Result (B)','SPM Result (C)']]

y_test = merge_5L_imb[['CGPA']]
X_test = merge_5L_imb[['Number of Subjects Failed','SPM Result (A)','SPM Result (B)','SPM Result (C)']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_SHG_imb.append([100,j,acc,pre,recall,f1,param])
        
df5L_SHG_imb = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_SHG_imb)
df5L_SHG_imb

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.559441,0.51214,0.559441,0.487671,"{'C': 100, 'gamma': 0.01, 'kernel': 'rbf'}"
1,100,69,0.559441,0.51214,0.559441,0.487671,"{'C': 100, 'gamma': 0.01, 'kernel': 'rbf'}"
2,100,101,0.559441,0.51214,0.559441,0.487671,"{'C': 100, 'gamma': 0.01, 'kernel': 'rbf'}"


In [49]:
df5L_SHG_imb.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.559441,0.51214,0.559441,0.487671


### SHG after RandomOverSampler

In [50]:
temp5L_SHG_ROS = []

y_train = df3_5L_ROS[['CGPA']]
X_train = df3_5L_ROS[['Number of Subjects Failed','SPM Result (A)','SPM Result (B)','SPM Result (C)']]

y_test = merge_5L_ROS[['CGPA']]
X_test = merge_5L_ROS[['Number of Subjects Failed','SPM Result (A)','SPM Result (B)','SPM Result (C)']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_SHG_ROS.append([100,j,acc,pre,recall,f1,param])
        
df5L_SHG_ROS = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_SHG_ROS)
df5L_SHG_ROS

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.276499,0.222966,0.276499,0.241843,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.276499,0.222966,0.276499,0.241843,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.276499,0.222966,0.276499,0.241843,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"


In [51]:
df5L_SHG_ROS.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.276499,0.222966,0.276499,0.241843


### SHG after SMOTE

In [52]:
temp5L_SHG_SMOTE = []

y_train = df3_5L_SMOTE[['CGPA']]
X_train = df3_5L_SMOTE[['Number of Subjects Failed','SPM Result (A)','SPM Result (B)','SPM Result (C)']]

y_test = merge_5L_SMOTE[['CGPA']]
X_test = merge_5L_SMOTE[['Number of Subjects Failed','SPM Result (A)','SPM Result (B)','SPM Result (C)']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_SHG_SMOTE.append([100,j,acc,pre,recall,f1,param])
        
df5L_SHG_SMOTE = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_SHG_SMOTE)
df5L_SHG_SMOTE

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.354295,0.350413,0.354295,0.349467,"{'C': 100, 'gamma': 0.1, 'kernel': 'rbf'}"
1,100,69,0.354295,0.350413,0.354295,0.349467,"{'C': 100, 'gamma': 0.1, 'kernel': 'rbf'}"
2,100,101,0.354295,0.350413,0.354295,0.349467,"{'C': 100, 'gamma': 0.1, 'kernel': 'rbf'}"


In [53]:
df5L_SHG_SMOTE.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.354295,0.350413,0.354295,0.349467


## Student Opinion Towards MMU Environment

### SOTME before Data Balancing

In [54]:
temp5L_SOTME_imb = []

y_train = df3_5L_imb[['CGPA']]
X_train = df3_5L_imb[['Reason Study at Multimedia University', 'Lecture Class Capacity','Tutorial Class Capacity', 
                     'Classroom Facilities', 'Campus Environment']]

y_test = merge_5L_imb[['CGPA']]
X_test = merge_5L_imb[['Reason Study at Multimedia University', 'Lecture Class Capacity','Tutorial Class Capacity', 
                     'Classroom Facilities', 'Campus Environment']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_SOTME_imb.append([100,j,acc,pre,recall,f1,param])
        
df5L_SOTME_imb = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_SOTME_imb)
df5L_SOTME_imb

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.479409,0.229833,0.479409,0.31071,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.479409,0.229833,0.479409,0.31071,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.479409,0.229833,0.479409,0.31071,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"


In [55]:
df5L_SOTME_imb.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.479409,0.229833,0.479409,0.31071


### SOTME after RandomOverSampler

In [56]:
temp5L_SOTME_ROS = []

y_train = df3_5L_ROS[['CGPA']]
X_train = df3_5L_ROS[['Reason Study at Multimedia University', 'Lecture Class Capacity','Tutorial Class Capacity', 
                     'Classroom Facilities', 'Campus Environment']]

y_test = merge_5L_ROS[['CGPA']]
X_test = merge_5L_ROS[['Reason Study at Multimedia University', 'Lecture Class Capacity','Tutorial Class Capacity', 
                     'Classroom Facilities', 'Campus Environment']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_SOTME_ROS.append([100,j,acc,pre,recall,f1,param])
        
df5L_SOTME_ROS = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_SOTME_ROS)
df5L_SOTME_ROS

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.198703,0.039935,0.198703,0.066504,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.198703,0.039935,0.198703,0.066504,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.198703,0.039935,0.198703,0.066504,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"


In [57]:
df5L_SOTME_ROS.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.198703,0.039935,0.198703,0.066504


### SOTME after SMOTE

In [58]:
temp5L_SOTME_SMOTE = []

y_train = df3_5L_SMOTE[['CGPA']]
X_train = df3_5L_SMOTE[['Reason Study at Multimedia University', 'Lecture Class Capacity','Tutorial Class Capacity', 
                     'Classroom Facilities', 'Campus Environment']]

y_test = merge_5L_SMOTE[['CGPA']]
X_test = merge_5L_SMOTE[['Reason Study at Multimedia University', 'Lecture Class Capacity','Tutorial Class Capacity', 
                     'Classroom Facilities', 'Campus Environment']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_SOTME_SMOTE.append([100,j,acc,pre,recall,f1,param])
        
df5L_SOTME_SMOTE = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_SOTME_SMOTE)
df5L_SOTME_SMOTE

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.199352,0.039909,0.199352,0.066504,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.199352,0.039909,0.199352,0.066504,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.199352,0.039909,0.199352,0.066504,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"


In [59]:
df5L_SOTME_SMOTE.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.199352,0.039909,0.199352,0.066504


## Student Family Background

### SFB before Data Balancing

In [60]:
df3_5L_imb.rename(columns={"Parent's Education Level [Father]": "Parent's Education Level.Father"}, inplace=True)
df3_5L_imb.rename(columns={"Parent's Education Level [Mother]": "Parent's Education Level.Mother"}, inplace=True)
df3_5L_ROS.rename(columns={"Parent's Education Level [Father]": "Parent's Education Level.Father"}, inplace=True)
df3_5L_ROS.rename(columns={"Parent's Education Level [Mother]": "Parent's Education Level.Mother"}, inplace=True)
df3_5L_SMOTE.rename(columns={"Parent's Education Level [Father]": "Parent's Education Level.Father"}, inplace=True)
df3_5L_SMOTE.rename(columns={"Parent's Education Level [Mother]": "Parent's Education Level.Mother"}, inplace=True)

In [61]:
temp5L_SFB_imb = []

y_train = df3_5L_imb[['CGPA']]
X_train = df3_5L_imb[['Family Size', "Parent's Education Level.Father", "Parent's Education Level.Mother", 
                     "Father's Occupation", "Mother's Occupation", 'Financial Difficulties', 'Financial Support']]

y_test = merge_5L_imb[['CGPA']]
X_test = merge_5L_imb[['Family Size', "Parent's Education Level.Father", "Parent's Education Level.Mother", 
                     "Father's Occupation", "Mother's Occupation", 'Financial Difficulties', 'Financial Support']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_SFB_imb.append([100,j,acc,pre,recall,f1,param])
        
df5L_SFB_imb = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_SFB_imb)
df5L_SFB_imb

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.479409,0.229833,0.479409,0.31071,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.479409,0.229833,0.479409,0.31071,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.479409,0.229833,0.479409,0.31071,"{'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}"


In [62]:
df5L_SFB_imb.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.479409,0.229833,0.479409,0.31071


### SFB after RandomOverSampler

In [63]:
temp5L_SFB_ROS = []

y_train = df3_5L_ROS[['CGPA']]
X_train = df3_5L_ROS[['Family Size', "Parent's Education Level.Father", "Parent's Education Level.Mother", 
                     "Father's Occupation", "Mother's Occupation", 'Financial Difficulties', 'Financial Support']]

y_test = merge_5L_ROS[['CGPA']]
X_test = merge_5L_ROS[['Family Size', "Parent's Education Level.Father", "Parent's Education Level.Mother", 
                     "Father's Occupation", "Mother's Occupation", 'Financial Difficulties', 'Financial Support']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_SFB_ROS.append([100,j,acc,pre,recall,f1,param])
        
df5L_SFB_ROS = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_SFB_ROS)
df5L_SFB_ROS

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.2,0.040039,0.2,0.066721,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.2,0.040039,0.2,0.066721,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.2,0.040039,0.2,0.066721,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"


In [64]:
df5L_SFB_ROS.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.2,0.040039,0.2,0.066721


### SFB after SMOTE

In [65]:
temp5L_SFB_SMOTE = []

y_train = df3_5L_SMOTE[['CGPA']]
X_train = df3_5L_SMOTE[['Family Size', "Parent's Education Level.Father", "Parent's Education Level.Mother", 
                     "Father's Occupation", "Mother's Occupation", 'Financial Difficulties', 'Financial Support']]

y_test = merge_5L_SMOTE[['CGPA']]
X_test = merge_5L_SMOTE[['Family Size', "Parent's Education Level.Father", "Parent's Education Level.Mother", 
                     "Father's Occupation", "Mother's Occupation", 'Financial Difficulties', 'Financial Support']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_SFB_SMOTE.append([100,j,acc,pre,recall,f1,param])
        
df5L_SFB_SMOTE = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_SFB_SMOTE)
df5L_SFB_SMOTE

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.200324,0.240026,0.200324,0.06735,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.200324,0.240026,0.200324,0.06735,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.200324,0.240026,0.200324,0.06735,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"


In [66]:
df5L_SFB_SMOTE.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.200324,0.240026,0.200324,0.06735


## Student Lifestyle

### SL before Data Balancing

In [67]:
df3_5L_imb.rename(columns={"Dating": "Relationship Status"}, inplace=True)
df3_5L_imb.rename(columns={"Study Behaviour": "Study Behavior"}, inplace=True)
df3_5L_imb.rename(columns={"I have enough free time after class.": "I have enough free time after class"}, inplace=True)
df3_5L_imb.rename(columns={"I hang out with friends.": "I hang out with friends"}, inplace=True)

df3_5L_ROS.rename(columns={"Dating": "Relationship Status"}, inplace=True)
df3_5L_ROS.rename(columns={"Study Behaviour": "Study Behavior"}, inplace=True)
df3_5L_ROS.rename(columns={"I have enough free time after class.": "I have enough free time after class"}, inplace=True)
df3_5L_ROS.rename(columns={"I hang out with friends.": "I hang out with friends"}, inplace=True)

df3_5L_SMOTE.rename(columns={"Dating": "Relationship Status"}, inplace=True)
df3_5L_SMOTE.rename(columns={"Study Behaviour": "Study Behavior"}, inplace=True)
df3_5L_SMOTE.rename(columns={"I have enough free time after class.": "I have enough free time after class"}, inplace=True)
df3_5L_SMOTE.rename(columns={"I hang out with friends.": "I hang out with friends"}, inplace=True)

In [68]:
temp5L_SL_imb = []

y_train = df3_5L_imb[['CGPA']]
X_train = df3_5L_imb[['Relationship Status', 'Scholarship', 'PTPTN', 'Skipping Class', 'Additional Course',
                    'Study Behavior', 'Study Time', 'Extra Curricular Activities', 'Part-time Job', 'Commute Time',
                    'Gaming', 'Alcohol Consumption', 'I have enough free time after class', 'Social Interaction',
                    'I hang out with friends']]


y_test = merge_5L_imb[['CGPA']]
X_test = merge_5L_imb[['Relationship Status', 'Scholarship', 'PTPTN', 'Skipping Class', 'Additional Course',
                    'Study Behavior', 'Study Time', 'Extra Curricular Activities', 'Part-time Job', 'Commute Time',
                    'Gaming', 'Alcohol Consumption', 'I have enough free time after class', 'Social Interaction',
                    'I hang out with friends']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_SL_imb.append([100,j,acc,pre,recall,f1,param])
        
df5L_SL_imb = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_SL_imb)
df5L_SL_imb

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.461538,0.340993,0.461538,0.382236,"{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}"
1,100,69,0.461538,0.340993,0.461538,0.382236,"{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}"
2,100,101,0.461538,0.340993,0.461538,0.382236,"{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}"


In [69]:
df5L_SL_imb.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.461538,0.340993,0.461538,0.382236


### SL after RandomOverSampler

In [70]:
temp5L_SL_ROS = []

y_train = df3_5L_ROS[['CGPA']]
X_train = df3_5L_ROS[['Relationship Status', 'Scholarship', 'PTPTN', 'Skipping Class', 'Additional Course',
                    'Study Behavior', 'Study Time', 'Extra Curricular Activities', 'Part-time Job', 'Commute Time',
                    'Gaming', 'Alcohol Consumption', 'I have enough free time after class', 'Social Interaction',
                    'I hang out with friends']]


y_test = merge_5L_ROS[['CGPA']]
X_test = merge_5L_ROS[['Relationship Status', 'Scholarship', 'PTPTN', 'Skipping Class', 'Additional Course',
                    'Study Behavior', 'Study Time', 'Extra Curricular Activities', 'Part-time Job', 'Commute Time',
                    'Gaming', 'Alcohol Consumption', 'I have enough free time after class', 'Social Interaction',
                    'I hang out with friends']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_SL_ROS.append([100,j,acc,pre,recall,f1,param])
        
df5L_SL_ROS = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_SL_ROS)
df5L_SL_ROS

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.199028,0.088247,0.199028,0.077457,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.199028,0.088247,0.199028,0.077457,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.199028,0.088247,0.199028,0.077457,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"


In [71]:
df5L_SL_ROS.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.199028,0.088247,0.199028,0.077457


### SL after SMOTE

In [72]:
temp5L_SL_SMOTE = []

y_train = df3_5L_SMOTE[['CGPA']]
X_train = df3_5L_SMOTE[['Relationship Status', 'Scholarship', 'PTPTN', 'Skipping Class', 'Additional Course',
                    'Study Behavior', 'Study Time', 'Extra Curricular Activities', 'Part-time Job', 'Commute Time',
                    'Gaming', 'Alcohol Consumption', 'I have enough free time after class', 'Social Interaction',
                    'I hang out with friends']]


y_test = merge_5L_SMOTE[['CGPA']]
X_test = merge_5L_SMOTE[['Relationship Status', 'Scholarship', 'PTPTN', 'Skipping Class', 'Additional Course',
                    'Study Behavior', 'Study Time', 'Extra Curricular Activities', 'Part-time Job', 'Commute Time',
                    'Gaming', 'Alcohol Consumption', 'I have enough free time after class', 'Social Interaction',
                    'I hang out with friends']]

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_SL_SMOTE.append([100,j,acc,pre,recall,f1,param])
        
df5L_SL_SMOTE = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_SL_SMOTE)
df5L_SL_SMOTE

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.234684,0.177178,0.234684,0.176599,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.234684,0.177178,0.234684,0.176599,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.234684,0.177178,0.234684,0.176599,"{'C': 10, 'gamma': 1, 'kernel': 'rbf'}"


In [73]:
df5L_SL_SMOTE.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.234684,0.177178,0.234684,0.176599


## All Attributes

### AA before Data Balancing

In [74]:
temp5L_imb = []

y_train = df3_5L_imb[['CGPA']]
X_train = df3_5L_imb.drop(columns=["CGPA"])

y_test = merge_5L_imb[['CGPA']]
X_test = merge_5L_imb.drop(columns=["CGPA"])

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_imb.append([100,j,acc,pre,recall,f1,param])
        
df5L_imb = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_imb)
df5L_imb

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.048174,0.377208,0.048174,0.062327,"{'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}"
1,100,69,0.048174,0.377208,0.048174,0.062327,"{'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}"
2,100,101,0.048174,0.377208,0.048174,0.062327,"{'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}"


In [75]:
df5L_imb.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.048174,0.377208,0.048174,0.062327


### AA after RandomOverSampler

In [76]:
temp5L_ROS = []

y_train = df3_5L_ROS[['CGPA']]
X_train = df3_5L_ROS.drop(columns=["CGPA"])

y_test = merge_5L_ROS[['CGPA']]
X_test = merge_5L_ROS.drop(columns=["CGPA"])

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_ROS.append([100,j,acc,pre,recall,f1,param])
        
df5L_ROS = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_ROS)
df5L_ROS

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.2,0.04,0.2,0.066667,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"
1,100,69,0.2,0.04,0.2,0.066667,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"
2,100,101,0.2,0.04,0.2,0.066667,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}"


In [77]:
df5L_ROS.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.2,0.04,0.2,0.066667


### AA after SMOTE

In [78]:
temp5L_SMOTE = []

y_train = df3_5L_SMOTE[['CGPA']]
X_train = df3_5L_SMOTE.drop(columns=["CGPA"])

y_test = merge_5L_SMOTE[['CGPA']]
X_test = merge_5L_SMOTE.drop(columns=["CGPA"])

for j in rand_states:
    param_grid = {'C': [0.1, 1, 10, 100],
                  'gamma': [1, 0.1, 0.01, 0.001],
                  'kernel': ['rbf','sigmoid']}

    model = GridSearchCV(SVC(random_state=0), param_grid, refit=True, n_jobs=-1, cv=10, verbose=1)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test,pred)
    pre = precision_score(y_test, pred, average='weighted')
    recall = recall_score(y_test, pred, average='weighted')
    f1 = f1_score(y_test, pred, average='weighted')
    param = model.best_params_

    temp5L_SMOTE.append([100,j,acc,pre,recall,f1,param])
        
df5L_SMOTE = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score','Best Param'],data=temp5L_SMOTE)
df5L_SMOTE

Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Fitting 10 folds for each of 32 candidates, totalling 320 fits


  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score,Best Param
0,100,7,0.2,0.04,0.2,0.066667,"{'C': 10, 'gamma': 0.1, 'kernel': 'rbf'}"
1,100,69,0.2,0.04,0.2,0.066667,"{'C': 10, 'gamma': 0.1, 'kernel': 'rbf'}"
2,100,101,0.2,0.04,0.2,0.066667,"{'C': 10, 'gamma': 0.1, 'kernel': 'rbf'}"


In [79]:
df5L_SMOTE.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,0.2,0.04,0.2,0.066667
