# **L1 Regularization - CS Students Performance**

In [118]:
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
import tensorflow
import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder, MinMaxScaler
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.compose import ColumnTransformer
from tensorflow.keras import Sequential
from tensorflow.keras.regularizers import l1, l2, l1_l2
from tensorflow.keras.layers import Dense, Dropout 
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import accuracy_score, classification_report

In [119]:
df = pd.read_csv('Student_performance_data _.csv')
df.head()

Unnamed: 0,StudentID,Age,Gender,Ethnicity,ParentalEducation,StudyTimeWeekly,Absences,Tutoring,ParentalSupport,Extracurricular,Sports,Music,Volunteering,GPA,GradeClass
0,1001,17,1,0,2,19.833723,7,1,2,0,0,1,0,2.929196,2.0
1,1002,18,0,0,1,15.408756,0,0,1,0,0,0,0,3.042915,1.0
2,1003,15,0,2,3,4.21057,26,0,2,0,0,0,0,0.112602,4.0
3,1004,17,1,0,3,10.028829,14,0,3,1,0,0,0,2.054218,3.0
4,1005,17,1,0,2,4.672495,17,1,3,0,0,0,0,1.288061,4.0


In [120]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2392 entries, 0 to 2391
Data columns (total 15 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   StudentID          2392 non-null   int64  
 1   Age                2392 non-null   int64  
 2   Gender             2392 non-null   int64  
 3   Ethnicity          2392 non-null   int64  
 4   ParentalEducation  2392 non-null   int64  
 5   StudyTimeWeekly    2392 non-null   float64
 6   Absences           2392 non-null   int64  
 7   Tutoring           2392 non-null   int64  
 8   ParentalSupport    2392 non-null   int64  
 9   Extracurricular    2392 non-null   int64  
 10  Sports             2392 non-null   int64  
 11  Music              2392 non-null   int64  
 12  Volunteering       2392 non-null   int64  
 13  GPA                2392 non-null   float64
 14  GradeClass         2392 non-null   float64
dtypes: float64(3), int64(12)
memory usage: 280.4 KB


In [121]:
df.shape

(2392, 15)

In [122]:
df.columns

Index(['StudentID', 'Age', 'Gender', 'Ethnicity', 'ParentalEducation',
       'StudyTimeWeekly', 'Absences', 'Tutoring', 'ParentalSupport',
       'Extracurricular', 'Sports', 'Music', 'Volunteering', 'GPA',
       'GradeClass'],
      dtype='object')

In [123]:
df.isnull().sum()

StudentID            0
Age                  0
Gender               0
Ethnicity            0
ParentalEducation    0
StudyTimeWeekly      0
Absences             0
Tutoring             0
ParentalSupport      0
Extracurricular      0
Sports               0
Music                0
Volunteering         0
GPA                  0
GradeClass           0
dtype: int64

In [124]:
df.duplicated().sum()

0

In [125]:
df.describe(include='all')

Unnamed: 0,StudentID,Age,Gender,Ethnicity,ParentalEducation,StudyTimeWeekly,Absences,Tutoring,ParentalSupport,Extracurricular,Sports,Music,Volunteering,GPA,GradeClass
count,2392.0,2392.0,2392.0,2392.0,2392.0,2392.0,2392.0,2392.0,2392.0,2392.0,2392.0,2392.0,2392.0,2392.0,2392.0
mean,2196.5,16.468645,0.51087,0.877508,1.746237,9.771992,14.541388,0.301421,2.122074,0.383361,0.303512,0.196906,0.157191,1.906186,2.983696
std,690.655244,1.123798,0.499986,1.028476,1.000411,5.652774,8.467417,0.458971,1.122813,0.486307,0.45987,0.397744,0.364057,0.915156,1.233908
min,1001.0,15.0,0.0,0.0,0.0,0.001057,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,1598.75,15.0,0.0,0.0,1.0,5.043079,7.0,0.0,1.0,0.0,0.0,0.0,0.0,1.174803,2.0
50%,2196.5,16.0,1.0,0.0,2.0,9.705363,15.0,0.0,2.0,0.0,0.0,0.0,0.0,1.893393,4.0
75%,2794.25,17.0,1.0,2.0,2.0,14.40841,22.0,1.0,3.0,1.0,1.0,0.0,0.0,2.622216,4.0
max,3392.0,18.0,1.0,3.0,4.0,19.978094,29.0,1.0,4.0,1.0,1.0,1.0,1.0,4.0,4.0


In [126]:
df['GradeClass'].value_counts()

GradeClass
4.0    1211
3.0     414
2.0     391
1.0     269
0.0     107
Name: count, dtype: int64

In [127]:
X_train, X_test, y_train, y_test = train_test_split(df.drop(columns=['StudentID', 'GradeClass']),
                                                    df['GradeClass'],
                                                    test_size=0.2,
                                                    random_state=42)

In [128]:
numerical_columns = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]

In [129]:
handle_numerical = Pipeline(steps=[
    ('impute', KNNImputer(n_neighbors=11)),
    ('scale', MinMaxScaler())
])

In [130]:
preprocessing = ColumnTransformer(transformers=[
    ('numerical', handle_numerical, numerical_columns)
])

In [131]:
y_train = to_categorical(y_train, num_classes=5)
y_test = to_categorical(y_test, num_classes=5)

In [132]:
model = Sequential()

In [133]:
model.add(Dense(X_train.shape[0], activation='relu', kernel_regularizer=l1(0.01)))
model.add(Dense(X_train.shape[0], activation='relu', kernel_regularizer=l1(0.01)))
model.add(Dense(X_train.shape[0], activation='relu', kernel_regularizer=l1(0.01)))
model.add(Dense(X_train.shape[0], activation='relu', kernel_regularizer=l1(0.01)))
model.add(Dense(X_train.shape[0], activation='relu', kernel_regularizer=l1(0.01)))
model.add(Dense(X_train.shape[0], activation='relu', kernel_regularizer=l1(0.01)))
model.add(Dense(X_train.shape[0], activation='relu', kernel_regularizer=l1(0.01)))
model.add(Dense(X_train.shape[0], activation='relu', kernel_regularizer=l1(0.01)))
model.add(Dense(X_train.shape[0], activation='relu', kernel_regularizer=l1(0.01)))
model.add(Dense(X_train.shape[0], activation='relu', kernel_regularizer=l1(0.01)))
model.add(Dense(X_train.shape[0], activation='relu', kernel_regularizer=l1(0.01)))
model.add(Dense(X_train.shape[0], activation='relu', kernel_regularizer=l1(0.01)))
model.add(Dense(X_train.shape[0], activation='relu', kernel_regularizer=l1(0.01)))
model.add(Dense(X_train.shape[0], activation='relu', kernel_regularizer=l1(0.01)))
model.add(Dropout(0.5))

model.add(Dense(5, activation='softmax', kernel_regularizer=l1(0.01)))

In [134]:
model.compile(optimizer='Adam', loss='binary_crossentropy', metrics=['accuracy'])

In [135]:
X_train = preprocessing.fit_transform(X_train)
X_test = preprocessing.transform(X_test)

In [136]:
history = model.fit(X_train, y_train, epochs=100, batch_size=300, validation_split=0.2)

Epoch 1/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 2s/step - accuracy: 0.3614 - loss: 8839.2930 - val_accuracy: 0.4700 - val_loss: 6839.9463
Epoch 2/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2s/step - accuracy: 0.5145 - loss: 6352.6069 - val_accuracy: 0.4700 - val_loss: 4719.3857
Epoch 3/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2s/step - accuracy: 0.5203 - loss: 4329.5503 - val_accuracy: 0.4700 - val_loss: 3041.0679
Epoch 4/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2s/step - accuracy: 0.5192 - loss: 2743.2451 - val_accuracy: 0.4700 - val_loss: 1783.5520
Epoch 5/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2s/step - accuracy: 0.5246 - loss: 1576.3860 - val_accuracy: 0.4700 - val_loss: 945.2807
Epoch 6/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2s/step - accuracy: 0.5344 - loss: 827.7814 - val_accuracy: 0.4700 - val_loss: 522.9537
Epoch 7/100
[1

In [137]:
y_pred = model.predict(X_test)

[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 87ms/step


In [138]:
y_pred = (y_pred > 0.5).astype(int)

In [139]:
accuracy_score(y_test, y_pred)

0.49478079331941544

In [140]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        22
           1       0.00      0.00      0.00        49
           2       0.00      0.00      0.00        85
           3       0.00      0.00      0.00        86
           4       0.49      1.00      0.66       237

   micro avg       0.49      0.49      0.49       479
   macro avg       0.10      0.20      0.13       479
weighted avg       0.24      0.49      0.33       479
 samples avg       0.49      0.49      0.49       479

