# **L2 Regularization - CS Students Performance**

In [202]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
import tensorflow
import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder, MinMaxScaler
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.compose import ColumnTransformer
from tensorflow.keras import Sequential
from tensorflow.keras.regularizers import l1, l2, l1_l2
from tensorflow.keras.layers import Dense, Dropout 
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import r2_score

In [203]:
df = pd.read_csv('/kaggle/input/student-performance-predictions/student_performance.csv')
df.head()

Unnamed: 0,StudentID,Name,Gender,AttendanceRate,StudyHoursPerWeek,PreviousGrade,ExtracurricularActivities,ParentalSupport,FinalGrade
0,1,John,Male,85,15,78,1,High,80
1,2,Sarah,Female,90,20,85,2,Medium,87
2,3,Alex,Male,78,10,65,0,Low,68
3,4,Michael,Male,92,25,90,3,High,92
4,5,Emma,Female,88,18,82,2,Medium,85


In [204]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 9 columns):
 #   Column                     Non-Null Count  Dtype 
---  ------                     --------------  ----- 
 0   StudentID                  10 non-null     int64 
 1   Name                       10 non-null     object
 2   Gender                     10 non-null     object
 3   AttendanceRate             10 non-null     int64 
 4   StudyHoursPerWeek          10 non-null     int64 
 5   PreviousGrade              10 non-null     int64 
 6   ExtracurricularActivities  10 non-null     int64 
 7   ParentalSupport            10 non-null     object
 8   FinalGrade                 10 non-null     int64 
dtypes: int64(6), object(3)
memory usage: 848.0+ bytes


In [205]:
df.shape

(10, 9)

In [206]:
df.columns

Index(['StudentID', 'Name', 'Gender', 'AttendanceRate', 'StudyHoursPerWeek',
       'PreviousGrade', 'ExtracurricularActivities', 'ParentalSupport',
       'FinalGrade'],
      dtype='object')

In [207]:
df.isnull().sum()

StudentID                    0
Name                         0
Gender                       0
AttendanceRate               0
StudyHoursPerWeek            0
PreviousGrade                0
ExtracurricularActivities    0
ParentalSupport              0
FinalGrade                   0
dtype: int64

In [208]:
df.duplicated().sum()

0

In [209]:
df.describe(include='all')

Unnamed: 0,StudentID,Name,Gender,AttendanceRate,StudyHoursPerWeek,PreviousGrade,ExtracurricularActivities,ParentalSupport,FinalGrade
count,10.0,10,10,10.0,10.0,10.0,10.0,10,10.0
unique,,10,2,,,,,3,
top,,John,Male,,,,,High,
freq,,1,5,,,,,4,
mean,5.5,,,85.6,17.7,78.1,1.5,,80.2
std,3.02765,,,7.441625,6.848357,10.170218,1.080123,,10.097304
min,1.0,,,70.0,8.0,60.0,0.0,,62.0
25%,3.25,,,82.75,12.75,71.75,1.0,,73.5
50%,5.5,,,86.5,17.5,80.0,1.5,,82.5
75%,7.75,,,90.75,21.5,85.75,2.0,,87.75


In [210]:
X_train, X_test, y_train, y_test = train_test_split(df.drop(columns=['StudentID', 'Name', 'FinalGrade']),
                                                    df['FinalGrade'],
                                                    test_size=0.2,
                                                    random_state=42)

In [211]:
X_test.shape

(2, 6)

In [212]:
y_test.shape

(2,)

In [213]:
numerical_columns = [1, 2, 3, 4]
categorical_columns = [0, 5]

In [214]:
handle_numerical = Pipeline(steps=[
    ('impute', KNNImputer(n_neighbors=11)),
    ('scale', MinMaxScaler())
])

In [215]:
handle_categorical = Pipeline(steps=[
    ('impute', SimpleImputer(strategy='most_frequent')),
    ('encode', OneHotEncoder(drop='first', handle_unknown='ignore'))
])

In [216]:
preprocessing = ColumnTransformer(transformers=[
    ('numerical', handle_numerical, numerical_columns),
    ('categorical', handle_categorical, categorical_columns)
])

In [217]:
model = Sequential()

In [218]:
model.add(Dense(X_train.shape[0], activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dense(X_train.shape[0], activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dense(X_train.shape[0], activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dense(X_train.shape[0], activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dense(X_train.shape[0], activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dense(X_train.shape[0], activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dense(X_train.shape[0], activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dense(X_train.shape[0], activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dense(X_train.shape[0], activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dropout(0.3))
model.add(Dense(X_train.shape[0], activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dropout(0.3))
model.add(Dense(X_train.shape[0], activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dropout(0.3))
model.add(Dense(X_train.shape[0], activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dropout(0.3))
model.add(Dense(X_train.shape[0], activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dropout(0.3))
model.add(Dense(X_train.shape[0], activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dropout(0.3))

model.add(Dense(5, activation='relu', kernel_regularizer=l2(0.01)))

In [219]:
model.compile(optimizer='Adam', loss='mean_squared_error', metrics=['mean_absolute_error'])

In [220]:
X_train = preprocessing.fit_transform(X_train)
X_test = preprocessing.transform(X_test)

In [221]:
history = model.fit(X_train, y_train, epochs=100, batch_size=300, validation_split=0.2)

Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7s/step - loss: 6697.2891 - mean_absolute_error: 81.4996 - val_loss: 6154.8623 - val_mean_absolute_error: 76.9979
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - loss: 6696.9741 - mean_absolute_error: 81.4977 - val_loss: 6154.5762 - val_mean_absolute_error: 76.9961
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step - loss: 6696.6733 - mean_absolute_error: 81.4959 - val_loss: 6154.2607 - val_mean_absolute_error: 76.9941
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step - loss: 6696.2808 - mean_absolute_error: 81.4934 - val_loss: 6153.9634 - val_mean_absolute_error: 76.9921
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - loss: 6694.8228 - mean_absolute_error: 81.4844 - val_loss: 6153.6392 - val_mean_absolute_error: 76.9900
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0

In [222]:
y_pred = model.predict(X_test)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 149ms/step
