# Libraries

In [15]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler

from sklearn.model_selection import train_test_split

# EDA

In [16]:
# Read the student-performance CSV into a DataFrame and preview the first rows.
# NOTE(review): this is an absolute, machine-specific Windows path, so the cell
# only runs where that exact file exists — consider a configurable data folder.
csv_path = r'C:\Users\Hibatullah\CA\DL\StudentsPerformance.csv'
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


In [17]:
# Per-student mean of the three exam scores.
df['avrage_score'] = (df['math score'] + df['reading score'] + df['writing score']) / 3

# Binary pass/fail label: 1 when the mean score is at least 50, otherwise 0.
# A vectorised comparison replaces the row-by-row `.apply(lambda ...)`; the
# explicit int64 cast keeps the dtype the same on every platform.
df['final score'] = (df['avrage_score'] >= 50).astype('int64')

df.head()

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score,avrage_score,final score
0,female,group B,bachelor's degree,standard,none,72,72,74,72.666667,1
1,female,group C,some college,standard,completed,69,90,88,82.333333,1
2,female,group B,master's degree,standard,none,90,95,93,92.666667,1
3,male,group A,associate's degree,free/reduced,none,47,57,44,49.333333,0
4,male,group C,some college,standard,none,76,78,75,76.333333,1


In [18]:
# Show the column labels currently present in df (including the two derived columns).
df.columns

Index(['gender', 'race/ethnicity', 'parental level of education', 'lunch',
       'test preparation course', 'math score', 'reading score',
       'writing score', 'avrage_score', 'final score'],
      dtype='object')

In [19]:
# Feature groups: categorical columns (one-hot encoded in a later cell) and
# numeric score columns (standardised in a later cell).
cat_cols = [
    'gender',
    'race/ethnicity',
    'parental level of education',
    'lunch',
    'test preparation course',
]
num_col = ['math score', 'reading score', 'writing score']

selected_col = cat_cols + num_col

# Select the features explicitly instead of dropping the derived columns with
# errors='ignore': X is then exactly the list reported as "features", and a
# missing or misspelled column raises a KeyError instead of passing silently.
# .copy() gives X its own data so later edits never touch df.
X = df[selected_col].copy()

# NOTE(review): 'final score' is computed from the mean of the three score
# columns that remain in X, so the label is a deterministic function of the
# features — confirm this is intended for the exercise.
y = df['final score']

In [20]:
# Report the feature-matrix shape and the column groups that feed it.
summary_rows = (
    ("orginal data shape:", X.shape),
    ("categorical columns:", cat_cols),
    ("numerical columns:", num_col),
    ("features:", selected_col),
)
for label, value in summary_rows:
    print(label, value)

orginal data shape: (1000, 8)
categorical columns: ['gender', 'race/ethnicity', 'parental level of education', 'lunch', 'test preparation course']
numerical columns: ['math score', 'reading score', 'writing score']
features: ['gender', 'race/ethnicity', 'parental level of education', 'lunch', 'test preparation course', 'math score', 'reading score', 'writing score']


In [21]:
# One-hot encode every categorical column, keeping all levels (no reference
# level is dropped); numeric columns pass through unchanged.
X_encode = pd.get_dummies(
    X,
    columns=cat_cols,
    prefix=cat_cols,
    drop_first=False,
)
print(f"\nAfter one-hot encoding shape: {X_encode.shape}")
print(f"Feature columns after encoding: {list(X_encode.columns)}")


After one-hot encoding shape: (1000, 20)
Feature columns after encoding: ['math score', 'reading score', 'writing score', 'gender_female', 'gender_male', 'race/ethnicity_group A', 'race/ethnicity_group B', 'race/ethnicity_group C', 'race/ethnicity_group D', 'race/ethnicity_group E', "parental level of education_associate's degree", "parental level of education_bachelor's degree", 'parental level of education_high school', "parental level of education_master's degree", 'parental level of education_some college', 'parental level of education_some high school', 'lunch_free/reduced', 'lunch_standard', 'test preparation course_completed', 'test preparation course_none']


In [25]:
# Standardise the numeric score columns of X_encode in place.
# NOTE(review): the scaler is fitted on all rows before the train/test split
# in the next cell, so test-set statistics influence the scaling — consider
# fitting on the training rows only and reusing that scaler for the test rows.
scaler = StandardScaler()
scaled_scores = scaler.fit_transform(X_encode[num_col])
X_encode[num_col] = scaled_scores
print("\nNumerical features normalized")
print(f"Final preprocessed data shape: {X_encode.shape}")


Numerical features normalized
Final preprocessed data shape: (1000, 20)


In [28]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# ratio the same in both parts; the fixed seed makes the split repeatable.
split_parts = train_test_split(
    X_encode,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)
X_train, X_test, y_train, y_test = split_parts
print("\nTrain set shape:", X_train.shape)
print("Test set shape:", X_test.shape)



Train set shape: (800, 20)
Test set shape: (200, 20)


# Day 1 practice in DL

## Basics of neural networks with scikit-learn

In [29]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

In [30]:
def plot_decision_boundary(clf, X, y, ax=None, title=None, h=0.02):
    """Draw a fitted classifier's decision regions over a 2-D feature space.

    A regular grid is laid over the range of the first two columns of ``X``
    (padded by 0.5 on every side), ``clf.predict`` is evaluated at every grid
    point, and the predicted classes are drawn as filled contours with the
    samples scattered on top.

    Parameters
    ----------
    clf : object
        Fitted estimator exposing ``predict`` for an ``(n_points, 2)`` array.
    X : array-like of shape (n_samples, 2)
        Sample coordinates. Converted with ``np.asarray``, so nested lists and
        DataFrames work as well as NumPy arrays. Only columns 0 and 1 are used.
    y : array-like of shape (n_samples,)
        Values used to colour the scattered samples.
    ax : matplotlib Axes, optional
        Axes to draw on. Defaults to the current axes (``plt.gca()``).
    title : str, optional
        Axes title; no title is set when omitted or empty.
    h : float, default 0.02
        Grid step along both axes. Smaller values give a smoother boundary
        at the cost of more predictions.

    Raises
    ------
    ValueError
        If ``h`` is not strictly positive.
    """
    if h <= 0:
        raise ValueError("h must be a positive grid step")

    # Positional [:, i] indexing below requires a real ndarray.
    X = np.asarray(X)

    x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
    y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    # Predict on the flattened grid, then fold the labels back into grid shape.
    grid_points = np.c_[xx.ravel(), yy.ravel()]
    Z = np.asarray(clf.predict(grid_points)).reshape(xx.shape)

    if ax is None:
        ax = plt.gca()

    # The colormap is passed by name; matplotlib resolves 'bwr' to plt.cm.bwr.
    ax.contourf(xx, yy, Z, cmap='bwr', alpha=0.3)
    ax.scatter(X[:, 0], X[:, 1], c=y, cmap='bwr', edgecolor='k')
    ax.set_xlabel('Feature 0')
    ax.set_ylabel('Feature 1')
    if title:
        ax.set_title(title)

Single-layer networks:

[10] → 1 hidden layer with 10 neurons (simple architecture)

[50] → 1 hidden layer with 50 neurons (moderate complexity)

[100] → 1 hidden layer with 100 neurons (wider layer, higher capacity)

Multi-layer networks:

[50, 30] → 2 hidden layers with 50 and 30 neurons respectively

[100, 50, 25] → 3 hidden layers with decreasing neuron counts (useful for feature refinement)

[200, 100, 50] → A deep network with gradually decreasing layer sizes

Deeper setups:

[500, 250, 100]

[10, 10, 10, 10, 10]

[1024, 512, 256, 128, 64] → Used for large-scale problems with high-dimensional data