# Assignment 5 Solution

## Student Study Performance

### Importing Required Libraries

In [None]:
import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score

import torch
import torch.nn as nn
import torch.optim as optim



### Reading Data

In [None]:
# Load the student performance CSV from the Kaggle input directory.
Student_Data = pd.read_csv("/kaggle/input/student-study-performance/study_performance.csv")
# Bare expression: rendered as the notebook cell output.
Student_Data

### EDA

In [None]:
# Print the column names, non-null counts and dtypes of the frame.
Student_Data.info()

In [None]:
# Count the missing (NaN) values in each column.
Student_Data.isna().sum()

In [None]:
# Count the rows that are exact duplicates of an earlier row.
Student_Data.duplicated().sum()

In [None]:
# Count the distinct values in each column.
Student_Data.nunique()

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) of the data set.
Student_Data.describe()

In [None]:
# List the distinct values of every categorical column.
print("Categories in 'gender' variable:     ",end=" " )
print(Student_Data['gender'].unique())

print("Categories in 'race_ethnicity' variable:  ",end=" ")
print(Student_Data['race_ethnicity'].unique())

# Fixed: the label was missing the space between "in" and the quoted name.
print("Categories in 'parental level of education' variable:",end=" " )
print(Student_Data['parental_level_of_education'].unique())

print("Categories in 'lunch' variable:     ",end=" " )
print(Student_Data['lunch'].unique())

print("Categories in 'test preparation course' variable:     ",end=" " )
print(Student_Data['test_preparation_course'].unique())

In [None]:
# Partition the column names by dtype: object ('O') columns are treated as
# categorical, every other dtype as numerical.
column_dtypes = Student_Data.dtypes
numeric_features = [name for name, kind in column_dtypes.items() if kind != 'O']
categorical_features = [name for name, kind in column_dtypes.items() if kind == 'O']

# Report both groups.
print('We have {} numerical features : {}'.format(len(numeric_features), numeric_features))
print('\nWe have {} categorical features : {}'.format(len(categorical_features), categorical_features))

In [None]:
# Add two derived columns: the sum of the three subject scores and their mean.
math_marks, reading_marks, writing_marks = (
    Student_Data[column]
    for column in ('math_score', 'reading_score', 'writing_score')
)
Student_Data['total score'] = math_marks + reading_marks + writing_marks
Student_Data['average'] = Student_Data['total score'] / 3
# Show the first rows, now including the derived columns.
Student_Data.head()

In [None]:
# Boolean frame flagging, per subject, the students who scored exactly 100.
is_full = Student_Data[['reading_score', 'writing_score', 'math_score']].eq(100)

# Count the flagged rows (non-null 'average' values) for each subject.
reading_full = Student_Data.loc[is_full['reading_score'], 'average'].count()
writing_full = Student_Data.loc[is_full['writing_score'], 'average'].count()
math_full = Student_Data.loc[is_full['math_score'], 'average'].count()

print(f'Number of students with full marks in Maths: {math_full}')
print(f'Number of students with full marks in Writing: {writing_full}')
print(f'Number of students with full marks in Reading: {reading_full}')

In [None]:
# Count, per subject, the students whose score is 20 or below
# (non-null 'average' values among the matching rows).
reading_less_20 = Student_Data[Student_Data['reading_score'] <= 20]['average'].count()
writing_less_20 = Student_Data[Student_Data['writing_score'] <= 20]['average'].count()
math_less_20 = Student_Data[Student_Data['math_score'] <= 20]['average'].count()

# The filter is inclusive (<= 20), so the messages say "20 marks or less"
# instead of the previous, inaccurate "less than 20 marks".
print(f'Number of students with 20 marks or less in Maths: {math_less_20}')
print(f'Number of students with 20 marks or less in Writing: {writing_less_20}')
print(f'Number of students with 20 marks or less in Reading: {reading_less_20}')

In [None]:
# Pairwise relationships between the numeric columns, coloured by gender.
sns.pairplot(data=Student_Data, hue='gender')
plt.show()

In [None]:
# Distribution of the average score: overall (left) and split by gender (right).
fig, axs = plt.subplots(1, 2, figsize=(15, 7))
left_panel, right_panel = axs
sns.histplot(data=Student_Data, x='average', bins=30, kde=True, color='g', ax=left_panel)
sns.histplot(data=Student_Data, x='average', kde=True, hue='gender', ax=right_panel)
plt.show()

In [None]:
# Distribution of the total score: overall (left) and split by gender (right).
fig, axs = plt.subplots(1, 2, figsize=(15, 7))
left_panel, right_panel = axs
sns.histplot(data=Student_Data, x='total score', bins=30, kde=True, color='g', ax=left_panel)
sns.histplot(data=Student_Data, x='total score', kde=True, hue='gender', ax=right_panel)
plt.show()

In [None]:
# Average score by lunch type: all students, female students, male students.
# The panels are drawn on the axes returned by plt.subplots; the previous
# plt.subplot(141..143) calls addressed a different 1x4 grid than the 1x3
# grid that had just been created.
fig, axs = plt.subplots(1, 3, figsize=(25, 6))
sns.histplot(data=Student_Data, x='average', kde=True, hue='lunch', ax=axs[0])
sns.histplot(data=Student_Data[Student_Data.gender == 'female'], x='average', kde=True, hue='lunch', ax=axs[1])
sns.histplot(data=Student_Data[Student_Data.gender == 'male'], x='average', kde=True, hue='lunch', ax=axs[2])
plt.show()

In [None]:
# Average score by parental education: all students, male students, female
# students. The panels are drawn on the axes returned by plt.subplots; the
# previous plt.subplot(141..143) calls addressed a different 1x4 grid than
# the 1x3 grid that had just been created.
fig, axs = plt.subplots(1, 3, figsize=(25, 6))
sns.histplot(data=Student_Data, x='average', kde=True, hue='parental_level_of_education', ax=axs[0])
sns.histplot(data=Student_Data[Student_Data.gender == 'male'], x='average', kde=True, hue='parental_level_of_education', ax=axs[1])
sns.histplot(data=Student_Data[Student_Data.gender == 'female'], x='average', kde=True, hue='parental_level_of_education', ax=axs[2])
plt.show()

In [None]:
# Average score by race/ethnicity group: all students, female students, male
# students. The panels are drawn on the axes returned by plt.subplots; the
# previous plt.subplot(141..143) calls addressed a different 1x4 grid than
# the 1x3 grid that had just been created.
fig, axs = plt.subplots(1, 3, figsize=(25, 6))
sns.histplot(data=Student_Data, x='average', kde=True, hue='race_ethnicity', ax=axs[0])
sns.histplot(data=Student_Data[Student_Data.gender == 'female'], x='average', kde=True, hue='race_ethnicity', ax=axs[1])
sns.histplot(data=Student_Data[Student_Data.gender == 'male'], x='average', kde=True, hue='race_ethnicity', ax=axs[2])
plt.show()

In [None]:
# One violin plot per subject, placed in the first three slots of a 1x4 grid.
plt.figure(figsize=(18, 8))
violin_specs = [
    ('math_score', 'MATH SCORES', 'red'),
    ('reading_score', 'READING SCORES', 'green'),
    ('writing_score', 'WRITING SCORES', 'blue'),
]
for slot, (score_column, heading, shade) in enumerate(violin_specs, start=1):
    plt.subplot(1, 4, slot)
    plt.title(heading)
    sns.violinplot(y=score_column, data=Student_Data, color=shade, linewidth=3)
plt.show()

In [None]:
# Default figure size; also used for the implicit figure created below.
plt.rcParams['figure.figsize'] = (30, 12)

# One pie chart per categorical column: (column, title, slice labels, colours).
# NOTE(review): the labels are hard-coded in the order value_counts() is
# expected to return the categories (most frequent first) - confirm against
# the data, otherwise slices will be mislabelled.
pie_specs = [
    ('gender', 'Gender',
     ('Female', 'Male'),
     ['red', 'green']),
    ('race_ethnicity', 'Race/Ethnicity',
     ('Group C', 'Group D', 'Group B', 'Group E', 'Group A'),
     ['red', 'green', 'blue', 'cyan', 'orange']),
    ('lunch', 'Lunch',
     ('Standard', 'Free'),
     ['red', 'green']),
    ('test_preparation_course', 'Test Course',
     ('None', 'Completed'),
     ['red', 'green']),
    ('parental_level_of_education', 'Parental Education',
     ('Some College', "Associate's Degree", 'High School', 'Some High School',
      "Bachelor's Degree", "Master's Degree"),
     ['red', 'green', 'blue', 'cyan', 'orange', 'grey']),
]

for position, (column, title, labels, color) in enumerate(pie_specs, start=1):
    plt.subplot(1, 5, position)
    size = Student_Data[column].value_counts()
    # '%.2f%%' prints the share with two decimals followed by a percent sign.
    # The previous '.%2f%%' was a malformed format that emitted a stray
    # leading dot and six decimals.
    plt.pie(size, colors=color, labels=labels, autopct='%.2f%%')
    plt.title(title, fontsize=20)
    plt.axis('off')

# plt.grid() was dropped: it was called after the axes had been switched off.
plt.tight_layout()

plt.show()

### Data Pre-Process

In [None]:
# Feature matrix and regression target.
# 'total score' and 'average' were computed from math_score, so keeping them
# as features would leak the target into the inputs (math_score could be
# recovered exactly from them plus the reading and writing scores).
# errors='ignore' keeps this cell runnable when the derived columns are absent.
X = (
    Student_Data
    .drop(columns=['total score', 'average'], errors='ignore')
    .drop('math_score', axis=1)
)
y = Student_Data['math_score']

# Column groups handed to the preprocessing transformers.
numeric_features = X.select_dtypes(include=['int64', 'float64']).columns
categorical_features = X.select_dtypes(include=['object']).columns

In [None]:
# Column names of the feature frame split by dtype: non-object columns vs.
# object columns.
num_features = X.select_dtypes(exclude="object").columns
cat_features = X.select_dtypes(include="object").columns

# Bare tuple: rendered as the notebook cell output.
num_features, cat_features

### Train Test Split

In [None]:
# Hold out 20% of the rows as a test set; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Standardise the numeric columns.
numeric_transformer = StandardScaler()

# One-hot encode the categorical columns as a dense array, dropping the first
# level of each feature. scikit-learn 1.2 renamed the `sparse` argument to
# `sparse_output` and 1.4 removed the old name, so try the new spelling first
# and fall back for older installations.
try:
    categorical_transformer = OneHotEncoder(drop='first', sparse_output=False)
except TypeError:
    categorical_transformer = OneHotEncoder(drop='first', sparse=False)

In [None]:
# Route each column group to its transformer: scaling for the numeric
# columns, one-hot encoding for the categorical ones.
column_pipelines = [
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
]
preprocessor = ColumnTransformer(transformers=column_pipelines)

In [None]:
# Fit the preprocessing on the training split only, then apply the fitted
# transformers to the test split.
X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed = preprocessor.transform(X_test)

In [None]:
# Convert the processed feature arrays to float32 tensors.
X_train_tensor, X_test_tensor = (
    torch.tensor(features, dtype=torch.float32)
    for features in (X_train_processed, X_test_processed)
)
# Targets become float32 column vectors of shape (n_samples, 1).
y_train_tensor, y_test_tensor = (
    torch.tensor(target.to_numpy(), dtype=torch.float32).unsqueeze(1)
    for target in (y_train, y_test)
)


### Model Architecture

In [None]:
class MLP(nn.Module):
    """Fully connected network with two ReLU hidden layers and a linear output.

    Args:
        input_dim: Size of each input sample.
        hidden_dim1: Width of the first hidden layer.
        hidden_dim2: Width of the second hidden layer.
        output_dim: Size of each output sample.
    """

    def __init__(self, input_dim, hidden_dim1, hidden_dim2, output_dim):
        super().__init__()
        # Linear layers, created in the order they are applied in forward().
        self.fc1 = nn.Linear(input_dim, hidden_dim1)
        self.fc2 = nn.Linear(hidden_dim1, hidden_dim2)
        self.fc3 = nn.Linear(hidden_dim2, output_dim)
        # One stateless activation shared by both hidden layers.
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the network output for the batch ``x`` (no final activation)."""
        hidden = x
        for layer in (self.fc1, self.fc2):
            hidden = self.relu(layer(hidden))
        return self.fc3(hidden)

In [None]:
# Layer sizes: the input width follows the processed feature matrix, and the
# single output unit is the regression prediction.
hidden_dim1, hidden_dim2 = 128, 64
input_dim = X_train_tensor.shape[1]
output_dim = 1
model = MLP(
    input_dim=input_dim,
    hidden_dim1=hidden_dim1,
    hidden_dim2=hidden_dim2,
    output_dim=output_dim,
)

In [None]:
# Mean squared error loss for the regression target.
criterion = nn.MSELoss()
# Adam over all model parameters with learning rate 1e-3.
optimizer = optim.Adam(model.parameters(), lr=0.001)

### Training Procedure

In [None]:
num_epochs = 5000
for epoch in range(num_epochs):
    # One full-batch optimisation step on the whole training set.
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()

    # Only every tenth epoch is evaluated and logged.
    if (epoch + 1) % 10 != 0:
        continue

    # Test loss is computed in eval mode without tracking gradients.
    model.eval()
    with torch.no_grad():
        y_pred = model(X_test_tensor)
        test_loss = criterion(y_pred, y_test_tensor)
    print(f'Epoch [{epoch+1}/{num_epochs}] | Train_Loss: {loss.item():.4f} | Test Loss: {test_loss.item():.4f}')

### Evaluation

In [None]:
# Final test-set loss, computed in eval mode without tracking gradients.
model.eval()
with torch.no_grad():
    y_pred = model(X_test_tensor)
    test_loss = criterion(y_pred, y_test_tensor)
print(f'Test Loss: {test_loss.item():.4f}')

## Obesity Levels

### Import Required Libraries

In [None]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline



import os
import matplotlib.pyplot as plt
from sklearn.preprocessing import PowerTransformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder ,LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn import metrics
import plotly.graph_objects as go
import plotly.express as px


from torch.utils import data
from tqdm.notebook import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset


### Loading Data

In [None]:
# Load the obesity-levels CSV from the Kaggle input directory.
Obesity_Data = pd.read_csv("/kaggle/input/obesity-levels/ObesityDataSet_raw_and_data_sinthetic.csv")
# Bare expression: rendered as the notebook cell output.
Obesity_Data

In [None]:
# Print the column names, non-null counts and dtypes of the frame.
Obesity_Data.info()

In [None]:
# Count the missing (NaN) values in each column.
Obesity_Data.isna().sum()

In [None]:
# Count the rows that are exact duplicates of an earlier row.
Obesity_Data.duplicated().sum()

In [None]:
# Count the distinct values in each column.
Obesity_Data.nunique()

### EDA

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) of the data set.
Obesity_Data.describe()

In [None]:
# Column names split by dtype: object columns are treated as categorical,
# all remaining columns as continuous.
categorical_features = Obesity_Data.select_dtypes(include="object").columns
continuous_features = Obesity_Data.select_dtypes(exclude="object").columns

In [None]:
# Number of rows per obesity level; value_counts() orders the result by
# frequency, most common first.
target_count = Obesity_Data['NObeyesdad'].value_counts()
# Distinct obesity levels in order of first appearance.
# NOTE: this order generally differs from target_count's, so the two objects
# must not be paired element by element.
target_unique = Obesity_Data['NObeyesdad'].unique()

In [None]:
# Pie chart of the number of people per obesity level.
# The slice names come from the index of the counts, so every label is paired
# with its own count. Pairing the counts (sorted by frequency) with the
# unique() array (ordered by first appearance) mislabelled the slices.
fig = px.pie(
    values=target_count,
    names=target_count.index,
    color_discrete_sequence=px.colors.qualitative.Pastel1,
    title="the number of people related to each type of obesity level",
)

fig.show()

In [None]:
# Rows belonging to each of the three obesity classes.
obesity_level = Obesity_Data["NObeyesdad"]
df_ot = Obesity_Data.loc[obesity_level.eq('Obesity_Type_I')]
df_ot2 = Obesity_Data.loc[obesity_level.eq('Obesity_Type_II')]
df_ot3 = Obesity_Data.loc[obesity_level.eq('Obesity_Type_III')]

In [None]:
# Stack the three obesity classes into one frame with a fresh 0..n-1 index.
df_ot_final = pd.concat([df_ot, df_ot2, df_ot3], ignore_index=True)

In [None]:
# Rows belonging to each of the two overweight classes.
weight_level = Obesity_Data["NObeyesdad"]
df_ow = Obesity_Data.loc[weight_level.eq('Overweight_Level_I')]
df_ow2 = Obesity_Data.loc[weight_level.eq('Overweight_Level_II')]

In [None]:
# Stack the two overweight classes into one frame with a fresh 0..n-1 index.
df_ow_final = pd.concat([df_ow, df_ow2], ignore_index=True)

In [None]:
# Rows labelled as normal weight.
df_n = Obesity_Data.loc[Obesity_Data["NObeyesdad"].eq('Normal_Weight')]

In [None]:
# Rows labelled as insufficient weight.
df_In = Obesity_Data.loc[Obesity_Data["NObeyesdad"].eq('Insufficient_Weight')]

In [None]:
# Gender counts for each weight group on a 2x2 grid.
data_list = [df_ot_final, df_ow_final, df_n, df_In]
data_name = ["obesity_type", "over_weight_type", "normal", "Insufficient_Weight"]

fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 8))

# Groups fill the grid column by column: the first two go down the left
# column, the last two down the right column.
for position, (group, label) in enumerate(zip(data_list, data_name)):
    col, row = divmod(position, 2)
    panel = axes[row, col]
    sns.histplot(data=group, x='Gender', hue='NObeyesdad', palette='turbo', ax=panel, multiple='stack')
    panel.set_title(f'{label} vs Gender')

fig.suptitle('Obesity_levels vs Gender')
plt.tight_layout()
plt.show()

In [None]:
# CALC counts for each weight group on a 2x2 grid.
data_list = [df_ot_final, df_ow_final, df_n, df_In]
data_name = ["obesity_type", "over_weight_type", "normal", "Insufficient_Weight"]

fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 8))

# Groups fill the grid column by column: the first two go down the left
# column, the last two down the right column.
for position, (group, label) in enumerate(zip(data_list, data_name)):
    col, row = divmod(position, 2)
    panel = axes[row, col]
    sns.histplot(data=group, x='CALC', hue='NObeyesdad', palette='turbo', ax=panel, multiple='stack')
    panel.set_title(f'{label} vs CALC')

fig.suptitle('Obesity_levels vs CALC')
plt.tight_layout()
plt.show()

In [None]:
# FAVC counts for each weight group on a 2x2 grid.
data_list = [df_ot_final, df_ow_final, df_n, df_In]
data_name = ["obesity_type", "over_weight_type", "normal", "Insufficient_Weight"]

fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 8))

# Groups fill the grid column by column: the first two go down the left
# column, the last two down the right column.
for position, (group, label) in enumerate(zip(data_list, data_name)):
    col, row = divmod(position, 2)
    panel = axes[row, col]
    sns.histplot(data=group, x='FAVC', hue='NObeyesdad', palette='turbo', ax=panel, multiple='stack')
    panel.set_title(f'{label} vs FAVC')

fig.suptitle('Obesity_levels vs FAVC')
plt.tight_layout()
plt.show()

In [None]:
# SCC counts for each weight group on a 2x2 grid.
data_list = [df_ot_final, df_ow_final, df_n, df_In]
data_name = ["obesity_type", "over_weight_type", "normal", "Insufficient_Weight"]

fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 8))

# Groups fill the grid column by column: the first two go down the left
# column, the last two down the right column.
for position, (group, label) in enumerate(zip(data_list, data_name)):
    col, row = divmod(position, 2)
    panel = axes[row, col]
    sns.histplot(data=group, x='SCC', hue='NObeyesdad', palette='turbo', ax=panel, multiple='stack')
    panel.set_title(f'{label} vs SCC')

fig.suptitle('Obesity_levels vs SCC')
plt.tight_layout()
plt.show()

In [None]:
# SMOKE counts for each weight group on a 2x2 grid.
data_list = [df_ot_final, df_ow_final, df_n, df_In]
data_name = ["obesity_type", "over_weight_type", "normal", "Insufficient_Weight"]

fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 8))

# Groups fill the grid column by column: the first two go down the left
# column, the last two down the right column.
for position, (group, label) in enumerate(zip(data_list, data_name)):
    col, row = divmod(position, 2)
    panel = axes[row, col]
    sns.histplot(data=group, x='SMOKE', hue='NObeyesdad', palette='turbo', ax=panel, multiple='stack')
    panel.set_title(f'{label} vs SMOKE')

fig.suptitle('Obesity_levels vs SMOKE')
plt.tight_layout()
plt.show()

In [None]:
# family_history_with_overweight counts for each weight group on a 2x2 grid.
data_list = [df_ot_final, df_ow_final, df_n, df_In]
data_name = ["obesity_type", "over_weight_type", "normal", "Insufficient_Weight"]

fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 8))

# Groups fill the grid column by column: the first two go down the left
# column, the last two down the right column.
for position, (group, label) in enumerate(zip(data_list, data_name)):
    col, row = divmod(position, 2)
    panel = axes[row, col]
    sns.histplot(data=group, x='family_history_with_overweight', hue='NObeyesdad', palette='turbo', ax=panel, multiple='stack')
    panel.set_title(f'{label} vs family_history_with_overweight')

fig.suptitle('Obesity_levels vs family_history_with_overweight')
plt.tight_layout()
plt.show()

In [None]:
# CAEC counts for each weight group on a 2x2 grid.
data_list = [df_ot_final, df_ow_final, df_n, df_In]
data_name = ["obesity_type", "over_weight_type", "normal", "Insufficient_Weight"]

fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 8))

# Groups fill the grid column by column: the first two go down the left
# column, the last two down the right column.
for position, (group, label) in enumerate(zip(data_list, data_name)):
    col, row = divmod(position, 2)
    panel = axes[row, col]
    sns.histplot(data=group, x='CAEC', hue='NObeyesdad', palette='turbo', ax=panel, multiple='stack')
    panel.set_title(f'{label} vs CAEC')

fig.suptitle('Obesity_levels vs CAEC')
plt.tight_layout()
plt.show()

In [None]:
# MTRANS counts for each weight group on a (wider) 2x2 grid.
data_list = [df_ot_final, df_ow_final, df_n, df_In]
data_name = ["obesity_type", "over_weight_type", "normal", "Insufficient_Weight"]

fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(15, 8))

# Groups fill the grid column by column: the first two go down the left
# column, the last two down the right column.
for position, (group, label) in enumerate(zip(data_list, data_name)):
    col, row = divmod(position, 2)
    panel = axes[row, col]
    sns.histplot(data=group, x='MTRANS', hue='NObeyesdad', palette='turbo', ax=panel, multiple='stack')
    panel.set_title(f'{label} vs MTRANS')

fig.suptitle('Obesity_levels vs MTRANS')
plt.tight_layout()
plt.show()

In [None]:
# Age density for each weight group on a 2x2 grid.
data_list = [df_ot_final, df_ow_final, df_n, df_In]
data_name = ["obesity_type", "over_weight_type", "normal", "Insufficient_Weight"]

fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 8))

# Groups fill the grid column by column: the first two go down the left
# column, the last two down the right column.
for position, (group, label) in enumerate(zip(data_list, data_name)):
    col, row = divmod(position, 2)
    panel = axes[row, col]
    sns.kdeplot(ax=panel, data=group, x="Age", hue="NObeyesdad", fill=True)
    panel.set_title(f'{label} vs Age')

fig.suptitle('Obesity_levels vs Age')
plt.tight_layout()
plt.show()

In [None]:
# Height density for each weight group on a 2x2 grid.
data_list = [df_ot_final, df_ow_final, df_n, df_In]
data_name = ["obesity_type", "over_weight_type", "normal", "Insufficient_Weight"]

fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 8))

# Groups fill the grid column by column: the first two go down the left
# column, the last two down the right column.
for position, (group, label) in enumerate(zip(data_list, data_name)):
    col, row = divmod(position, 2)
    panel = axes[row, col]
    sns.kdeplot(ax=panel, data=group, x="Height", hue="NObeyesdad", fill=True)
    panel.set_title(f'{label} vs Height')

fig.suptitle('Obesity_levels vs Height')
plt.tight_layout()
plt.show()

In [None]:
# Weight density for each weight group on a 2x2 grid.
data_list = [df_ot_final, df_ow_final, df_n, df_In]
data_name = ["obesity_type", "over_weight_type", "normal", "Insufficient_Weight"]

fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 8))

# Groups fill the grid column by column: the first two go down the left
# column, the last two down the right column.
for position, (group, label) in enumerate(zip(data_list, data_name)):
    col, row = divmod(position, 2)
    panel = axes[row, col]
    sns.kdeplot(ax=panel, data=group, x="Weight", hue="NObeyesdad", fill=True)
    panel.set_title(f'{label} vs Weight')

fig.suptitle('Obesity_levels vs Weight')
plt.tight_layout()
plt.show()

In [None]:
# FCVC density for each weight group on a 2x2 grid.
data_list = [df_ot_final, df_ow_final, df_n, df_In]
data_name = ["obesity_type", "over_weight_type", "normal", "Insufficient_Weight"]

fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 8))

# Groups fill the grid column by column: the first two go down the left
# column, the last two down the right column.
for position, (group, label) in enumerate(zip(data_list, data_name)):
    col, row = divmod(position, 2)
    panel = axes[row, col]
    sns.kdeplot(ax=panel, data=group, x="FCVC", hue="NObeyesdad", fill=True)
    panel.set_title(f'{label} vs FCVC')

fig.suptitle('Obesity_levels vs FCVC')
plt.tight_layout()
plt.show()

In [None]:
# NCP density for each weight group on a 2x2 grid.
data_list = [df_ot_final, df_ow_final, df_n, df_In]
data_name = ["obesity_type", "over_weight_type", "normal", "Insufficient_Weight"]

fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 8))

# Groups fill the grid column by column: the first two go down the left
# column, the last two down the right column.
for position, (group, label) in enumerate(zip(data_list, data_name)):
    col, row = divmod(position, 2)
    panel = axes[row, col]
    sns.kdeplot(ax=panel, data=group, x="NCP", hue="NObeyesdad", fill=True)
    panel.set_title(f'{label} vs NCP')

fig.suptitle('Obesity_levels vs NCP')
plt.tight_layout()
plt.show()

### Data Pre-Process

In [None]:
# Work on a copy so the encoding below leaves Obesity_Data untouched.
df1 = Obesity_Data.copy()

In [None]:
# Encode the target classes as consecutive integers.
le = LabelEncoder()
df1['NObeyesdad'] = le.fit_transform(df1['NObeyesdad'])

# Integer codes for the yes/no answers and for the ordered frequency answers.
yes_no_codes = {'no': 0, 'yes': 1}
frequency_codes = {'no': 0, 'Sometimes': 1, 'Frequently': 2, 'Always': 3}
ordinal_columns = {
    'family_history_with_overweight': yes_no_codes,
    'FAVC': yes_no_codes,
    'CAEC': frequency_codes,
    'SMOKE': yes_no_codes,
    'SCC': yes_no_codes,
    'CALC': frequency_codes,
}
for column, codes in ordinal_columns.items():
    # map() builds a new numeric column. Assigning the codes through
    # df1.loc[...] left the column with dtype object, so pd.get_dummies below
    # one-hot encoded it again and the ordinal encoding was lost.
    # astype('int64') raises if a value has no entry in `codes` (it would
    # have been mapped to NaN), rather than passing it on silently.
    df1[column] = df1[column].map(codes).astype('int64')

# One-hot encode the remaining object columns, then make everything float.
df1 = pd.get_dummies(df1)
df1 = df1.astype('float64')

In [None]:
# Annotated heatmap of the pairwise correlations of the encoded columns.
correlations = df1.corr()
plt.figure(figsize=(20, 12))
sns.heatmap(data=correlations, annot=True, cmap="coolwarm")
plt.title('The correlation among features', y=1.05)
plt.show()

In [None]:
# Separate the encoded target from the features, then hold out 20% of the
# rows as a test set (fixed seed for a repeatable shuffle).
target_column = 'NObeyesdad'
y = df1[target_column]
X = df1.drop(target_column, axis=1)

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

In [None]:
# Standardise the features: the scaler is fitted on the training split only
# and the fitted scaler is then applied to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# Scaled features as float32 tensors.
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
# Class labels as int64 tensors, the dtype nn.CrossEntropyLoss expects for
# class-index targets. Both Series are converted to NumPy arrays first:
# passing the Series itself makes torch read it item by item through
# label-based lookup, which is unreliable once train_test_split has shuffled
# the index.
y_train_tensor = torch.tensor(y_train.to_numpy(), dtype=torch.long)
y_test_tensor = torch.tensor(y_test.to_numpy(), dtype=torch.long)

### Model Architecture

In [None]:
class Model(nn.Module):
    """Classifier with two equally wide ReLU hidden layers, each followed by dropout.

    Args:
        input_size: Size of each input sample.
        hidden_size: Width of both hidden layers.
        num_classes: Number of output scores (one per class).
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        # One stateless activation shared by both hidden layers.
        self.relu = nn.ReLU()
        self.dropout1 = nn.Dropout(0.2)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.dropout2 = nn.Dropout(0.2)
        self.fc3 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the raw class scores for the batch ``x`` (no softmax applied)."""
        first_hidden = self.dropout1(self.relu(self.fc1(x)))
        second_hidden = self.dropout2(self.relu(self.fc2(first_hidden)))
        return self.fc3(second_hidden)

In [None]:
# Hyperparameters for the obesity-level classifier.
input_size = X_train_tensor.shape[1]  # number of feature columns
hidden_size = 128  # width of both hidden layers
num_classes = 7  # one output per obesity level (seven levels are filtered on in the EDA above)
learning_rate = 0.001
num_epochs = 100
batch_size = 256  # mini-batch size used by the training DataLoader

In [None]:
# Build the classifier, its loss (cross-entropy over the raw class scores)
# and an Adam optimiser over all of its parameters.
model = Model(
    input_size=input_size,
    hidden_size=hidden_size,
    num_classes=num_classes,
)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [None]:
# Pair the training features with their labels and serve them in shuffled
# mini-batches of `batch_size` samples.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

### Training

In [None]:
for epoch in range(num_epochs):
    # Switch back to training mode at the start of every epoch. The
    # evaluation step below puts the model in eval mode, and without this
    # call dropout stayed disabled from the second epoch onward.
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Mean of the per-batch training losses for this epoch.
    epoch_loss = running_loss / len(train_loader)

    # Test loss on the full test set, in eval mode and without gradients.
    model.eval()
    with torch.no_grad():
        outputs = model(X_test_tensor)
        test_loss = criterion(outputs, y_test_tensor)
    print(f'Epoch [{epoch + 1}/{num_epochs}] | Train Loss: {epoch_loss:.4f} | Test Loss: {test_loss.item():.4f}')

### Evaluation

In [None]:
# Put the model in eval mode explicitly so dropout is disabled regardless of
# the mode the previous cell left it in.
model.eval()
with torch.no_grad():
    outputs = model(X_test_tensor)
    # The predicted class is the index of the largest score in each row.
    _, predicted = torch.max(outputs, 1)
    test_accuracy = accuracy_score(y_test_tensor.numpy(), predicted.numpy())

print(f'Test accuracy: {test_accuracy}')