# **Weight Initialization**

In [71]:
import numpy as np
import pandas as pd 
pd.set_option('display.max_columns', None)
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow
import keras
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder, MinMaxScaler
from sklearn.impute import KNNImputer, SimpleImputer
from sklearn.compose import ColumnTransformer
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras import Sequential
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import r2_score

In [54]:
# Load the training data from the working directory and preview the first rows.
df = pd.read_csv('train.csv')
df.head()

Unnamed: 0,Gender,Height (cm),Weight (kg),Blood Pressure (s/d),Cholesterol Level (mg/dL),BMI,Blood Glucose Level (mg/dL),Bone Density (g/cm²),Vision Sharpness,Hearing Ability (dB),Physical Activity Level,Smoking Status,Alcohol Consumption,Diet,Chronic Diseases,Medication Use,Family History,Cognitive Function,Mental Health Status,Sleep Patterns,Stress Levels,Pollution Exposure,Sun Exposure,Education Level,Income Level,Age (years)
0,Male,171.148359,86.185197,151/109,259.465814,29.423017,157.652848,0.132868,0.2,58.786198,Moderate,Former,,Low-carb,,,,44.059172,Good,Insomnia,2.797064,5.142344,7.108975,,Medium,89
1,Male,172.946206,79.641937,134/112,263.630292,26.626847,118.507805,0.629534,0.267312,54.63527,Low,Current,Occasional,Balanced,Hypertension,,Heart Disease,45.312298,Good,Normal,9.33993,7.27272,3.918489,Undergraduate,Medium,77
2,Female,155.945488,49.167058,160/101,207.846206,20.217553,143.58755,0.473487,0.248667,54.564632,Moderate,Never,,Balanced,Hypertension,Regular,Hypertension,56.246991,Poor,Insomnia,9.234637,8.500386,5.393408,,Medium,70
3,Female,169.078298,56.017921,133/94,253.283779,19.59527,137.448581,1.184315,0.513818,79.722963,Moderate,Never,,Balanced,Diabetes,Occasional,Hypertension,55.196092,Poor,Insomnia,4.693446,7.555511,2.745578,,Low,52
4,Female,163.758355,73.966304,170/106,236.119899,27.582078,145.328695,0.434562,0.306864,52.479469,Low,Former,Frequent,Vegetarian,,,,53.023379,Good,Normal,4.038537,9.429097,3.878435,Undergraduate,High,79


In [55]:
# Dataset dimensions as (rows, columns).
df.shape

(3000, 26)

In [56]:
# Per-column dtype and non-null count; columns with fewer non-null entries than rows have gaps.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3000 entries, 0 to 2999
Data columns (total 26 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   Gender                       3000 non-null   object 
 1   Height (cm)                  3000 non-null   float64
 2   Weight (kg)                  3000 non-null   float64
 3   Blood Pressure (s/d)         3000 non-null   object 
 4   Cholesterol Level (mg/dL)    3000 non-null   float64
 5   BMI                          3000 non-null   float64
 6   Blood Glucose Level (mg/dL)  3000 non-null   float64
 7   Bone Density (g/cm²)         3000 non-null   float64
 8   Vision Sharpness             3000 non-null   float64
 9   Hearing Ability (dB)         3000 non-null   float64
 10  Physical Activity Level      3000 non-null   object 
 11  Smoking Status               3000 non-null   object 
 12  Alcohol Consumption          1799 non-null   object 
 13  Diet              

In [57]:
# Number of missing entries in each column (isna is the canonical spelling of isnull).
df.isna().sum()

Gender                            0
Height (cm)                       0
Weight (kg)                       0
Blood Pressure (s/d)              0
Cholesterol Level (mg/dL)         0
BMI                               0
Blood Glucose Level (mg/dL)       0
Bone Density (g/cm²)              0
Vision Sharpness                  0
Hearing Ability (dB)              0
Physical Activity Level           0
Smoking Status                    0
Alcohol Consumption            1201
Diet                              0
Chronic Diseases               1299
Medication Use                 1198
Family History                 1451
Cognitive Function                0
Mental Health Status              0
Sleep Patterns                    0
Stress Levels                     0
Pollution Exposure                0
Sun Exposure                      0
Education Level                 627
Income Level                      0
Age (years)                       0
dtype: int64

In [58]:
# Count rows that exactly repeat an earlier row.
df.duplicated().sum()

0

In [59]:
# Split the combined "systolic/diastolic" reading into two numeric features.
# The guard keeps this cell safe to re-run: once the source column has been
# dropped, a second execution would otherwise fail with a KeyError.
bp_column = 'Blood Pressure (s/d)'
if bp_column in df.columns:
    bp_parts = df[bp_column].str.split('/', n=1, expand=True)

    # The new columns are appended after the existing ones (Systolic, then
    # Diastolic); the positional column indices used for preprocessing rely
    # on that order, so keep it.
    df['Systolic'] = pd.to_numeric(bp_parts[0])
    df['Diastolic'] = pd.to_numeric(bp_parts[1])
    df = df.drop(columns=[bp_column])

In [60]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable.
target_column = 'Age (years)'
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns=[target_column]),
    df[target_column],
    test_size=0.2,
    random_state=1,
)

In [61]:
# Positional indices into the feature frame (target removed, Systolic/Diastolic
# appended last). Together the two lists cover positions 0..25 exactly once.
# Numeric: 1-8 (Height .. Hearing Ability), 16 (Cognitive Function),
#          19-21 (Stress, Pollution, Sun Exposure), 24-25 (Systolic, Diastolic).
numerical_columns = [*range(1, 9), 16, *range(19, 22), 24, 25]
# Categorical: 0 (Gender), 9-15 (Physical Activity .. Family History),
#              17-18 (Mental Health, Sleep Patterns), 22-23 (Education, Income).
categorical_columns = [0, *range(9, 16), 17, 18, 22, 23]

In [62]:
# Numeric features: fill any gaps from the 11 nearest neighbours, then min-max scale.
numeric_steps = [
    ('impute', KNNImputer(n_neighbors=11)),
    ('scale', MinMaxScaler()),
]
handle_numerical = Pipeline(steps=numeric_steps)

In [63]:
# Categorical features are one-hot encoded. The step is named 'encode' because
# it performs encoding only; no imputation happens in this pipeline.
# drop='first' removes one indicator column per feature, and
# handle_unknown='ignore' lets transform() accept categories not seen during fit.
# NOTE(review): missing values in these columns are passed straight to the
# encoder; presumably NaN is treated as its own category - confirm for the
# installed scikit-learn version.
handle_categorical = Pipeline(steps=[
    ('encode', OneHotEncoder(drop='first', handle_unknown='ignore'))
])

In [64]:
# Route each column group through its own pipeline and concatenate the results.
# The one-hot encoder emits sparse output, so by default the combined result is
# sparse or dense depending on the data's overall density. sparse_threshold=0
# always returns a dense array, which is what the Keras model is fed downstream.
preprocessing = ColumnTransformer(
    transformers=[
        ('numerical', handle_numerical, numerical_columns),
        ('categorical', handle_categorical, categorical_columns),
    ],
    sparse_threshold=0,
)

In [65]:
# Fit the preprocessing on the training split only, then apply it to the test split
# so no statistics from the test rows leak into the fitted transformers.
# The variable names are reused for the transformed arrays, so the guard stops a
# re-run of this cell from fitting the transformer on already-transformed data.
# To re-fit after changing the preprocessing, re-run the train/test split cell first.
if isinstance(X_train, pd.DataFrame):
    X_train = preprocessing.fit_transform(X_train)
    X_test = preprocessing.transform(X_test)

In [66]:
# Seed Python, NumPy and the Keras backend before any layer is created, so the
# weight initialisation (and later random draws such as dropout and shuffling)
# start from the same state on every run, as far as the backend allows.
keras.utils.set_random_seed(1)
model = Sequential()

In [67]:
# Architecture: 20 hidden ReLU layers of 32 units with He-uniform initial weights,
# one Dropout layer after the last hidden layer, then a single linear output unit.
# Only populate an empty model: re-running this cell would otherwise stack another
# full set of layers onto the same Sequential instance.
if not model.layers:
    for _ in range(20):
        model.add(Dense(32, activation='relu', kernel_initializer='he_uniform'))
    # A per-iteration check of `i > 18` is true only on the final pass, so the
    # single Dropout layer is added once here, after the loop.
    model.add(Dropout(0.5))
    model.add(Dense(1))

In [68]:
# Regression setup: minimise mean squared error with a default Adam optimiser
# and report mean absolute error alongside the loss.
model.compile(
    optimizer=Adam(),
    loss='mean_squared_error',
    metrics=['mean_absolute_error'],
)

In [69]:
# Train for 100 epochs in mini-batches of 32. The held-out split is passed as
# validation data, so each epoch also reports val_loss / val_mean_absolute_error.
# The returned History is kept so the per-epoch metrics can be inspected or
# plotted afterwards, and so the cell does not end with a bare object repr.
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=32,
    validation_data=(X_test, y_test),
)

Epoch 1/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 20ms/step - loss: 2727.5337 - mean_absolute_error: 46.7816 - val_loss: 411.7712 - val_mean_absolute_error: 17.3774
Epoch 2/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 614.2426 - mean_absolute_error: 19.9263 - val_loss: 280.1229 - val_mean_absolute_error: 14.0687
Epoch 3/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 400.5540 - mean_absolute_error: 16.1189 - val_loss: 129.7534 - val_mean_absolute_error: 9.3359
Epoch 4/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 273.8796 - mean_absolute_error: 12.9169 - val_loss: 69.1881 - val_mean_absolute_error: 6.6997
Epoch 5/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 237.9876 - mean_absolute_error: 11.9800 - val_loss: 129.9212 - val_mean_absolute_error: 9.4378
Epoch 6/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

<keras.src.callbacks.history.History at 0x1cb64edb1d0>

In [70]:
# Predict the target for the held-out test features.
y_pred = model.predict(X_test)

[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step 


In [72]:
# Coefficient of determination on the held-out split; keyword arguments make the
# true/predicted order explicit.
r2_score(y_true=y_test, y_pred=y_pred)

0.8219131231307983