In [1]:
#Importing data handling libraries
import pandas as pd
import numpy as np

In [2]:
#Reading the dataset
dataset = pd.read_csv("dataset/toy_dataset.csv")
dataset.head()

Unnamed: 0,Number,City,Gender,Age,Income,Illness
0,1,Dallas,Male,41,40367.0,No
1,2,Dallas,Male,54,45084.0,No
2,3,Dallas,Male,42,52483.0,No
3,4,Dallas,Male,40,40941.0,No
4,5,Dallas,Male,46,50289.0,No


In [3]:
#Dropping the Number column
dataset = dataset.drop(['Number'], axis=1)

In [4]:
#Dropping duplicates
dataset = dataset.drop_duplicates()

In [5]:
#Using custom label encoding to convert the categorical data to numerical data
from mlmodule.preprocessing import LabelEncoder

#City column
city_le = LabelEncoder()
dataset['City'] = city_le.fit_transform(dataset['City'])

#Gender column
gender_le = LabelEncoder()
dataset['Gender'] = gender_le.fit_transform(dataset['Gender'])

In [6]:
dataset.head()

Unnamed: 0,City,Gender,Age,Income,Illness
0,2,1,41,40367.0,No
1,2,1,54,45084.0,No
2,2,1,42,52483.0,No
3,2,1,40,40941.0,No
4,2,1,46,50289.0,No


In [7]:
#Scaling the data
from mlmodule.preprocessing import StandardScaler
numerical_columns = ['City', 'Gender', 'Age', 'Income']

scaler = StandardScaler()
dataset[numerical_columns] = scaler.fit_transform(dataset[numerical_columns])

In [8]:
dataset.head()

Unnamed: 0,City,Gender,Age,Income,Illness
0,-0.869701,0.889214,-0.341072,-2.034076,No
1,-0.869701,0.889214,0.782161,-1.845478,No
2,-0.869701,0.889214,-0.25467,-1.549648,No
3,-0.869701,0.889214,-0.427475,-2.011126,No
4,-0.869701,0.889214,0.09094,-1.637369,No


In [9]:
#Converting the illness column to 0 for No and 1 for Yes
dataset['Illness'] = dataset['Illness'].map({'No': 0, 'Yes': 1})

In [10]:
dataset_illness_0 = dataset[dataset['Illness'] == 0]
dataset_illness_1 = dataset[dataset['Illness'] == 1]
print("Illness 0: ", dataset_illness_0.shape)
print("Illness 1: ", dataset_illness_1.shape)

Illness 0:  (137202, 5)
Illness 1:  (12130, 5)


In [11]:
dataset_illness_0 = dataset_illness_0.sample(n=60000, random_state=42)

In [12]:
#Merge the two datasets
dataset = pd.concat([dataset_illness_0, dataset_illness_1], axis=0)

#Shuffling the dataset
dataset = dataset.sample(frac=1, random_state=42).reset_index(drop=True)

In [13]:
#Dataset
dataset.head()

Unnamed: 0,City,Gender,Age,Income,Illness
0,-1.963903,0.889214,0.350148,-0.315107,0
1,0.771603,-1.124588,-1.377903,-0.207674,0
2,-0.3226,-1.124588,0.350148,-0.124871,1
3,0.771603,-1.124588,0.522953,0.369154,0
4,0.224502,-1.124588,-0.859488,0.885088,0


In [14]:
#Train test split
margin = int(0.8 * dataset.shape[0])
X_train, X_test, y_train, y_test = dataset.iloc[:margin, :-1], dataset.iloc[margin:, :-1], dataset.iloc[:margin, -1], dataset.iloc[margin:, -1]

In [15]:
#Creating neural network model
from mlmodule.layers import Dense
from mlmodule.models import Sequential
from mlmodule.activations import *
from mlmodule.optimizers import Adam
from mlmodule.losses import *

model = Sequential()
model.add(Dense(64, activation = relu))
model.add(Dense(32, activation = relu))
model.add(Dense(16, activation = relu))
model.add(Dense(8, activation = relu))
model.add(Dense(1, activation = sigmoid))

#Compiling the model
model.compile(optimizer=Adam(learning_rate=0.001), loss=binary_crossentropy)

#Training the model
model.fit(X_train, y_train, epochs=600, batch_size=64)

#Predicting the output
predictions = model.predict(X_test)

Epoch 1/600, Loss: 0.4623
Epoch 2/600, Loss: 0.4559
Epoch 3/600, Loss: 0.4559
Epoch 4/600, Loss: 0.4554
Epoch 5/600, Loss: 0.4552
Epoch 6/600, Loss: 0.4550
Epoch 7/600, Loss: 0.4549
Epoch 8/600, Loss: 0.4550
Epoch 9/600, Loss: 0.4549
Epoch 10/600, Loss: 0.4548
Epoch 11/600, Loss: 0.4548
Epoch 12/600, Loss: 0.4549
Epoch 13/600, Loss: 0.4548
Epoch 14/600, Loss: 0.4547
Epoch 15/600, Loss: 0.4546
Epoch 16/600, Loss: 0.4546
Epoch 17/600, Loss: 0.4547
Epoch 18/600, Loss: 0.4546
Epoch 19/600, Loss: 0.4546
Epoch 20/600, Loss: 0.4546
Epoch 21/600, Loss: 0.4543
Epoch 22/600, Loss: 0.4545
Epoch 23/600, Loss: 0.4544
Epoch 24/600, Loss: 0.4545
Epoch 25/600, Loss: 0.4545
Epoch 26/600, Loss: 0.4545
Epoch 27/600, Loss: 0.4544
Epoch 28/600, Loss: 0.4544
Epoch 29/600, Loss: 0.4544
Epoch 30/600, Loss: 0.4543
Epoch 31/600, Loss: 0.4543
Epoch 32/600, Loss: 0.4543
Epoch 33/600, Loss: 0.4542
Epoch 34/600, Loss: 0.4543
Epoch 35/600, Loss: 0.4543
Epoch 36/600, Loss: 0.4543
Epoch 37/600, Loss: 0.4543
Epoch 38/6

In [16]:
binary_predictions = (predictions > 0.5).astype(int)

In [17]:
#Evaluating the model
y_test = y_test.values

In [20]:
#Unique values
print("Unique value counts: ", np.unique(y_test, return_counts=True))

Unique value counts:  (array([0, 1]), array([12055,  2371]))


In [19]:
#Calculating the accuracy
accuracy = np.mean(binary_predictions == y_test)

print(f"Accuracy: {accuracy*100}")

Accuracy: 83.52717105491149
