In [1]:
#Importing data handling libraries
import pandas as pd
import numpy as np

In [2]:
# Load the toy dataset from its CSV file and preview the first five rows
dataset = pd.read_csv(filepath_or_buffer="dataset/toy_dataset.csv")
dataset.head(n=5)

Unnamed: 0,Number,City,Gender,Age,Income,Illness
0,1,Dallas,Male,41,40367.0,No
1,2,Dallas,Male,54,45084.0,No
2,3,Dallas,Male,42,52483.0,No
3,4,Dallas,Male,40,40941.0,No
4,5,Dallas,Male,46,50289.0,No


In [3]:
# Remove the 'Number' column from the frame
dataset = dataset.drop(columns=['Number'])

In [4]:
# Turn the categorical text columns into numeric codes with the project's
# custom label encoder. Each column gets its own fitted encoder instance.
from mlmodule.preprocessing import LabelEncoder

city_le, gender_le = LabelEncoder(), LabelEncoder()

# City column
dataset['City'] = city_le.fit_transform(dataset['City'])
# Gender column
dataset['Gender'] = gender_le.fit_transform(dataset['Gender'])

In [5]:
# Preview the first five rows of the frame
dataset.head(n=5)

Unnamed: 0,City,Gender,Age,Income,Illness
0,2,1,41,40367.0,No
1,2,1,54,45084.0,No
2,2,1,42,52483.0,No
3,2,1,40,40941.0,No
4,2,1,46,50289.0,No


In [6]:
# Standardise the feature columns with the project's StandardScaler.
# NOTE(review): 'City' and 'Gender' hold label-encoded categories, and the
# scaler is fitted on the whole frame before any train/test split is made --
# confirm that both are intended.
from mlmodule.preprocessing import StandardScaler

numerical_columns = ['City', 'Gender', 'Age', 'Income']
scaler = StandardScaler()
dataset[numerical_columns] = scaler.fit_transform(dataset.loc[:, numerical_columns])

In [7]:
# Encode the target column: 'No' -> 0, 'Yes' -> 1.
# Series.map with a dict yields NaN for any value that is not a key, so
# running this cell a second time would blank out the column.
dataset['Illness'] = dataset['Illness'].map({
    'No': 0,
    'Yes': 1,
})

In [8]:
# Order the rows by the encoded label, ascending (label 0 rows come first)
dataset = dataset.sort_values('Illness', ascending=True)

In [9]:
# Preview the first five rows of the frame in its current order
dataset.head(n=5)

Unnamed: 0,City,Gender,Age,Income,Illness
22,-0.871607,-1.125105,-1.032642,-2.731228,0
24,-0.871607,0.888806,1.127661,-1.443123,0
25,-0.871607,0.888806,1.646134,-2.604454,0
26,-0.871607,0.888806,1.127661,-1.357807,0
27,-0.871607,0.888806,-0.082109,-1.182452,0


In [10]:
# Remove fully identical rows, keeping the first occurrence of each
dataset = dataset.drop_duplicates(subset=None, keep='first')

In [11]:
# Skip the first 10000 rows (by position, in the current row order) and then
# put the remaining rows back into index order.
# NOTE(review): presumably this trims label-0 rows to reduce class imbalance;
# the 10000 is a hard-coded magic number -- confirm the intent.
dataset = dataset.iloc[10000:].sort_index()

In [12]:
# Positional 80/20 split: the first 80% of rows become the training set and
# the remainder the test set. The last column is the target; every other
# column is a feature. The rows are taken in their current order (no shuffle).
margin = int(0.8 * len(dataset))
X_train, y_train = dataset.iloc[:margin, :-1], dataset.iloc[:margin, -1]
X_test, y_test = dataset.iloc[margin:, :-1], dataset.iloc[margin:, -1]

In [13]:
# Build, train and run a small feed-forward binary classifier using the
# project's mlmodule package.
# The activations and the loss are imported by name rather than with
# wildcard imports, so it is clear where each one comes from and no
# unrelated names are pulled into the notebook namespace.
from mlmodule.layers import Dense
from mlmodule.models import Sequential
from mlmodule.activations import relu, sigmoid
from mlmodule.optimizers import Adam
from mlmodule.losses import binary_crossentropy

# Three ReLU hidden layers (64, 32, 16 units) followed by one sigmoid output unit
model = Sequential()
model.add(Dense(64, activation=relu))
model.add(Dense(32, activation=relu))
model.add(Dense(16, activation=relu))
model.add(Dense(1, activation=sigmoid))

# Compiling the model: Adam optimizer with binary cross-entropy loss
model.compile(optimizer=Adam(learning_rate=0.001), loss=binary_crossentropy)

# Training the model on the training split
model.fit(X_train, y_train, epochs=50, batch_size=64)

# Predicting the output for the held-out test features
predictions = model.predict(X_test)

Epoch 1/50, Loss: 0.3061
Epoch 2/50, Loss: 0.2964
Epoch 3/50, Loss: 0.2961
Epoch 4/50, Loss: 0.2960
Epoch 5/50, Loss: 0.2957
Epoch 6/50, Loss: 0.2956
Epoch 7/50, Loss: 0.2956
Epoch 8/50, Loss: 0.2954
Epoch 9/50, Loss: 0.2953
Epoch 10/50, Loss: 0.2953
Epoch 11/50, Loss: 0.2952
Epoch 12/50, Loss: 0.2952
Epoch 13/50, Loss: 0.2952
Epoch 14/50, Loss: 0.2952
Epoch 15/50, Loss: 0.2951
Epoch 16/50, Loss: 0.2952
Epoch 17/50, Loss: 0.2952
Epoch 18/50, Loss: 0.2951
Epoch 19/50, Loss: 0.2950
Epoch 20/50, Loss: 0.2951
Epoch 21/50, Loss: 0.2950
Epoch 22/50, Loss: 0.2950
Epoch 23/50, Loss: 0.2950
Epoch 24/50, Loss: 0.2949
Epoch 25/50, Loss: 0.2950
Epoch 26/50, Loss: 0.2948
Epoch 27/50, Loss: 0.2949
Epoch 28/50, Loss: 0.2949
Epoch 29/50, Loss: 0.2949
Epoch 30/50, Loss: 0.2949
Epoch 31/50, Loss: 0.2948
Epoch 32/50, Loss: 0.2949
Epoch 33/50, Loss: 0.2949
Epoch 34/50, Loss: 0.2948
Epoch 35/50, Loss: 0.2948
Epoch 36/50, Loss: 0.2948
Epoch 37/50, Loss: 0.2948
Epoch 38/50, Loss: 0.2947
Epoch 39/50, Loss: 0.

In [14]:
# Threshold the model outputs at 0.5: values strictly above 0.5 become 1,
# everything else becomes 0
binary_predictions = (predictions > 0.5).astype(int)

In [15]:
#Evaluating the model
y_test = y_test.values

In [16]:
# Calculating the accuracy: the share of test rows whose thresholded
# prediction equals the true label.
# Both arrays are flattened to 1-D before comparing. If the predictions came
# back as a column of shape (n, 1) while the labels have shape (n,), a direct
# `==` would broadcast to an (n, n) matrix and its mean would not be the
# accuracy.
y_pred_flat = np.asarray(binary_predictions).reshape(-1)
y_true_flat = np.asarray(y_test).reshape(-1)
if y_pred_flat.shape != y_true_flat.shape:
    raise ValueError(
        f"prediction count {y_pred_flat.shape[0]} does not match "
        f"label count {y_true_flat.shape[0]}"
    )
accuracy = np.mean(y_pred_flat == y_true_flat)

print(f"Accuracy: {accuracy*100}")

Accuracy: 91.54196720134927
