In [None]:
# Importing Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from keras import Sequential
from keras.layers import Dense
from keras.optimizers.legacy import adam

from sklearn.model_selection import train_test_split

In [None]:
dataset = pd.read_csv('drug200.csv')
dataset.head()

Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K,Drug
0,23,F,HIGH,HIGH,25.355,DrugY
1,47,M,LOW,HIGH,13.093,drugC
2,47,M,LOW,HIGH,10.114,drugC
3,28,F,NORMAL,HIGH,7.798,drugX
4,61,F,LOW,HIGH,18.043,DrugY


In [None]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Age          200 non-null    int64  
 1   Sex          200 non-null    object 
 2   BP           200 non-null    object 
 3   Cholesterol  200 non-null    object 
 4   Na_to_K      200 non-null    float64
 5   Drug         200 non-null    object 
dtypes: float64(1), int64(1), object(4)
memory usage: 9.5+ KB


In [None]:
dataset.describe()

Unnamed: 0,Age,Na_to_K
count,200.0,200.0
mean,44.315,16.084485
std,16.544315,7.223956
min,15.0,6.269
25%,31.0,10.4455
50%,45.0,13.9365
75%,58.0,19.38
max,74.0,38.247


In [None]:
dataset.Sex = dataset.Sex.astype('category').cat.codes
dataset.BP = dataset.BP.astype('category').cat.codes
dataset.Cholesterol = dataset.Cholesterol.astype('category').cat.codes
dataset.head()

Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K,Drug
0,23,0,0,0,25.355,DrugY
1,47,1,1,0,13.093,drugC
2,47,1,1,0,10.114,drugC
3,28,0,2,0,7.798,drugX
4,61,0,1,0,18.043,DrugY


### Handling Empty Values


In [None]:
dataset.isna().sum()

Age            0
Sex            0
BP             0
Cholesterol    0
Na_to_K        0
Drug           0
dtype: int64

In [None]:
x = dataset.drop(['Drug'], axis = 1).values
y = pd.get_dummies(dataset.Drug).values
xTrain, xTest, yTrain, yTest = train_test_split(x, y, test_size = 0.3)

print(xTrain.shape)
print(yTrain.shape)

xTrain[:5]

(140, 5)
(140, 5)


array([[21.   ,  0.   ,  0.   ,  1.   , 28.632],
       [43.   ,  1.   ,  0.   ,  0.   , 13.972],
       [49.   ,  1.   ,  0.   ,  1.   ,  8.7  ],
       [67.   ,  1.   ,  2.   ,  1.   ,  9.514],
       [32.   ,  0.   ,  0.   ,  1.   , 10.292]])

## Model Definition Stage

In [None]:
model = Sequential()
model.add(Dense(5, input_dim = 5, activation = 'relu'))
model.add(Dense(16, activation = 'relu'))
model.add(Dense(16, activation = 'relu'))
model.add(Dense(32, activation = 'relu'))
model.add(Dense(5, activation = 'softmax'))
model.compile(optimizer = adam.Adam(), loss = 'categorical_crossentropy', metrics = ['accuracy'])


In [None]:
model.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_30 (Dense)            (None, 5)                 30        
                                                                 
 dense_31 (Dense)            (None, 16)                96        
                                                                 
 dense_32 (Dense)            (None, 16)                272       
                                                                 
 dense_33 (Dense)            (None, 32)                544       
                                                                 
 dense_34 (Dense)            (None, 5)                 165       
                                                                 
Total params: 1,107
Trainable params: 1,107
Non-trainable params: 0
_________________________________________________________________


In [None]:
model.fit(xTrain, yTrain, epochs = 10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fe82ca3e980>

## Model Performance Stage

In [None]:
yPred = model.predict(xTest)

yPred[:5]



array([[0.35718194, 0.1192129 , 0.21689261, 0.03950176, 0.26721078],
       [0.3215536 , 0.15882324, 0.22560854, 0.16829945, 0.12571509],
       [0.3725991 , 0.10420002, 0.21870321, 0.02867647, 0.27582115],
       [0.32772067, 0.14643371, 0.2104638 , 0.06591012, 0.2494717 ],
       [0.30285567, 0.16718027, 0.20254946, 0.0939471 , 0.23346744]],
      dtype=float32)

In [None]:
yTest[:5]

array([[ True, False, False, False, False],
       [ True, False, False, False, False],
       [False, False, False, False,  True],
       [False, False, False, False,  True],
       [False, False, False, False,  True]])