In [38]:
# Task 1: Read the dataset and do data pre-processing
import numpy as np
import pandas as pd
import tensorflow
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [39]:
# Reading the csv file
df = pd.read_csv('sample_data/drug200.csv')

# Visualizing the 1st 5 observations
df.head()

Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K,Drug
0,23,F,HIGH,HIGH,25.355,DrugY
1,47,M,LOW,HIGH,13.093,drugC
2,47,M,LOW,HIGH,10.114,drugC
3,28,F,NORMAL,HIGH,7.798,drugX
4,61,F,LOW,HIGH,18.043,DrugY


In [40]:
# Visualizing the last 5 observations
df.tail()

Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K,Drug
195,56,F,LOW,HIGH,11.567,drugC
196,16,M,LOW,HIGH,12.006,drugC
197,52,M,NORMAL,HIGH,9.894,drugX
198,23,M,NORMAL,NORMAL,14.02,drugX
199,40,F,LOW,NORMAL,11.349,drugX


In [41]:
# descriptive stat
df.describe(include='all')

Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K,Drug
count,200.0,200,200,200,200.0,200
unique,,2,3,2,,5
top,,M,HIGH,HIGH,,DrugY
freq,,104,77,103,,91
mean,44.315,,,,16.084485,
std,16.544315,,,,7.223956,
min,15.0,,,,6.269,
25%,31.0,,,,10.4455,
50%,45.0,,,,13.9365,
75%,58.0,,,,19.38,


In [42]:
# Check the null values
df.isnull().sum()

Age            0
Sex            0
BP             0
Cholesterol    0
Na_to_K        0
Drug           0
dtype: int64

In [43]:
# We are not having any null values so we can skip the handling null value step.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Age          200 non-null    int64  
 1   Sex          200 non-null    object 
 2   BP           200 non-null    object 
 3   Cholesterol  200 non-null    object 
 4   Na_to_K      200 non-null    float64
 5   Drug         200 non-null    object 
dtypes: float64(1), int64(1), object(4)
memory usage: 9.5+ KB


In [44]:
# Finding unique category
df['Drug'].unique()

array(['DrugY', 'drugC', 'drugX', 'drugA', 'drugB'], dtype=object)

In [45]:
# Finding the count of observations based on unique value
df['Drug'].value_counts()

DrugY    91
drugX    54
drugA    23
drugC    16
drugB    16
Name: Drug, dtype: int64

In [46]:
label_encoder = LabelEncoder() # initialize the lib
# transfrom all onjext cols with label_encoder
df['Sex'] = label_encoder.fit_transform(df['Sex'])
df['BP'] = label_encoder.fit_transform(df['BP'])
df['Cholesterol'] = label_encoder.fit_transform(df['Cholesterol'])
df['Drug'] = label_encoder.fit_transform(df['Drug'])

In [47]:
df.head()

Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K,Drug
0,23,0,0,0,25.355,0
1,47,1,1,0,13.093,3
2,47,1,1,0,10.114,3
3,28,0,2,0,7.798,4
4,61,0,1,0,18.043,0


In [48]:
# Split the training and testing data
X = df.iloc[:,:-1]
Y = df.iloc[:,-1]
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)


In [49]:
# Task 2: Build the ANN model with (input layer, min 3 hidden layers & output layer)
# Initializing the seq model
model = Sequential()
# Adding the input layer to the model
model.add(Dense(64, activation='relu', input_dim=X_train.shape[1])) #input_dim=no. of cols in X_train
# Adding the 3 hidden layer to the model
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))
# Adding output layer
total_classes = len(label_encoder.classes_) #find total classes in output
model.add(Dense(total_classes, activation='softmax')) #more than 2 difference classes in output use softmax

In [50]:
model.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy'])

In [51]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_10 (Dense)            (None, 64)                384       
                                                                 
 dense_11 (Dense)            (None, 64)                4160      
                                                                 
 dense_12 (Dense)            (None, 32)                2080      
                                                                 
 dense_13 (Dense)            (None, 16)                528       
                                                                 
 dense_14 (Dense)            (None, 5)                 85        
                                                                 
Total params: 7,237
Trainable params: 7,237
Non-trainable params: 0
_________________________________________________________________


In [52]:
model.fit(X_train, Y_train, batch_size=32, epochs=10, validation_data=(X_test, Y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7efb804f5240>

In [53]:
_, accuracy = model.evaluate(X_test, Y_test)
accuracy



0.5

In [54]:
# Task 3: Test the model with random data
Y_pred = model.predict(X_test)
l=[]
for arr in Y_pred:
  predicted_class = np.argmax(arr)
  predicted_drug = label_encoder.inverse_transform([predicted_class])[0]
  l.append(predicted_drug)
print("The Predicted values are: ")
print(l)

The Predicted values are: 
['DrugY', 'DrugY', 'DrugY', 'drugX', 'DrugY', 'DrugY', 'DrugY', 'DrugY', 'DrugY', 'drugX', 'DrugY', 'DrugY', 'DrugY', 'DrugY', 'drugX', 'DrugY', 'drugX', 'drugX', 'DrugY', 'DrugY', 'DrugY', 'drugX', 'drugX', 'DrugY', 'DrugY', 'DrugY', 'drugX', 'DrugY', 'DrugY', 'DrugY', 'DrugY', 'DrugY', 'DrugY', 'DrugY', 'drugX', 'DrugY', 'drugX', 'DrugY', 'DrugY', 'DrugY']


In [55]:
# Comparing predicted with the actual value
comp = pd.DataFrame(Y_test)  # Creating a dataframe
comp['Drug'] = label_encoder.inverse_transform(comp['Drug'])
comp.columns = ['Actual Value']  # Changing the column name
comp['Predicted Value'] = l  # Creating a column based onth prediction
comp

Unnamed: 0,Actual Value,Predicted Value
95,drugX,DrugY
15,DrugY,DrugY
30,drugX,DrugY
158,drugC,drugX
128,DrugY,DrugY
115,DrugY,DrugY
69,DrugY,DrugY
170,drugX,DrugY
174,drugA,DrugY
45,drugX,drugX


In [56]:
y_pred = model.predict([[30, 1, 2, 0, 24.110]])
print(y_pred)
result =  np.argmax(y_pred)
output = ['DrugY', 'drugX', 'drugA', 'drugC', 'drugB']
result

[[0.92712957 0.00787001 0.01400959 0.00105586 0.04993504]]


0

In [57]:
print(f"Output is: {output[result]}")

Output is: DrugY
