In [74]:
import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = None

In [75]:
df = pd.read_csv("data/mushrooms.csv")
df.head()

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,p,x,s,n,t,p,f,c,n,k,...,s,w,w,p,w,o,p,k,s,u
1,e,x,s,y,t,a,f,c,b,k,...,s,w,w,p,w,o,p,n,n,g
2,e,b,s,w,t,l,f,c,b,n,...,s,w,w,p,w,o,p,n,n,m
3,p,x,y,w,t,p,f,c,n,n,...,s,w,w,p,w,o,p,k,s,u
4,e,x,s,g,f,n,f,w,b,k,...,s,w,w,p,w,o,e,n,a,g


In [76]:
X = df.drop(["class"], axis = 1)
Y = df[["class"]]

### Label Encoding for classes

In [77]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
Y["class"] = le.fit_transform(Y["class"])

### One Hot Encoding

In [78]:
X = pd.get_dummies(X)
X.head()

Unnamed: 0,cap-shape_b,cap-shape_c,cap-shape_f,cap-shape_k,cap-shape_s,cap-shape_x,cap-surface_f,cap-surface_g,cap-surface_s,cap-surface_y,...,population_s,population_v,population_y,habitat_d,habitat_g,habitat_l,habitat_m,habitat_p,habitat_u,habitat_w
0,0,0,0,0,0,1,0,0,1,0,...,1,0,0,0,0,0,0,0,1,0
1,0,0,0,0,0,1,0,0,1,0,...,0,0,0,0,1,0,0,0,0,0
2,1,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,1,0,0,0
3,0,0,0,0,0,1,0,0,0,1,...,1,0,0,0,0,0,0,0,1,0
4,0,0,0,0,0,1,0,0,1,0,...,0,0,0,0,1,0,0,0,0,0


### Split the dataset

In [79]:
from sklearn.model_selection import train_test_split

In [80]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state = 7)

### Create the tensors

In [81]:
import torch

In [82]:
X_train_tensor = torch.from_numpy(X_train.values).float()
X_test_tensor = torch.from_numpy(X_test.values).float()
Y_train_tensor = torch.from_numpy(np.squeeze(Y_train.values)).long()
Y_test_tensor = torch.from_numpy(np.squeeze(Y_test.values)).long()

In [83]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [84]:
input_dim = X_train_tensor.shape[1]

output_dim = len(Y["class"].unique())

### Network Architecture

In [85]:
class MushroomNet(nn.Module):
    def  __init__(self, hidden_size, activation_fn = 'relu', apply_dropout = False):
        super(MushroomNet, self).__init__()
        
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)
        
        self.hidden_size = hidden_size
        self.activation_fn = activation_fn
        
        self.dropout = None
        if apply_dropout:
            self.dropout = nn.Dropout(0.2)
            
    def forward(self, x):
        activation_fn = None
        if self.activation_fn == 'relu':
            activation_fn = F.relu
        elif self.activation_fn == 'sigmoid':
            activation_fn = F.torch.sigmoid
        elif self.activation_fn == 'tanh':
            activation_fn = F.torch.tanh
        
        x = activation_fn(self.fc1(x))
        x = activation_fn(self.fc2(x))
        
        if self.dropout != None:
            x = self.dropout(x)
        
        x = self.fc3(x)
        return F.log_softmax(x, dim=-1)

### Training & Evaluating the model

In [99]:
def train_and_evaluate(model, nb_epochs = 1000, learning_rate=1e-3):
    
    optimizer = optim.Adam(model.parameters(), lr = learning_rate)
    
    loss_fn = nn.NLLLoss()
    
    for epoch in range(1, nb_epochs+1):
        
        optimizer.zero_grad()
        
        model.train()
        
        Y_pred_train = model(X_train_tensor)
        
        loss_train = loss_fn(Y_pred_train, Y_train_tensor)
        loss_train.backward()
        optimizer.step()
        
        model.eval()
        
        Y_pred_test = model(X_test_tensor)
        loss_test = loss_fn(Y_pred_test, Y_test_tensor)
        _,class_idx = Y_pred_test.max(1)
        
        accuracy = class_idx.eq(Y_test_tensor).sum().item() / Y_test_tensor.shape[0]
        
        if epoch % 100 == 0:
            print("Epoch: {}, Training Loss: {}, Test Loss: {}, Test Accuracy: {}".format(epoch, loss_train.item(), loss_test.item(), accuracy))

In [100]:
model = MushroomNet(hidden_size = 3, activation_fn = 'sigmoid', apply_dropout=False)
model

MushroomNet(
  (fc1): Linear(in_features=117, out_features=3, bias=True)
  (fc2): Linear(in_features=3, out_features=3, bias=True)
  (fc3): Linear(in_features=3, out_features=2, bias=True)
)

In [101]:
train_and_evaluate(model, 1000, 1e-3)

Epoch: 100, Training Loss: 0.6542438864707947, Test Loss: 0.6526139378547668, Test Accuracy: 0.856
Epoch: 200, Training Loss: 0.5650483965873718, Test Loss: 0.5608644485473633, Test Accuracy: 0.9150769230769231
Epoch: 300, Training Loss: 0.4452352523803711, Test Loss: 0.43978309631347656, Test Accuracy: 0.9636923076923077
Epoch: 400, Training Loss: 0.3296014070510864, Test Loss: 0.3245125710964203, Test Accuracy: 0.9827692307692307
Epoch: 500, Training Loss: 0.2404295653104782, Test Loss: 0.2360580861568451, Test Accuracy: 0.9926153846153846
Epoch: 600, Training Loss: 0.17745441198349, Test Loss: 0.17391522228717804, Test Accuracy: 0.9963076923076923
Epoch: 700, Training Loss: 0.13390545547008514, Test Loss: 0.1312510073184967, Test Accuracy: 0.9969230769230769
Epoch: 800, Training Loss: 0.10352172702550888, Test Loss: 0.10162767022848129, Test Accuracy: 0.9981538461538462
Epoch: 900, Training Loss: 0.08188366144895554, Test Loss: 0.08056462556123734, Test Accuracy: 0.9993846153846154


In [102]:
model = MushroomNet(hidden_size = 10, activation_fn = 'relu', apply_dropout=False)
model

MushroomNet(
  (fc1): Linear(in_features=117, out_features=10, bias=True)
  (fc2): Linear(in_features=10, out_features=10, bias=True)
  (fc3): Linear(in_features=10, out_features=2, bias=True)
)

In [103]:
train_and_evaluate(model, 1000, 1e-3)

Epoch: 100, Training Loss: 0.22179245948791504, Test Loss: 0.20918329060077667, Test Accuracy: 0.9827692307692307
Epoch: 200, Training Loss: 0.027941960841417313, Test Loss: 0.025176620110869408, Test Accuracy: 0.9987692307692307
Epoch: 300, Training Loss: 0.010451150126755238, Test Loss: 0.009900914505124092, Test Accuracy: 0.9993846153846154
Epoch: 400, Training Loss: 0.005354117136448622, Test Loss: 0.0052700950764119625, Test Accuracy: 1.0
Epoch: 500, Training Loss: 0.0032094940543174744, Test Loss: 0.0032418747432529926, Test Accuracy: 1.0
Epoch: 600, Training Loss: 0.0021188321989029646, Test Loss: 0.002179601928219199, Test Accuracy: 1.0
Epoch: 700, Training Loss: 0.0014942493289709091, Test Loss: 0.0015572879929095507, Test Accuracy: 1.0
Epoch: 800, Training Loss: 0.0011053183116018772, Test Loss: 0.0011637096758931875, Test Accuracy: 1.0
Epoch: 900, Training Loss: 0.0008474972564727068, Test Loss: 0.0008997905533760786, Test Accuracy: 1.0
Epoch: 1000, Training Loss: 0.00066825

In [104]:
model = MushroomNet(hidden_size = 10, activation_fn = 'relu', apply_dropout=True)
model

MushroomNet(
  (fc1): Linear(in_features=117, out_features=10, bias=True)
  (fc2): Linear(in_features=10, out_features=10, bias=True)
  (fc3): Linear(in_features=10, out_features=2, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
)

In [105]:
train_and_evaluate(model, 1000, 1e-3)

Epoch: 100, Training Loss: 0.18812774121761322, Test Loss: 0.15213803946971893, Test Accuracy: 0.9772307692307692
Epoch: 200, Training Loss: 0.03672264516353607, Test Loss: 0.01816711202263832, Test Accuracy: 0.9993846153846154
Epoch: 300, Training Loss: 0.01715090312063694, Test Loss: 0.004938621073961258, Test Accuracy: 1.0
Epoch: 400, Training Loss: 0.008859877474606037, Test Loss: 0.0019141172524541616, Test Accuracy: 1.0
Epoch: 500, Training Loss: 0.0074091688729822636, Test Loss: 0.0008916385122574866, Test Accuracy: 1.0
Epoch: 600, Training Loss: 0.006055639125406742, Test Loss: 0.0004663467698264867, Test Accuracy: 1.0
Epoch: 700, Training Loss: 0.0053313737735152245, Test Loss: 0.0002679798344615847, Test Accuracy: 1.0
Epoch: 800, Training Loss: 0.005349571350961924, Test Loss: 0.00016450615657959133, Test Accuracy: 1.0
Epoch: 900, Training Loss: 0.004180808085948229, Test Loss: 0.00010490238491911441, Test Accuracy: 1.0
Epoch: 1000, Training Loss: 0.0038069074507802725, Test 

### Using a model for prediction

In [118]:
features_required = X.columns
features_required

Index(['cap-shape_b', 'cap-shape_c', 'cap-shape_f', 'cap-shape_k',
       'cap-shape_s', 'cap-shape_x', 'cap-surface_f', 'cap-surface_g',
       'cap-surface_s', 'cap-surface_y',
       ...
       'population_s', 'population_v', 'population_y', 'habitat_d',
       'habitat_g', 'habitat_l', 'habitat_m', 'habitat_p', 'habitat_u',
       'habitat_w'],
      dtype='object', length=117)

In [141]:
sample = df.sample(n=10)
sample

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
143,e,f,y,n,t,a,f,c,b,w,...,y,w,w,p,w,o,p,k,y,p
2240,e,x,y,e,t,n,f,c,b,w,...,s,w,g,p,w,o,p,n,y,d
2768,e,x,y,g,t,n,f,c,b,w,...,s,g,g,p,w,o,p,k,y,d
3594,p,f,f,g,f,f,f,c,b,g,...,k,n,n,p,w,o,l,h,y,g
1575,e,f,s,w,f,n,f,w,b,k,...,f,w,w,p,w,o,e,k,a,g
2566,p,x,f,g,f,f,f,c,b,p,...,k,b,p,p,w,o,l,h,y,d
3174,p,x,f,p,f,c,f,w,n,g,...,s,w,w,p,w,o,p,k,v,d
141,e,b,y,y,t,l,f,c,b,g,...,s,w,w,p,w,o,p,k,n,m
4234,p,f,f,y,f,f,f,c,b,p,...,k,b,p,p,w,o,l,h,v,d
3140,e,f,y,n,t,n,f,c,b,p,...,s,p,p,p,w,o,p,k,v,d


In [142]:
actual_classes = sample[["class"]]
actual_classes

Unnamed: 0,class
143,e
2240,e
2768,e
3594,p
1575,e
2566,p
3174,p
141,e
4234,p
3140,e


In [143]:
actual_classes["class"] = le.transform(actual_classes["class"])
actual_classes

Unnamed: 0,class
143,0
2240,0
2768,0
3594,1
1575,0
2566,1
3174,1
141,0
4234,1
3140,0


In [144]:
X_pred = sample.drop(["class"], axis = 1)
X_pred.head()

Unnamed: 0,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
143,f,y,n,t,a,f,c,b,w,e,...,y,w,w,p,w,o,p,k,y,p
2240,x,y,e,t,n,f,c,b,w,t,...,s,w,g,p,w,o,p,n,y,d
2768,x,y,g,t,n,f,c,b,w,t,...,s,g,g,p,w,o,p,k,y,d
3594,f,f,g,f,f,f,c,b,g,e,...,k,n,n,p,w,o,l,h,y,g
1575,f,s,w,f,n,f,w,b,k,t,...,f,w,w,p,w,o,e,k,a,g


In [145]:
X_pred = pd.get_dummies(X_pred)
X_pred.head()

Unnamed: 0,cap-shape_b,cap-shape_f,cap-shape_x,cap-surface_f,cap-surface_s,cap-surface_y,cap-color_e,cap-color_g,cap-color_n,cap-color_p,...,spore-print-color_k,spore-print-color_n,population_a,population_n,population_v,population_y,habitat_d,habitat_g,habitat_m,habitat_p
143,0,1,0,0,0,1,0,0,1,0,...,1,0,0,0,0,1,0,0,0,1
2240,0,0,1,0,0,1,1,0,0,0,...,0,1,0,0,0,1,1,0,0,0
2768,0,0,1,0,0,1,0,1,0,0,...,1,0,0,0,0,1,1,0,0,0
3594,0,1,0,1,0,0,0,1,0,0,...,0,0,0,0,0,1,0,1,0,0
1575,0,1,0,0,1,0,0,0,0,0,...,1,0,1,0,0,0,0,1,0,0


In [146]:
for feature in features_required:
    if feature not in X_pred.columns:
        values = [0 for i in range(10)]
        X_pred[feature] = values
X_pred.head()

Unnamed: 0,cap-shape_b,cap-shape_f,cap-shape_x,cap-surface_f,cap-surface_s,cap-surface_y,cap-color_e,cap-color_g,cap-color_n,cap-color_p,...,spore-print-color_o,spore-print-color_r,spore-print-color_u,spore-print-color_w,spore-print-color_y,population_c,population_s,habitat_l,habitat_u,habitat_w
143,0,1,0,0,0,1,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2240,0,0,1,0,0,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2768,0,0,1,0,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
3594,0,1,0,1,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
1575,0,1,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [147]:
X_pred_tensor = torch.from_numpy(X_pred.values).float()
actual_classes_tensor = torch.from_numpy(np.squeeze(actual_classes.values)).long()
Y_pred = model(X_pred_tensor)
Y_pred

tensor([[-1.4095e-02, -4.2689e+00],
        [-1.2616e-03, -6.6760e+00],
        [-2.4754e-03, -6.0026e+00],
        [-5.8402e+00, -2.9126e-03],
        [-8.9308e-01, -5.2661e-01],
        [-3.3999e-01, -1.2440e+00],
        [-7.0597e+00, -8.5937e-04],
        [-6.9141e-06, -1.1881e+01],
        [-4.9853e-04, -7.6041e+00],
        [-4.3657e-04, -7.7368e+00]], grad_fn=<LogSoftmaxBackward>)

In [148]:
_, predicted = Y_pred.max(1)
predicted

tensor([0, 0, 0, 1, 1, 0, 1, 0, 0, 0])

In [150]:
accuracy = predicted.eq(actual_classes_tensor).sum().item() / X_pred_tensor.shape[0]
print("Accuracy: ", accuracy * 100)

Accuracy:  70.0
