<a href="https://colab.research.google.com/github/V1PASH/PYTORCH/blob/main/mushroom_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Mushroom dataset: predicting whether a mushroom is poisonous or not

This dataset is a cleaned version of the original Mushroom Dataset for Binary Classification available from the UCI Machine Learning Repository. It was cleaned using techniques such as modal imputation, one-hot encoding, z-score normalization, and feature selection. It contains 9 columns: the 8 features listed below plus the target class.

* Cap Diameter
* Cap Shape
* Gill Attachment
* Gill Color
* Stem Height
* Stem Width
* Stem Color
* Season


Target Class - Is it edible or not?
The Target Class contains two values - 0 or 1 - where 0 refers to edible and 1 refers to poisonous.

## importing required libraries and data sets

In [None]:
import torch
from torch  import nn
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import requests

from pathlib import Path

# Fetch the shared helper module once; later runs reuse the local copy.
# The existence check and the write must name the same file, otherwise the
# download is repeated on every run.
helper_path = Path("helper_functions.py")

if helper_path.is_file():
  print("have")
else:
  print("download")
  request = requests.get(
      "https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/main/helper_functions.py",
      timeout=30,
  )
  # Fail loudly rather than saving an HTTP error page as a Python module.
  request.raise_for_status()
  helper_path.write_bytes(request.content)

download


###### Shuffling the dataset

In [None]:
# Load the cleaned mushroom table and shuffle its rows.
# The shuffle is seeded so that every session produces the same row order and
# therefore the same train/test split further down; an unseeded shuffle gives
# a different split each run, which matters when weights saved by an earlier
# session are reloaded and evaluated on the "test" rows.
mushroom = pd.read_csv('/content/drive/MyDrive/Data_Science_Practice/mushroom_cleaned.csv')
mushroom = mushroom.sample(frac=1, random_state=42)

In [None]:
# Row and column counts of the shuffled frame.
mushroom.shape

(54035, 9)

In [None]:
# Column labels, used below to pick the feature and target columns.
mushroom.columns

Index(['cap-diameter', 'cap-shape', 'gill-attachment', 'gill-color',
       'stem-height', 'stem-width', 'stem-color', 'season', 'class'],
      dtype='object')

In [None]:
# Split the frame into the eight predictor columns and the one-column target.
feature_columns = [
    'cap-diameter',
    'cap-shape',
    'gill-attachment',
    'gill-color',
    'stem-height',
    'stem-width',
    'stem-color',
    'season',
]
x = mushroom.loc[:, feature_columns]
# Selecting with a list keeps y two-dimensional (a one-column DataFrame).
y = mushroom.loc[:, ['class']]

In [None]:
# Peek at the first few target rows.
y.head()

Unnamed: 0,class
50512,1
10007,1
25408,1
20280,0
41635,0


In [None]:
# Display the shuffled frame (the notebook truncates the middle rows).
mushroom

Unnamed: 0,cap-diameter,cap-shape,gill-attachment,gill-color,stem-height,stem-width,stem-color,season,class
50512,520,3,0,10,2.013794,0,2,0.943195,1
10007,212,6,0,10,0.449200,179,6,0.888450,1
25408,612,2,6,11,0.063238,984,11,0.943195,1
20280,336,6,1,6,0.095921,630,7,1.804273,0
41635,486,2,6,3,0.457158,400,11,0.943195,0
...,...,...,...,...,...,...,...,...,...
2828,1493,6,2,10,1.575254,1742,11,0.888450,0
1179,645,2,0,10,1.794098,1293,11,0.888450,1
46900,984,6,4,11,0.739666,1867,6,0.888450,0
21234,672,6,6,11,0.898826,1632,11,0.943195,0


##### converting to tensor

In [None]:
# Copy the underlying NumPy arrays into tensors.
# x and y are rebound from DataFrames to tensors here, so the column-selection
# cell has to be re-run before this cell can be executed again.
x = torch.tensor(x.to_numpy())
y = torch.tensor(y.to_numpy())

In [None]:
# Confirm target and feature tensors have the same number of rows.
y.shape,x.shape

(torch.Size([54035, 1]), torch.Size([54035, 8]))

## train test split

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; random_state pins the split's own shuffle.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Shapes of the four splits produced by train_test_split.
x_train.shape,x_test.shape,y_train.shape,y_test.shape

(torch.Size([43228, 8]),
 torch.Size([10807, 8]),
 torch.Size([43228, 1]),
 torch.Size([10807, 1]))

## Device agnostic code

In [None]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'
device

'cuda'

In [None]:
def accuracy_fn(y_true,y_pred):
  """Return the percentage (0-100) of predictions that equal their labels.

  Args:
    y_true: tensor of ground-truth labels.
    y_pred: tensor of predicted labels holding the same number of elements.

  Returns:
    Accuracy as a Python float percentage; 0.0 when there are no predictions.

  Raises:
    ValueError: if the two tensors hold a different number of elements.
  """
  total = y_pred.numel()
  if y_true.numel() != total:
    raise ValueError(
        f"y_true has {y_true.numel()} elements but y_pred has {total}"
    )
  if total == 0:
    # Nothing to score; avoid dividing by zero.
    return 0.0
  # Flatten both sides so a (N,) vs (N, 1) pair is compared element by element
  # instead of being broadcast by torch.eq into an (N, N) grid, which would
  # report accuracies above 100%.
  correct = torch.eq(y_true.reshape(-1), y_pred.reshape(-1)).sum().item()
  return (correct / total) * 100

## Building a model

In [None]:
from torch import nn

In [None]:

class MushroomModel(nn.Module):
  """Feed-forward network: seven Linear layers with a ReLU after each of the first six.

  Args:
    input: number of features per sample.
    output: number of values produced per sample.
    hidden: width of every hidden Linear layer (default 16).
  """

  def __init__(self,input,output,hidden=16):
    super().__init__()
    # Layers are created in forward order, so the Sequential indices (and the
    # state_dict keys derived from them) run from 0 to 12.
    widths = [input] + [hidden] * 6
    stack = []
    for fan_in, fan_out in zip(widths[:-1], widths[1:]):
      stack.append(nn.Linear(in_features=fan_in, out_features=fan_out))
      stack.append(nn.ReLU())
    # Final projection has no activation after it.
    stack.append(nn.Linear(in_features=hidden, out_features=output))
    self.model_layers = nn.Sequential(*stack)

  def forward(self,x):
    """Pass ``x`` through the layer stack and return the last Linear layer's output."""
    return self.model_layers(x)

In [None]:
# Eight inputs (one per feature column) and a single output value per sample.
model0=MushroomModel(input=8,output=1)

In [None]:
# Move the model to the selected device; the returned module is shown as the cell output.
model0.to(device)

MushroomModel(
  (model_layers): Sequential(
    (0): Linear(in_features=8, out_features=16, bias=True)
    (1): ReLU()
    (2): Linear(in_features=16, out_features=16, bias=True)
    (3): ReLU()
    (4): Linear(in_features=16, out_features=16, bias=True)
    (5): ReLU()
    (6): Linear(in_features=16, out_features=16, bias=True)
    (7): ReLU()
    (8): Linear(in_features=16, out_features=16, bias=True)
    (9): ReLU()
    (10): Linear(in_features=16, out_features=16, bias=True)
    (11): ReLU()
    (12): Linear(in_features=16, out_features=1, bias=True)
  )
)

In [None]:
# Put all four splits on the same device as the model.
x_train = x_train.to(device)
y_train = y_train.to(device)
x_test = x_test.to(device)
y_test = y_test.to(device)

## Outputs without training

In [None]:
# Cast every split to float64 so features and labels share one dtype.
x_train, y_train, x_test, y_test = (
    split.type(torch.double) for split in (x_train, y_train, x_test, y_test)
)


In [None]:
# Evaluation mode, with the weights cast to float64 to match the data.
model0.eval().type(torch.float64)

# Forward pass on the test split without gradient tracking.
with torch.inference_mode():
  logits = model0(x_test.to(device))
  logits = logits.type(torch.float64)

In [None]:
# NOTE(review): `logits.to` is referenced but never called, so this cell only
# displays the method object; it looks like leftover debugging.
logits.to

<function Tensor.to>

In [None]:
# Squash the logits to (0, 1) and round to hard 0/1 predictions.
pred = torch.round(torch.sigmoid(logits))

In [None]:
# Score the rounded predictions against the test labels.
acc=accuracy_fn(y_true=y_test,y_pred=pred)

In [None]:
# Element-wise view of which test predictions match their labels.
print(pred==y_test)

tensor([[ True],
        [False],
        [ True],
        ...,
        [ True],
        [False],
        [ True]], device='cuda:0')


In [None]:
# Test accuracy (percent) measured before any optimisation step in this notebook.
acc

45.09114462848154

## optimizer and loss function

In [None]:
# BCEWithLogitsLoss takes raw logits, so the model output is passed to it
# without applying a sigmoid first.
loss_fn = nn.BCEWithLogitsLoss()

# Adam over all of model0's parameters with a small step size.
optimizer = torch.optim.Adam(model0.parameters(), lr=3e-5)

## train test loop

In [None]:
from pathlib import Path

# Resume from previously saved weights when they exist.
# The checkpoint is only written by the save cell further down, so on a first
# run it is absent; guarding the load lets the notebook run top to bottom.
# NOTE(review): the weights come from an earlier session. Unless that session
# used the same shuffle and split, some of today's test rows may have been
# training rows then, which would inflate the test accuracy. Confirm.
model_path = Path("Mushroom_Model")
model_save_path = "/content/drive/MyDrive/Machine Learning/Mushroom_model_trained.pth"

if Path(model_save_path).is_file():
  # weights_only=True restricts unpickling to tensors and plain containers;
  # map_location places the tensors on the device selected for this session.
  print(model0.load_state_dict(
      torch.load(f=model_save_path, map_location=device, weights_only=True)
  ))
else:
  print("no saved checkpoint found, starting from the current weights")

  model0.load_state_dict(torch.load(f=model_save_path, map_location=torch.device('cpu')))


<All keys matched successfully>

In [None]:
# Seed the CPU and CUDA generators before the loop starts.
torch.manual_seed(42)
torch.cuda.manual_seed(42)

epochs = 51

for epoch in range(epochs):
  # --- one optimisation step on the whole training split ---
  model0.train()
  logits = model0(x_train.type(torch.float64))
  loss = loss_fn(logits, y_train.type(torch.float64))
  pred = torch.sigmoid(logits).round()
  acc = accuracy_fn(y_true=y_train, y_pred=pred)

  optimizer.zero_grad()
  loss.backward()
  optimizer.step()

  # --- evaluation on the held-out split, no gradient tracking ---
  model0.eval()
  with torch.inference_mode():
    test_logits = model0(x_test.type(torch.double))
    test_loss = loss_fn(test_logits, y_test.type(torch.double))
    test_preds = torch.sigmoid(test_logits).round()
    test_acc = accuracy_fn(y_true=y_test, y_pred=test_preds)

  # Report only every tenth epoch.
  if epoch % 10 != 0:
    continue
  print(f"epoch {epoch} || train -- loss {loss : .4f}  acc {acc : .3f} || test -- loss {test_loss : .4f}  acc {test_acc : .3f}" )

epoch 0 || train -- loss  0.0448  acc  98.873 || test -- loss  0.0410  acc  98.621
epoch 10 || train -- loss  0.0406  acc  98.823 || test -- loss  0.0389  acc  98.834
epoch 20 || train -- loss  0.0389  acc  98.853 || test -- loss  0.0387  acc  98.834
epoch 30 || train -- loss  0.0379  acc  98.876 || test -- loss  0.0389  acc  98.806
epoch 40 || train -- loss  0.0369  acc  98.869 || test -- loss  0.0395  acc  98.760
epoch 50 || train -- loss  0.0360  acc  98.871 || test -- loss  0.0405  acc  98.723


In [None]:
# Final test accuracy of the trained model, computed without gradient tracking.
model0.eval()
with torch.inference_mode():
  test_inputs = x_test.type(torch.float64).to(device)
  logits = model0(test_inputs)
  pred = torch.sigmoid(logits).round()

accuracy_fn(y_true=y_test, y_pred=pred)


98.72304987508097

In [None]:
from pathlib import Path

# Persist the trained weights twice: in a local folder and on Google Drive.
model_name = "Mushroom_model_trained.pth"
model_path = Path("Mushroom_Model")
model_path.mkdir(parents=True, exist_ok=True)
model_save_path = "/content/drive/MyDrive/Machine Learning/Mushroom_model_trained.pth"

# Local copy first, then the Drive copy.
for destination in (model_path / model_name, model_save_path):
  torch.save(obj=model0.state_dict(), f=destination)

In [None]:
# Rebuild the architecture and load the locally saved weights into it.
# model_save_path is rebound here to the local copy written by the save cell.
model_save_path = model_path/"Mushroom_model_trained.pth"
loaded_model = MushroomModel(input=8,output=1).to(device).type(torch.double)
# map_location lets a checkpoint saved on a GPU load on a CPU-only runtime;
# weights_only=True restricts unpickling to tensors and plain containers.
loaded_model.load_state_dict(
    torch.load(f=model_save_path, map_location=device, weights_only=True)
)

  loaded_model.load_state_dict(torch.load(f=model_save_path))


<All keys matched successfully>

In [None]:
# The reloaded model should reproduce the trained model's test accuracy.
loaded_model.eval()
with torch.inference_mode():
  test_inputs = x_test.type(torch.float64).to(device)
  logits = loaded_model(test_inputs)
  pred = torch.sigmoid(logits).round()

accuracy_fn(y_true=y_test, y_pred=pred)

98.72304987508097

In [None]:
#sns.pairplot(mushroom, hue="class")