# Mushroom Classifier 

In [1]:
import pandas as pd
from math import e
import numpy as np
import scipy.stats as s
import random
import matplotlib.pyplot as plt
import itertools

In [2]:
data = pd.read_csv('mushrooms.csv')
data

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,p,x,s,n,t,p,f,c,n,k,...,s,w,w,p,w,o,p,k,s,u
1,e,x,s,y,t,a,f,c,b,k,...,s,w,w,p,w,o,p,n,n,g
2,e,b,s,w,t,l,f,c,b,n,...,s,w,w,p,w,o,p,n,n,m
3,p,x,y,w,t,p,f,c,n,n,...,s,w,w,p,w,o,p,k,s,u
4,e,x,s,g,f,n,f,w,b,k,...,s,w,w,p,w,o,e,n,a,g
5,e,x,y,y,t,a,f,c,b,n,...,s,w,w,p,w,o,p,k,n,g
6,e,b,s,w,t,a,f,c,b,g,...,s,w,w,p,w,o,p,k,n,m
7,e,b,y,w,t,l,f,c,b,n,...,s,w,w,p,w,o,p,n,s,m
8,p,x,y,w,t,p,f,c,n,p,...,s,w,w,p,w,o,p,k,v,g
9,e,b,s,y,t,a,f,c,b,g,...,s,w,w,p,w,o,p,k,s,m


#### Seperating P and E Class 

In [3]:
PData = data[data['class']== 'p']
EData = data[data['class']== 'e']

In [4]:
print(PData.shape,EData.shape)

(3916, 23) (4208, 23)


In [5]:
splitindex = 3046

In [6]:
PTrainData = PData[:splitindex]
ETrainData = EData[:splitindex]

In [19]:
PTestData = PData[splitindex:]
ETestData = EData[splitindex:]
PTestData.shape

(870, 23)

## Write Formula 

In [8]:
AllColumns = data.columns
AllColumns = np.array(AllColumns)
AllColumns

array(['class', 'cap-shape', 'cap-surface', 'cap-color', 'bruises',
       'odor', 'gill-attachment', 'gill-spacing', 'gill-size',
       'gill-color', 'stalk-shape', 'stalk-root',
       'stalk-surface-above-ring', 'stalk-surface-below-ring',
       'stalk-color-above-ring', 'stalk-color-below-ring', 'veil-type',
       'veil-color', 'ring-number', 'ring-type', 'spore-print-color',
       'population', 'habitat'], dtype=object)

In [9]:

def maketypedictP(ColumnName,TypeList):
    d = dict(zip(TypeList,list(map(lambda x: PTrainData[PTrainData[ColumnName]==x].shape[0]/3046,TypeList))))
    
    
    for i in TypeList:
        if d[i] == 0:
            d[i] = 1/(ETrainData.shape[0]+len(d))           
    return d





def maketypedictE(ColumnName,TypeList):
    d = {i : 0 for i in TypeList}
    for i in ETrainData[ColumnName]:
        d[i] += 1
    d = {i : d[i]/3046 for i in TypeList}
    
    
    for i in TypeList:
        if d[i] == 0:
            d[i] = 1/(ETrainData.shape[0]+len(d))
            
    return d
        


In [10]:
MasterListP = {}
for c in AllColumns[1:]:
    MasterListDict = maketypedictP(c,data[c].unique())
    MasterListP[c] = MasterListDict



# MasterListP = dict(map(lambda tx,cx:dict(zip(AllColumns[1:],dict(zip(tx,list(map(lambda x: PTrainData[PTrainData[cx]==x].shape[0]/3000,tx)))))),zip(list(map(lambda x : x.unique , AllColumns[1:])),AllColumns[1:])))
    
MasterListE = {}
for c in AllColumns[1:]:
    MasterListDict = maketypedictE(c,data[c].unique())
    MasterListE[c]=MasterListDict
    
print(MasterListP)
print()
print()
print(MasterListE)

{'cap-shape': {'x': 0.5180564674983585, 'b': 0.015101772816808929, 's': 0.000327653997378768, 'f': 0.44353250164149705, 'k': 0.022652659225213395, 'c': 0.0006565988181221273}, 'cap-surface': {'s': 0.3148391332895601, 'y': 0.4343401181877873, 'f': 0.2495075508864084, 'g': 0.0013131976362442547}, 'cap-color': {'n': 0.20091923834537098, 'y': 0.2179908076165463, 'w': 0.10505581089954039, 'g': 0.26526592252133946, 'e': 0.1421536441234406, 'p': 0.028890347997373604, 'b': 0.03939592908732764, 'u': 0.0003272251308900524, 'c': 0.00032829940906106366, 'r': 0.0003272251308900524}, 'bruises': {'t': 0.20485883125410373, 'f': 0.7951411687458962}, 'odor': {'p': 0.0840446487196323, 'a': 0.0003273322422258593, 'l': 0.0003273322422258593, 'n': 0.036769533814839134, 'f': 0.6165462902166776, 'c': 0.06303348653972422, 'y': 0.10275771503611293, 's': 0.09619172685489166, 'm': 0.0006565988181221273}, 'gill-attachment': {'f': 0.9996717005909389, 'a': 0.00032829940906106366}, 'gill-spacing': {'c': 0.96585686145

#### In ETrainData there is some values which are equal to 0. To smooth that we will do Laplace Smoothing 

In [24]:
PTestData

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
6756,p,f,s,n,f,f,f,c,n,b,...,k,p,w,p,w,o,e,w,v,l
6757,p,f,y,n,f,f,f,c,n,b,...,k,w,p,p,w,o,e,w,v,d
6758,p,x,y,e,f,s,f,c,n,b,...,s,p,w,p,w,o,e,w,v,d
6759,p,f,y,n,f,s,f,c,n,b,...,s,w,w,p,w,o,e,w,v,l
6760,p,x,s,n,f,y,f,c,n,b,...,k,p,p,p,w,o,e,w,v,d
6761,p,f,y,e,f,f,f,c,n,b,...,s,p,w,p,w,o,e,w,v,d
6764,p,f,s,n,f,s,f,c,n,b,...,s,w,w,p,w,o,e,w,v,l
6765,p,x,s,n,f,s,f,c,n,b,...,s,p,w,p,w,o,e,w,v,p
6766,p,f,y,n,f,s,f,c,n,b,...,s,p,p,p,w,o,e,w,v,l
6767,p,f,s,e,f,s,f,c,n,b,...,s,w,w,p,w,o,e,w,v,l


In [53]:



# print(MasterListP['cap-shape']['x'])
# print(PTestData[AllColumns[1:]].iloc[1])
truepositive = 0
for i in range(len(PTestData)):
    PMushroomwhenP = 1
    EMushroomwhenP = 1
    for j in AllColumns[1:]:
        PMushroomwhenP *= MasterListP[j][PTestData[j].iloc[i]]
        EMushroomwhenP *= MasterListE[j][PTestData[j].iloc[i]]
#         print('PMushroomwhenE ',PMushroomwhenE)
    PMushroomisP = (PMushroomwhenP * 0.5)/(PMushroomwhenP * 0.5 + EMushroomwhenP * 0.5)
    if PMushroomisP > 0.5:
        truepositive += 1
        
print(truepositive," out of ",len(PTestData))
falsenegative = len(PTestData) - truepositive
    

 

870  out of  870


In [54]:



# print(MasterListP['cap-shape']['x'])
# print(PTestData[AllColumns[1:]].iloc[1])
truenegative = 0
for i in range(len(PTestData)):
    PMushroomwhenE = 1
    EMushroomwhenE = 1
    for j in AllColumns[1:]:
        PMushroomwhenE *= MasterListP[j][ETestData[j].iloc[i]]
        EMushroomwhenE *= MasterListE[j][ETestData[j].iloc[i]]
#         print('PMushroomwhenE ',PMushroomwhenE)
    PMushroomisE = (EMushroomwhenE * 0.5)/(EMushroomwhenE * 0.5 + PMushroomwhenE * 0.5)
    print(PMushroomisE)
    if PMushroomisP > 0.5:
        truenegative += 1
        
print(truenegative," out of ",len(ETestData))
    
falsepositive = len(ETestData) - truenegative
 

0.9999999858266506
0.9999999999192176
0.999806757816663
0.999939650865409
0.9999918966589638
0.9999999872940708
0.9999999999628996
0.9998730308894099
0.9999999994836061
0.9999392941648718
0.9999751480220848
0.9999999517353312
0.9999999034147175
0.9999999504305781
0.9998114476488618
0.9999997803350285
0.9999999949302385
0.9999985753754304
0.9999999895178693
0.9999928408878647
0.9999863540123264
0.9999999788445267
0.9999034261015849
0.999982950321358
0.999999778679317
0.9999999998853774
0.9999997117104785
0.9999999849178732
0.9999999949886682
0.9999999693215651
0.9999999558921493
0.9999996639289997
0.9999999999453298
0.9999955725937425
0.9999999996180503
0.9999912272280868
0.9999999898829248
0.9999999331401798
0.9999999998394613
0.9999078223025718
0.9999999931859889
0.9999999408125603
0.9999999071442017
0.9999999143624642
0.9998069009465381
0.9996977450786578
0.9999999993857419
0.999985009479866
0.9999999784649785
0.9999999075225485
0.9997464031838259
0.9999999999638001
0.999999978552724

4.215808305995884e-07
4.386495213830554e-07
7.636607142583223e-07
2.2678028731562105e-07
1.2554490296889512e-10
1.7441149661859374e-08
2.5043647245298013e-09
3.6288518638511766e-07
2.888426915681399e-09
2.636913205178825e-07
4.392265323275437e-09
1.0853833721765568e-08
1.2521824343659845e-08
3.794223935874071e-11
1.065506335749864e-07
7.805615682887729e-07
2.118179739797882e-09
6.363900667778376e-07
2.848102864691503e-07
2.9601362928401543e-09
7.798721296738627e-07
1.9000549085228444e-06
3.0064617603680563e-07
6.818498907910762e-06
2.5591487480962755e-09
4.473069177395962e-09
7.47825650172914e-09
8.856922344052386e-11
8.461010339538804e-11
8.997208649533063e-07
5.160479514164084e-09
4.158236159835656e-06
3.0792081136782073e-09
2.1040211549657783e-10
7.63177437292225e-09
4.2889061750088275e-09
3.325091351522868e-06
6.585321852040287e-09
4.185007313714043e-09
6.080651623748298e-07
4.2784883149175165e-07
7.164031339987345e-07
1.2790176478183635e-08
4.657536686941285e-07
5.143380615966504e

In [55]:
accuracy = (truepositive + truenegative)/(len(PTestData)+len(ETestData))
precision = truepositive/(truepositive + falsepositive)
recall = truepositive/(truepositive + falsenegative)

print(accuracy,precision,recall)

0.8562992125984252 0.7487091222030982 1.0
