In [48]:
# classifies mushrooms as either poisonous or edible based on their other attributes
# data from: https://www.kaggle.com/uciml/mushroom-classification

# import required libraries
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import math
import pandas as pd

In [49]:
# read the mushroom dataset (path is relative to the current working directory)
data = pd.read_csv(filepath_or_buffer='./mushrooms.csv')

In [50]:
# separate into poisonous ('p') and edible ('e') rows and get rid of the class
# column (which tells if it is edible or poisonous).
#
# drop() is called WITHOUT inplace=True so that it returns a new frame:
# dropping in place on the result of a .loc selection mutates a slice of
# `data`, which makes pandas emit SettingWithCopyWarning. Building each
# frame in a single expression also keeps this cell safe to re-run.
is_poisonous = data['class'] == 'p'
is_edible = data['class'] == 'e'

data_poison = data.loc[is_poisonous].drop('class', axis=1)
data_edible = data.loc[is_edible].drop('class', axis=1)

In [51]:
# turn both per-class frames into plain numpy arrays
data_npy_poison, data_npy_edible = (
    frame.to_numpy() for frame in (data_poison, data_edible)
)

In [52]:
# quick look at the poisonous samples as a raw array
# (one row per mushroom; the 'class' column has already been dropped)
print(data_npy_poison)

[['x' 's' 'n' ... 'k' 's' 'u']
 ['x' 'y' 'w' ... 'k' 's' 'u']
 ['x' 'y' 'w' ... 'k' 'v' 'g']
 ...
 ['k' 's' 'e' ... 'w' 'v' 'd']
 ['k' 'y' 'n' ... 'w' 'v' 'd']
 ['k' 'y' 'n' ... 'w' 'v' 'l']]


In [53]:
# randomizing the data
# A dedicated, seeded generator makes the shuffle (and therefore the
# train/test split and every metric derived from it) identical on each
# Restart & Run All, without touching numpy's global random state.
SPLIT_SEED = 42
split_rng = np.random.RandomState(SPLIT_SEED)
split_rng.shuffle(data_npy_poison)  # shuffles rows in place
split_rng.shuffle(data_npy_edible)

# splitting into test and train
train_percent = 0.8

# One-hot targets. They are 4 wide to match the 4-unit softmax output layer
# of the model, although only the first two positions are ever set.
POISON_LABEL = [1, 0, 0, 0]
EDIBLE_LABEL = [0, 1, 0, 0]


def split_with_labels(samples, label, cutoff):
    """Split one class's samples at `cutoff` and build matching one-hot targets.

    Parameters
    ----------
    samples : array whose first axis indexes the samples of a single class.
    label   : one-hot row to repeat for every sample of that class.
    cutoff  : number of leading rows that go to the training part.

    Returns
    -------
    (trainX, trainY, testX, testY) where trainX/testX are the rows before/after
    `cutoff` and trainY/testY repeat `label` once per corresponding row.
    """
    trainX = samples[:cutoff]
    testX = samples[cutoff:]
    trainY = np.tile(label, (len(trainX), 1))
    testY = np.tile(label, (len(testX), 1))
    return trainX, trainY, testX, testY


# int() already truncates towards zero, which equals floor for these
# non-negative products, so no separate math.floor call is needed
poison_cutoff = int(len(data_npy_poison) * train_percent)
poison_trainX, poison_trainY, poison_testX, poison_testY = split_with_labels(
    data_npy_poison, POISON_LABEL, poison_cutoff)

edible_cutoff = int(len(data_npy_edible) * train_percent)
edible_trainX, edible_trainY, edible_testX, edible_testY = split_with_labels(
    data_npy_edible, EDIBLE_LABEL, edible_cutoff)

# building the training and testing arrays
# (poisonous rows first, then edible rows; the two classes are not interleaved here)
X_train = np.concatenate((poison_trainX, edible_trainX))
Y_train = np.concatenate((poison_trainY, edible_trainY))

X_test = np.concatenate((poison_testX, edible_testX))
Y_test = np.concatenate((poison_testY, edible_testY))

print(X_train)

[['f' 'y' 'y' ... 'h' 'v' 'g']
 ['x' 'y' 'n' ... 'w' 'v' 'p']
 ['k' 'y' 'e' ... 'w' 'v' 'p']
 ...
 ['x' 'y' 'r' ... 'h' 'v' 'd']
 ['x' 'f' 'n' ... 'k' 'a' 'g']
 ['x' 'f' 'n' ... 'n' 'v' 'd']]


In [54]:
# convert the X data to floats and normalize the X data
# 121 == ord('y'). Dividing by it scales the character codes so that 'y' maps
# to exactly 1.0 -- presumably 'y' is the largest code present in the dataset
# (TODO confirm; any larger character would map above 1.0).
MAX_CHAR_CODE = 121


def floaterize(t):
    """Map a single-character category code to the float ord(t) / 121."""
    return float(ord(t)) / MAX_CHAR_CODE


# otypes is given explicitly: the result dtype is float either way, but without
# it np.vectorize cannot infer the output type of a size-0 array and raises
# ValueError (e.g. for an empty test split).
vfunc = np.vectorize(floaterize, otypes=[float])

# NOTE: this overwrites the character arrays with float arrays, so this cell
# must be run exactly once after the split cell (ord() fails on floats).
X_train = vfunc(X_train)
X_test = vfunc(X_test)

In [55]:
# inspect the training features after the character-to-float conversion
print(X_train)

[[0.84297521 1.         1.         ... 0.85950413 0.97520661 0.85123967]
 [0.99173554 1.         0.90909091 ... 0.98347107 0.97520661 0.92561983]
 [0.88429752 1.         0.83471074 ... 0.98347107 0.97520661 0.92561983]
 ...
 [0.99173554 1.         0.94214876 ... 0.85950413 0.97520661 0.82644628]
 [0.99173554 0.84297521 0.90909091 ... 0.88429752 0.80165289 0.85123967]
 [0.99173554 0.84297521 0.90909091 ... 0.90909091 0.97520661 0.82644628]]


In [56]:
# Fully connected classifier, declared as one ordered list of layers:
# 22 input features -> two 512-unit ReLU hidden layers (each followed by 20%
# dropout) -> 4-way softmax, matching the 4-wide one-hot targets.
model = keras.models.Sequential([
    # first hidden layer
    layers.Dense(512, input_shape=(22,)),
    layers.Activation('relu'),
    layers.Dropout(0.2),

    # second hidden layer
    layers.Dense(512),
    layers.Activation('relu'),
    layers.Dropout(0.2),

    # output layer (four outputs)
    layers.Dense(4),
    layers.Activation('softmax'),
])

In [57]:
# configure training: Adam optimizer, categorical cross-entropy on the one-hot
# targets, and accuracy reported alongside the loss
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [58]:
# fit for 20 epochs in mini-batches of 128, printing one line per epoch;
# the held-out test split is passed as validation data, so the val_* numbers
# in the log are computed on X_test / Y_test
history = model.fit(
    x=X_train,
    y=Y_train,
    batch_size=128,
    epochs=20,
    verbose=2,
    validation_data=(X_test, Y_test),
)

Train on 6498 samples, validate on 1626 samples
Epoch 1/20
6498/6498 - 0s - loss: 0.7068 - accuracy: 0.5620 - val_loss: 0.6217 - val_accuracy: 0.6661
Epoch 2/20
6498/6498 - 0s - loss: 0.5912 - accuracy: 0.6931 - val_loss: 0.5096 - val_accuracy: 0.7355
Epoch 3/20
6498/6498 - 0s - loss: 0.5119 - accuracy: 0.7562 - val_loss: 0.4325 - val_accuracy: 0.8284
Epoch 4/20
6498/6498 - 0s - loss: 0.4256 - accuracy: 0.8216 - val_loss: 0.3979 - val_accuracy: 0.8438
Epoch 5/20
6498/6498 - 0s - loss: 0.3788 - accuracy: 0.8473 - val_loss: 0.3241 - val_accuracy: 0.8899
Epoch 6/20
6498/6498 - 0s - loss: 0.3293 - accuracy: 0.8766 - val_loss: 0.2823 - val_accuracy: 0.8930
Epoch 7/20
6498/6498 - 0s - loss: 0.2985 - accuracy: 0.8914 - val_loss: 0.2942 - val_accuracy: 0.8856
Epoch 8/20
6498/6498 - 0s - loss: 0.2878 - accuracy: 0.8926 - val_loss: 0.2664 - val_accuracy: 0.9164
Epoch 9/20
6498/6498 - 0s - loss: 0.2943 - accuracy: 0.8917 - val_loss: 0.2507 - val_accuracy: 0.9231
Epoch 10/20
6498/6498 - 0s - loss: