In [1]:
# import dependencies
from path import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
import pandas as pd
import tensorflow as tf
#from sklearn.decomposition import PCA
#from sklearn.cluster import KMeans

In [2]:
# Load the cleaned mushroom data set from CSV.
# The file's first column ('Unnamed: 0') holds the saved row labels, so it is
# used as the DataFrame index rather than being read as a feature.
file_path = "Resources/mushrooms_df_clean.csv"
mushroom_df = pd.read_csv(
    file_path,
    index_col='Unnamed: 0',
)

# Preview the first rows
mushroom_df.head()

Unnamed: 0,Poisonous or Edible,Cap-Shape,Cap-Surface,Cap-Color,Bruises,Odor,Gill-attachment,gill-spacing,Gill-size,Gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk color below ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,p,x,s,n,t,p,f,c,n,k,...,s,w,w,p,w,o,p,k,s,u
1,e,x,f,n,f,n,f,w,b,n,...,f,w,w,p,w,o,e,k,a,g
2,e,x,s,w,f,n,f,w,b,n,...,f,w,w,p,w,o,e,k,s,g
3,e,x,f,g,f,n,f,w,b,k,...,f,w,w,p,w,o,e,k,s,g
4,e,f,f,w,f,n,f,w,b,k,...,f,w,w,p,w,o,e,n,a,g


In [3]:
# Review data info: prints the index range, and for every column its
# non-null count and dtype (useful to confirm there are no missing values
# before encoding).
mushroom_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5644 entries, 0 to 8123
Data columns (total 23 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   Poisonous or Edible       5644 non-null   object
 1   Cap-Shape                 5644 non-null   object
 2   Cap-Surface               5644 non-null   object
 3   Cap-Color                 5644 non-null   object
 4   Bruises                   5644 non-null   object
 5   Odor                      5644 non-null   object
 6   Gill-attachment           5644 non-null   object
 7   gill-spacing              5644 non-null   object
 8   Gill-size                 5644 non-null   object
 9   Gill-color                5644 non-null   object
 10  stalk-shape               5644 non-null   object
 11  stalk-root                5644 non-null   object
 12  stalk-surface-above-ring  5644 non-null   object
 13  stalk-surface-below-ring  5644 non-null   object
 14  stalk-color-above-ring  

In [4]:
# Prepare the inputs for the OneHotEncoder.

# Collect the name of every column stored with the "object" dtype; these are
# the categorical columns that will be one-hot encoded.
app_cat = [
    column
    for column, dtype in mushroom_df.dtypes.items()
    if dtype == "object"
]

# Number of distinct categories found in each of those columns
mushroom_df[app_cat].nunique()

Poisonous or Edible         2
Cap-Shape                   6
Cap-Surface                 4
Cap-Color                   8
Bruises                     2
Odor                        7
Gill-attachment             2
gill-spacing                2
Gill-size                   2
Gill-color                  9
stalk-shape                 2
stalk-root                  4
stalk-surface-above-ring    4
stalk-surface-below-ring    4
stalk-color-above-ring      7
stalk color below ring      7
veil-type                   1
veil-color                  2
ring-number                 3
ring-type                   4
spore-print-color           6
population                  6
habitat                     6
dtype: int64

In [5]:
# Create a OneHotEncoder instance that returns a dense array.
# The keyword controlling dense output was renamed from `sparse` to
# `sparse_output` in scikit-learn 1.2, so try the current spelling first and
# fall back to the legacy one on older releases.
try:
    enc = OneHotEncoder(sparse_output=False)
except TypeError:
    enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list.
# - Column names come from `get_feature_names_out`, the replacement for the
#   removed `get_feature_names`; it yields "<column>_<category>" labels.
# - The original row labels are reused as the index: mushroom_df's index is
#   not a contiguous 0..n-1 range, and a default RangeIndex here would no
#   longer line up with it in any later index-based merge/join.
encoded_df = pd.DataFrame(
    enc.fit_transform(mushroom_df[app_cat]),
    columns=enc.get_feature_names_out(app_cat),
    index=mushroom_df.index,
)
encoded_df.head()

Unnamed: 0,Poisonous or Edible_e,Poisonous or Edible_p,Cap-Shape_b,Cap-Shape_c,Cap-Shape_f,Cap-Shape_k,Cap-Shape_s,Cap-Shape_x,Cap-Surface_f,Cap-Surface_g,...,population_n,population_s,population_v,population_y,habitat_d,habitat_g,habitat_l,habitat_m,habitat_p,habitat_u
0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [6]:
# Merge one-hot encoded features and drop the originals.
#
# encoded_df was produced row-by-row from mushroom_df, so the two frames
# correspond by POSITION. mushroom_df, however, carries the row labels saved
# in the CSV (not a contiguous 0..n-1 range), so merging on the index with a
# freshly numbered encoded_df would silently drop and mismatch rows.
# Re-labelling the encoded frame with mushroom_df's index first makes the
# index merge equivalent to a positional one (and is a no-op if the labels
# already match).
encoded_aligned = encoded_df.set_axis(mushroom_df.index, axis=0)

merged_df = mushroom_df.merge(encoded_aligned, left_index=True, right_index=True)

# Every input row must survive the merge exactly once.
assert len(merged_df) == len(encoded_df), "row count changed while merging encoded features"

mushroom_df = merged_df.drop(app_cat, axis=1)
mushroom_df.head()

Unnamed: 0,Poisonous or Edible_e,Poisonous or Edible_p,Cap-Shape_b,Cap-Shape_c,Cap-Shape_f,Cap-Shape_k,Cap-Shape_s,Cap-Shape_x,Cap-Surface_f,Cap-Surface_g,...,population_n,population_s,population_v,population_y,habitat_d,habitat_g,habitat_l,habitat_m,habitat_p,habitat_u
0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [7]:
# Separate the class target from the feature matrix.
# Both one-hot columns derived from "Poisonous or Edible" are removed from the
# features; the "_e" column alone serves as the binary target.
target_columns = ["Poisonous or Edible_e", "Poisonous or Edible_p"]

y = encoded_df["Poisonous or Edible_e"].values
X = encoded_df.drop(target_columns, axis=1).values

# Split the preprocessed data into training and testing sets
# (fixed random_state so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=29,
)

In [8]:
# Standardise the features.
# The scaler is fitted on the training split only and then applied to both
# splits, so the test data does not influence the fitted statistics.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to each split
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

In [9]:
# Define the model - a deep neural net whose two hidden layers each have as
# many nodes as there are input features.
number_input_features = X_train.shape[1]
hidden_nodes_layer1 = number_input_features
hidden_nodes_layer2 = number_input_features

nn = tf.keras.models.Sequential(
    [
        # First hidden layer (also declares the input width)
        tf.keras.layers.Dense(
            units=hidden_nodes_layer1,
            input_dim=number_input_features,
            activation="relu",
        ),
        # Second hidden layer
        tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"),
        # Output layer: a single sigmoid unit for the binary target
        tf.keras.layers.Dense(units=1, activation="sigmoid"),
    ]
)

# Check the structure of the model
nn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 98)                9702      
                                                                 
 dense_1 (Dense)             (None, 98)                9702      
                                                                 
 dense_2 (Dense)             (None, 1)                 99        
                                                                 
Total params: 19,503
Trainable params: 19,503
Non-trainable params: 0
_________________________________________________________________


In [10]:
# Compile the model: binary cross-entropy loss to match the single sigmoid
# output, the Adam optimizer, and accuracy reported as the metric.
nn.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [11]:
# Create Callback
# Take ModelCheckpoint from the Keras bundled with TensorFlow - the same
# `tf.keras` the model `nn` was built with - instead of importing the
# standalone `keras` package part-way through the notebook.
ModelCheckpoint = tf.keras.callbacks.ModelCheckpoint

BATCH_SIZE = 32           # Keras' default batch size for fit(), made explicit
VALIDATION_SPLIT = 0.02   # share of the training rows held out for validation
SAVE_EVERY_N_EPOCHS = 5   # checkpoint cadence

# `period=` is deprecated in ModelCheckpoint (and rejected by newer Keras
# releases); an integer `save_freq`, counted in batches, is its replacement.
# fit() trains on the first (1 - validation_split) share of the rows, so the
# number of batches per epoch is derived from that count.
n_fit_samples = int(len(X_train_scaled) * (1 - VALIDATION_SPLIT))
steps_per_epoch = -(-n_fit_samples // BATCH_SIZE)  # ceiling division

filepath = 'Resources/mushrooms.h5'
checkpoint = ModelCheckpoint(filepath=filepath,
                             monitor='val_loss',
                             verbose=2,
                             mode='auto',
                             save_freq=SAVE_EVERY_N_EPOCHS * steps_per_epoch)
callbacks = [checkpoint]

# Train the model
fit_model = nn.fit(X_train_scaled,
                   y_train,
                   epochs=100,
                   batch_size=BATCH_SIZE,
                   validation_split=VALIDATION_SPLIT,
                   callbacks=callbacks)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 5: saving model to Resources\mushrooms.h5
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 10: saving model to Resources\mushrooms.h5
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 15: saving model to Resources\mushrooms.h5
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 20: saving model to Resources\mushrooms.h5
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 25: saving model to Resources\mushrooms.h5
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 30: saving model to Resources\mushrooms.h5
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 35: saving model to Resources\mushrooms.h5
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 40: saving model to Resources\mushrooms.h5
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 45: saving model to Resourc

Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 50: saving model to Resources\mushrooms.h5
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 55: saving model to Resources\mushrooms.h5
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 60: saving model to Resources\mushrooms.h5
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 65: saving model to Resources\mushrooms.h5
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 70: saving model to Resources\mushrooms.h5
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 75: saving model to Resources\mushrooms.h5
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 80: saving model to Resources\mushrooms.h5
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 85: saving model to Resources\mushrooms.h5
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 90: saving model to Resources\

Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 95: saving model to Resources\mushrooms.h5
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Epoch 100: saving model to Resources\mushrooms.h5


In [12]:
# Score the trained network on the held-out (scaled) test split.
# evaluate() returns the loss followed by the compiled metric (accuracy).
evaluation = nn.evaluate(X_test_scaled, y_test, verbose=2)
model_loss, model_accuracy = evaluation
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

45/45 - 0s - loss: 2.6227e-08 - accuracy: 1.0000 - 57ms/epoch - 1ms/step
Loss: 2.6227230165432047e-08, Accuracy: 1.0
