In [4]:
## import packages
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd

In [7]:
## read in dataset
mush_full = pd.read_csv("datasets/mushroom_cleaned.csv")
print(mush_full.head())

## last column is target variable so...
X = mush_full.iloc[:, :-1].values
y = mush_full.iloc[:, -1].values

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

   cap-diameter  cap-shape  gill-attachment  gill-color  stem-height  \
0          1372          2                2          10     3.807467   
1          1461          2                2          10     3.807467   
2          1371          2                2          10     3.612496   
3          1261          6                2          10     3.787572   
4          1305          6                2          10     3.711971   

   stem-width  stem-color    season  class  
0        1545          11  1.804273      1  
1        1557          11  1.804273      1  
2        1566          11  1.804273      1  
3        1566          11  1.804273      1  
4        1464          11  0.943195      1  


In [8]:
# Build the neural network model
model = Sequential(
    [
        Dense(64, activation="relu", input_shape=(X_train.shape[1],)),
        Dense(32, activation="relu"),
        Dense(1, activation="sigmoid"),
    ]
)

# Compile the model
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Train the model
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2)

# Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.2f}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m1081/1081[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 5ms/step - accuracy: 0.6852 - loss: 0.5785 - val_accuracy: 0.8089 - val_loss: 0.4455
Epoch 2/20
[1m1081/1081[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.8212 - loss: 0.4148 - val_accuracy: 0.8700 - val_loss: 0.3279
Epoch 3/20
[1m1081/1081[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - accuracy: 0.8720 - loss: 0.3166 - val_accuracy: 0.9015 - val_loss: 0.2697
Epoch 4/20
[1m1081/1081[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.8974 - loss: 0.2602 - val_accuracy: 0.9059 - val_loss: 0.2354
Epoch 5/20
[1m1081/1081[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7ms/step - accuracy: 0.9149 - loss: 0.2251 - val_accuracy: 0.9326 - val_loss: 0.1993
Epoch 6/20
[1m1081/1081[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 9ms/step - accuracy: 0.9282 - loss: 0.1963 - val_accuracy: 0.9367 - val_loss: 0.1843
Epoch 7/20
[1

In [9]:
## for illustrative purposes, repeat using kfold crossvalidation
from sklearn.model_selection import KFold


# Define the K-fold Cross Validator
kf = KFold(n_splits=5, shuffle=True, random_state=42)


# Define the model building function
def build_model():
    model = Sequential(
        [
            Dense(64, activation="relu", input_shape=(X.shape[1],)),
            Dense(32, activation="relu"),
            Dense(1, activation="sigmoid"),
        ]
    )
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    return model


# Lists to hold the results of each fold
fold_accuracies = []

# Perform 5-fold cross-validation
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    model = build_model()
    model.fit(X_train, y_train, epochs=20, batch_size=32, verbose=0)

    # Evaluate the model
    loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
    fold_accuracies.append(accuracy)
    print(f"Fold Accuracy: {accuracy:.2f}")

# Calculate the average accuracy across all folds
average_accuracy = np.mean(fold_accuracies)
print(f"Average Test Accuracy: {average_accuracy:.2f}")

Fold Accuracy: 0.74
Fold Accuracy: 0.71
Fold Accuracy: 0.72
Fold Accuracy: 0.70
Fold Accuracy: 0.64
Average Test Accuracy: 0.70
