In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

In [3]:
data = pd.read_csv("./mushrooms.csv")

In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8124 entries, 0 to 8123
Data columns (total 23 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   class                     8124 non-null   object
 1   cap-shape                 8124 non-null   object
 2   cap-surface               8124 non-null   object
 3   cap-color                 8124 non-null   object
 4   bruises                   8124 non-null   object
 5   odor                      8124 non-null   object
 6   gill-attachment           8124 non-null   object
 7   gill-spacing              8124 non-null   object
 8   gill-size                 8124 non-null   object
 9   gill-color                8124 non-null   object
 10  stalk-shape               8124 non-null   object
 11  stalk-root                8124 non-null   object
 12  stalk-surface-above-ring  8124 non-null   object
 13  stalk-surface-below-ring  8124 non-null   object
 14  stalk-color-above-ring  

In [5]:
data.isnull().sum()

class                       0
cap-shape                   0
cap-surface                 0
cap-color                   0
bruises                     0
odor                        0
gill-attachment             0
gill-spacing                0
gill-size                   0
gill-color                  0
stalk-shape                 0
stalk-root                  0
stalk-surface-above-ring    0
stalk-surface-below-ring    0
stalk-color-above-ring      0
stalk-color-below-ring      0
veil-type                   0
veil-color                  0
ring-number                 0
ring-type                   0
spore-print-color           0
population                  0
habitat                     0
dtype: int64

In [7]:
data.head()

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,p,x,s,n,t,p,f,c,n,k,...,s,w,w,p,w,o,p,k,s,u
1,e,x,s,y,t,a,f,c,b,k,...,s,w,w,p,w,o,p,n,n,g
2,e,b,s,w,t,l,f,c,b,n,...,s,w,w,p,w,o,p,n,n,m
3,p,x,y,w,t,p,f,c,n,n,...,s,w,w,p,w,o,p,k,s,u
4,e,x,s,g,f,n,f,w,b,k,...,s,w,w,p,w,o,e,n,a,g


In [8]:
data.nunique()

class                        2
cap-shape                    6
cap-surface                  4
cap-color                   10
bruises                      2
odor                         9
gill-attachment              2
gill-spacing                 2
gill-size                    2
gill-color                  12
stalk-shape                  2
stalk-root                   5
stalk-surface-above-ring     4
stalk-surface-below-ring     4
stalk-color-above-ring       9
stalk-color-below-ring       9
veil-type                    1
veil-color                   4
ring-number                  3
ring-type                    5
spore-print-color            9
population                   6
habitat                      7
dtype: int64

In [11]:
encoded_data = pd.get_dummies(data, drop_first=True)

In [14]:
TRAIN_SIZE = int(np.round(encoded_data.shape[0]*0.8))
TRAIN_SIZE

6499

In [16]:
train = encoded_data[:][:TRAIN_SIZE]
test = encoded_data[:][TRAIN_SIZE:]
train.shape, test.shape

((6499, 96), (1625, 96))

In [19]:
X_train = train.drop("class_p", axis=1)
X_test = test.drop("class_p", axis=1)
y_train = train["class_p"]
y_test = test["class_p"]
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((6499, 95), (1625, 95), (6499,), (1625,))

In [20]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

In [21]:
class Layer:
    def __init__(self):
        self.input = None
        self.output = None

    # computes the output Y of a layer for a given input X
    
    def forward_propagation(self, input):
        raise NotImplementedError

    # computes dE/dX for a given dE/dY (and update parameters if any)
    def backward_propagation(self, output_error, learning_rate):
        raise NotImplementedError

In [22]:
class FCLayer(Layer):
    # input_size = number of input neurons
    # output_size = number of edges that connects to neurons in next layer
    def __init__(self, input_size, output_size):
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(1, output_size) - 0.5

    # returns output for a given input
    def forward_propagation(self, input_data):
        self.input = input_data
        self.output = np.dot(self.input, self.weights) + self.bias
        return self.output

    # computes dE/dW, dE/dB for a given output_error=dE/dY. Returns input_error=dE/dX.
    def backward_propagation(self, output_error, learning_rate):
        input_error = np.dot(output_error, self.weights.T)
        weights_error = np.dot(self.input.T, output_error)
        # dBias = output_error

        # update parameters
        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * output_error
        return input_error

In [23]:
class ActivationLayer(Layer):
    def __init__(self, activation, activation_prime):
        self.activation = activation
        self.activation_prime = activation_prime

    # returns the activated input
    def forward_propagation(self, input_data):
        self.input = input_data
        self.output = self.activation(self.input)
        return self.output

    # Returns input_error=dE/dX for a given output_error=dE/dY.
    # learning_rate is not used because there is no "learnable" parameters.
    def backward_propagation(self, output_error, learning_rate):
        return self.activation_prime(self.input) * output_error

In [24]:
def tanh(x):
    return np.tanh(x);

def tanh_prime(x):
    return 1-np.tanh(x)**2;

In [36]:
def log_loss(y_true, y_pred):
    #print(y_true, y_pred)
    return np.mean(-y_true*np.log(y_pred)-(1-y_true)*np.log(1-y_pred))

def log_loss_prime(y_true, y_pred):
    return ((1 - y_true) / (1 - y_pred) - y_true / y_pred) / np.size(y_true)

In [26]:
def sigmoid(x):
    return 1/(1 + np.exp(-x))

def sigmoid_prime(x):
    f = 1/(1+np.exp(-x))
    return f * (1 - f)

In [27]:
def relu(x):
    return np.maximum(0, x)
def relu_prime(x):
    return np.where(x > 0, 1.0, 0.0)

In [28]:
def mse(y_true, y_pred):
    return np.mean(np.power(y_true-y_pred, 2));

def mse_prime(y_true, y_pred):
    return 2*(y_pred-y_true)/y_true.size;

In [29]:
def softmax(vector):
    e = np.exp(vector)
    return e / e.sum()

In [30]:
class Network:
    def __init__(self):
        self.layers = []
        self.loss = None
        self.loss_prime = None

    # add layer to network
    def add(self, layer):
        self.layers.append(layer)

    # set loss to use
    def use(self, loss, loss_prime):
        self.loss = loss
        self.loss_prime = loss_prime

        
    # predict output for given input
    def predict(self, input_data):
        # sample dimension first
        samples = len(input_data)
        result = []

        # run network over all samples
        for i in range(samples):
            # forward propagation
            output = input_data[i]
            for layer in self.layers:
                output = layer.forward_propagation(output)
            result.append(output)

        return result

    # train the network 
    
    def fit(self, x_train, y_train, epochs, learning_rate):
        '''
        Fit function does the training. 
        Training data is passed 1-by-1 through the network layers during forward propagation.
        Loss (error) is calculated for each input and back propagation is performed via partial 
        derivatives on each layer.
        '''
        # sample dimension first
        samples = len(x_train)

        # training loop
        for i in range(epochs):
            err = 0
            for j in range(samples):
                # forward propagation
                output = x_train[j]
                for layer in self.layers:
                    output = layer.forward_propagation(output)

                # compute loss (for display purpose only)
                err += self.loss(y_train[j], output)

                # backward propagation
                error = self.loss_prime(y_train[j], output)
                for layer in reversed(self.layers):
                    error = layer.backward_propagation(error, learning_rate)

            # calculate average error on all samples
            err /= samples
            print('epoch %d/%d   error=%f' % (i+1, epochs, err))

In [31]:
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))
#y_train = y_train.values.reshape((y_train.shape[0], 1, y_train.shape[1]))
#y_test = y_test.values.reshape((y_test.shape[0], 1, y_test.shape[1]))

In [38]:
net = Network()
net.add(FCLayer(95, 16))
net.add(ActivationLayer(sigmoid, sigmoid_prime))
net.add(FCLayer(16, 1))
net.add(ActivationLayer(sigmoid, sigmoid_prime))

#print(X_train.shape)
# train
net.use(log_loss, log_loss_prime)
net.fit(X_train, y_train, epochs=100, learning_rate=0.01)

# test
out = net.predict(X_test)
print(out)

epoch 1/100   error=26.481628
epoch 2/100   error=4.268107
epoch 3/100   error=2.012996
epoch 4/100   error=1.307882
epoch 5/100   error=0.899384
epoch 6/100   error=0.718305
epoch 7/100   error=0.607254
epoch 8/100   error=0.532815
epoch 9/100   error=0.479109
epoch 10/100   error=0.438691
epoch 11/100   error=0.407059
epoch 12/100   error=0.381584
epoch 13/100   error=0.360479
epoch 14/100   error=0.338822
epoch 15/100   error=0.250653
epoch 16/100   error=0.237949
epoch 17/100   error=0.225483
epoch 18/100   error=0.211685
epoch 19/100   error=0.200085
epoch 20/100   error=0.189167
epoch 21/100   error=0.180116
epoch 22/100   error=0.172355
epoch 23/100   error=0.165542
epoch 24/100   error=0.159455
epoch 25/100   error=0.153940
epoch 26/100   error=0.148879
epoch 27/100   error=0.144166
epoch 28/100   error=0.139677
epoch 29/100   error=0.135215
epoch 30/100   error=0.130504
epoch 31/100   error=0.125713
epoch 32/100   error=0.121770
epoch 33/100   error=0.118750
epoch 34/100   err

In [50]:
y_pred = np.round(out).reshape((1625,)).astype(int)

In [40]:
y_test

6499    1
6500    1
6501    1
6502    1
6503    1
       ..
8119    0
8120    0
8121    0
8122    1
8123    0
Name: class_p, Length: 1625, dtype: uint8

In [58]:
accuracy = (y_test == y_pred).sum() / len(y_test)
print(f"Accuracy of the model: {accuracy}")

Accuracy of the model: 0.9710769230769231


In [65]:
false_negative = y_pred[y_test==0].sum()
true_negative = (y_test==0).sum()-false_negative
false_negative, true_negative

(0, 507)

In [74]:
true_positive = (y_pred[y_test==1]==1).sum()
false_positive = (y_test==1).sum()-true_positive
true_positive, false_positive

(1071, 47)

In [78]:
precision = true_positive/(true_positive+false_positive)
recall = true_positive/(true_positive+false_negative)
f1_score = 2*precision*recall/(precision+recall)

In [79]:
results = {
    "accuracy": accuracy,
    "precision": precision,
    "recall": recall,
    "f1_score": f1_score,
}
print(results)

{'accuracy': 0.9710769230769231, 'precision': 0.9579606440071556, 'recall': 1.0, 'f1_score': 0.9785290086797626}
