In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import nnfs
from nnfs.datasets import spiral_data
import random
import requests
from NNS import NeuralNetwork as NN #import neural net code from github to reduce copy/pasting

from sklearn.neighbors import KNeighborsClassifier

# Chapter 15
## Dropout
Dropout helps correct co-adaptation (neurons relying too heavily on the outputs of specific other neurons instead of learning useful features on their own), makes the network more robust to noise, and reduces overfitting.

### Forward Pass

Example of one dropout layer

In [31]:
# Fraction of nodes to drop
dropout_rate = 0.3

# Stand-in for the output of a layer: 10 draws from a standard normal
example_output = np.random.normal(0, 1, 10)

# Each entry of the mask is 1 (keep) with probability 1 - dropout_rate, else 0 (drop)
keep_mask = np.random.binomial(1, 1 - dropout_rate, example_output.shape)

# Zero the dropped entries, then scale the survivors up by 1 / (1 - dropout_rate)
# so the expected sum of the outputs matches the sum before dropout
example_output = example_output * keep_mask / (1 - dropout_rate)

example_output

array([ 2.36080221,  0.        ,  0.        ,  0.95471935,  0.00474494,
       -0.34674718, -0.26489841,  0.        ,  1.88789292, -2.15683501])

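Example showing that scaling the kept outputs by $\frac{1}{1-\text{dropout rate}}$ keeps the mean (over many random masks) of the summed outputs equal to the sum before dropout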

In [32]:
# Empirically check that inverted dropout preserves the expected sum of the outputs.
dropout_rate = 0.3
n_trials = 10000
example_output = np.random.normal(0, 1, 10)

print(f'Sum Initial {example_output.sum()}')

# Probability that a node is kept
keep_prob = 1 - dropout_rate

# Draw every trial's mask in one call: one row per trial, one column per node
masks = np.random.binomial(1, keep_prob, size=(n_trials,) + example_output.shape)

# Apply each mask, rescale by 1 / keep_prob, and sum the outputs of each trial
sums = (example_output * masks / keep_prob).sum(axis=1)

print(f'Mean Sum: {np.mean(sums)}')

Sum Initial -7.4606237110977425
Mean SumL: -7.485077497007647


### Backward Pass

The gradient of the dropout layer with respect to its input is $0$ if the node was dropped, or $\frac{1}{1-\text{dropout rate}}$ if the node was kept

In [33]:
class Layer_Dropout:
    """Inverted dropout layer.

    During a training-mode forward pass each input is zeroed with probability
    ``rate`` and the surviving inputs are scaled by ``1 / (1 - rate)``.
    The same scaled mask is reused in the backward pass.
    """

    # Init
    def __init__(self, rate):
        """Create the layer.

        Args:
            rate: Fraction of inputs to drop, with ``0 <= rate < 1``.

        Raises:
            ValueError: If ``rate`` is outside ``[0, 1)``. A rate of 1 would
                make the keep probability 0 and the mask 0 / 0.
        """
        if not 0 <= rate < 1:
            raise ValueError(f'dropout rate must be in [0, 1), got {rate}')
        # Store rate, we invert it for use in the binomial distribution,
        # so self.rate holds the KEEP probability, not the drop rate
        self.rate = 1 - rate

    # Forward Pass
    def forward(self, inputs, training=True):
        """Apply dropout to ``inputs`` and store the result in ``self.output``.

        Args:
            inputs: NumPy array of values coming from the previous layer.
            training: When False, dropout is bypassed and ``self.output`` is
                an unmodified copy of ``inputs``; no new mask is generated.
        """
        # Save input values
        self.inputs = inputs

        # Inference: pass the values through untouched
        if not training:
            self.output = inputs.copy()
            return

        # Generate and save scaled mask: entries are 0 (dropped) or
        # 1 / keep probability (kept)
        self.binary_mask = np.random.binomial(1, self.rate, size=inputs.shape) / self.rate
        # Apply mask to output values
        self.output = inputs * self.binary_mask

    # Backward Pass
    def backward(self, dvalues):
        """Store the gradient with respect to the inputs in ``self.dinputs``.

        Uses the mask saved by the most recent training-mode ``forward`` call,
        so it must be called after one.

        Args:
            dvalues: Gradient of the loss with respect to this layer's output.
        """
        # Gradient on values: 0 where dropped, 1 / keep probability where kept
        self.dinputs = dvalues * self.binary_mask

In [34]:
# Train a 2 -> 64 -> 3 network with one dropout layer on the 3-class spiral
# dataset, then evaluate it on freshly generated data.
# NOTE(review): the recorded output of this cell is an AttributeError from
# Layer_Dense ('bias_regularizaer_l1'); that looks like a typo inside the
# imported NNS module and has to be fixed there.
X, y = spiral_data(samples=1000, classes=3)

# Convert one-hot labels to class indices once, instead of re-checking every epoch
if len(y.shape) == 2:
    y = np.argmax(y, axis=1)

dense1 = NN.Layer_Dense(2, 64, weight_regularizer_l2=5e-4, bias_regularizer_l2=5e-4)
activation1 = NN.Activation_ReLU()
dropout1 = Layer_Dropout(0.1)
dense2 = NN.Layer_Dense(64,3)
loss_activation = NN.Activation_Softmax_Loss_CategoricalCrossentropy()
optimizer = NN.Optimizer_Adam(learning_rate=0.05, decay=5e-5)

for epoch in range(10001):

    # Forward pass (dropout active)
    dense1.forward(X)
    activation1.forward(dense1.output)
    dropout1.forward(activation1.output)
    dense2.forward(dropout1.output)

    data_loss = loss_activation.forward(dense2.output, y)

    regularization_loss = loss_activation.loss.regularization_loss(dense1) + loss_activation.loss.regularization_loss(dense2)

    loss = data_loss + regularization_loss

    # Training accuracy; computed from the dropout-perturbed forward pass above
    predictions = np.argmax(loss_activation.output, axis=1)
    accuracy = np.mean(predictions==y)

    # Log every 1000 epochs. These are training metrics only.
    if not epoch % 1000:
        print(  f'epoch: {epoch}, ' +
                f'acc: {accuracy :.3f} , ' +
                f'loss: {loss :.3f} (' +
                f'data_loss: {data_loss :.3f} , ' +
                f'reg_loss: {regularization_loss :.3f}), ' +
                f'lr: {optimizer.current_learning_rate}')

    # Backward pass
    loss_activation.backward(loss_activation.output, y)
    dense2.backward(loss_activation.dinputs)
    dropout1.backward(dense2.dinputs)
    activation1.backward(dropout1.dinputs)
    dense1.backward(activation1.dinputs)
    # Update weights and biases
    optimizer.pre_update_params()
    optimizer.update_params(dense1)
    optimizer.update_params(dense2)
    optimizer.post_update_params()

# Validation on freshly generated data. Dropout is a training-time technique,
# so the dropout layer is bypassed: dense2 reads activation1's output directly.
# `predictions` is deliberately left untouched; it still holds the last
# training-epoch predictions for X.
X_test, y_test = spiral_data(samples=100, classes=3)
if len(y_test.shape) == 2:
    y_test = np.argmax(y_test, axis=1)

dense1.forward(X_test)
activation1.forward(dense1.output)
dense2.forward(activation1.output)

val_loss = loss_activation.forward(dense2.output, y_test)
val_predictions = np.argmax(loss_activation.output, axis=1)
val_accuracy = np.mean(val_predictions == y_test)

print(f'validation, acc: {val_accuracy:.3f}, loss: {val_loss:.3f}')

AttributeError: 'Layer_Dense' object has no attribute 'bias_regularizaer_l1'

In [None]:
# Visualise the model's predictions: background regions are coloured by the
# predicted class, and the data points are coloured by their true class.

# Bounding box of the data, padded by 0.1 on every side
pad = 0.1
x_min, y_min = X[:, 0].min() - pad, X[:, 1].min() - pad
x_max, y_max = X[:, 0].max() + pad, X[:, 1].max() + pad

# 100 x 100 grid covering the bounding box
grid_x = np.linspace(x_min, x_max, 100)
grid_y = np.linspace(y_min, y_max, 100)
xx, yy = np.meshgrid(grid_x, grid_y)

# Extend the network's predictions for X to every grid point: each grid point
# takes the predicted class of its single nearest sample
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(X, predictions)
grid_points = np.column_stack((xx.ravel(), yy.ravel()))
Z = knn.predict(grid_points).reshape(xx.shape)

# One colour per class
cmap = ListedColormap(['red', 'blue', 'yellow'])

fig, ax = plt.subplots()

# Background: predicted class regions
ax.contourf(xx, yy, Z, cmap=cmap, alpha=0.3)

# Foreground: the samples, coloured by their true labels
points = ax.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap, edgecolors='k', s=50, alpha=0.1)

# Colour bar for the scatter's class colours
fig.colorbar(points, ax=ax, label='Category')

# Labels and title
ax.set_xlabel('X Value')
ax.set_ylabel('Y Value')
ax.set_title('True Spiral with Background Colored by y_pred')

# Show the plot
plt.show()
