## Laboratoria 1 - perceptron, neuron sigmoidalny

In [35]:
import numpy as np
import pandas as pd

import warnings
warnings.filterwarnings('ignore')

### Dane

In [36]:
AND_inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
AND_labels = np.array([0, 0, 0, 1])
OR_inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
OR_labels = np.array([0, 1, 1, 1])

### Funkcje aktywacji

In [37]:
def heaviside(x, threshold=0):
    return (x > threshold).astype(float)

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def relu(x):
    return np.maximum(0, x)    

def leaky_relu(x, alpha=0.01):
    return np.maximum(alpha * x, x)

def tanh(x):
    return np.tanh(x)

### Perceptron

In [38]:
class Perceptron:
    """
    Perceptron model for binary classification.
    """
    
    def __init__(self, input_size, learning_rate=0.1, seed=0, random_weights=False, random_bias=False, random_distribution='uniform', activation_function=heaviside):
        """
        :param input_size: number of input features
        :param learning_rate: learning rate hyperparameter (default: 0.1)
        :param seed: random seed for reproducibility (default: 0)
        :param random_weights: if True, initializes weights with random values (default: False)
        :param random_bias: if True, initializes bias with random values (default: False)
        :param random_distribution: specify random distribution for weights and bias - 'normal' or 'uniform' (default: 'uniform')
        :param activation_function: activation function (default: heaviside)
        Perceptron initialization method.
        """
        
        # seed for reproducibility
        self.rng = np.random.RandomState(seed=seed)
        
        # weights
        if random_weights:
            if random_distribution == 'uniform':
                self.weights = self.rng.uniform(low=-0.1, high=0.1, size=input_size)
            elif random_distribution == 'normal':
                self.weights = self.rng.normal(loc=0, scale=1, size=input_size)
        else:
            self.weights = np.zeros(shape=input_size)
        
        # bias
        if random_bias:
            if random_distribution == 'uniform':
                self.bias = self.rng.uniform(low=-0.1, high=0.1, size=1)
            elif random_distribution == 'normal':
                self.bias = self.rng.normal(loc=0, scale=1, size=1)
        else:
            self.bias = np.zeros(shape=1)
            
        # activation function
        self.activation_function = activation_function
        
        # hyperparameters
        self.learning_rate = learning_rate
        
    def predict(self, inputs):
        """
        :param inputs: inputs to predict (numpy array)
        :return: numpy array of predictions (0 or 1)
        """
        return self.activation_function(np.matmul(inputs, self.weights.T) + self.bias)
    
    def train(self, training_inputs, training_labels, epochs=1):
        """
        :param training_inputs: training data (numpy array)
        :param training_labels: training labels (numpy array)
        :param epochs: number of epochs (default: 1)
        Trains the perceptron model using the training data.
        """
        
        # training loop
        for epoch in range(epochs):
            for inputs, label in zip(training_inputs, training_labels):
                y_pred = self.predict(inputs)
                error = label - y_pred
                self.weights += self.learning_rate * error * inputs
                self.bias += self.learning_rate * error

In [39]:
# AND
model_and = Perceptron(2, learning_rate=0.1)
model_and.train(AND_inputs, AND_labels, epochs=5)

print(f"Wagi: {model_and.weights}")
print(f"Bias: {model_and.bias}")
print(f"Predykcje: {model_and.predict(AND_inputs)}")
print(f"Wartości oczekiwane: {AND_labels}\n")

# OR
model_or = Perceptron(2, learning_rate=0.1)
model_or.train(OR_inputs, OR_labels, epochs=5)

print(f"Wagi: {model_or.weights}")
print(f"Bias: {model_or.bias}")
print(f"Predykcje: {model_or.predict(OR_inputs)}")
print(f"Wartości oczekiwane: {OR_labels}\n")

Wagi: [0.2 0.1]
Bias: [-0.2]
Predykcje: [0. 0. 0. 1.]
Wartości oczekiwane: [0 0 0 1]

Wagi: [0.1 0.1]
Bias: [0.]
Predykcje: [0. 1. 1. 1.]
Wartości oczekiwane: [0 1 1 1]


### Testy na bramkach logicznych AND i OR

##### Różne wartości learning rate

In [40]:
# od bardzo małych do bardzo dużych wartości
learning_rates = [0.00001, 0.0001, 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0, 3.0, 5.0, 7.0, 9.0, 10.0, 30.0, 50.0, 70.0, 90.0, 100.0, 300.0, 500.0, 700.0, 900.0, 1000.0]

# columns for the dataframe
columns=['gate', 'activation function', 'learning_rate', 'epochs' , 'weights', 'bias', 'predictions', 'expected', 'errors']

In [41]:
# AND
df_lr_and = pd.DataFrame(columns=columns)
for lr in learning_rates:
    model_and = Perceptron(2, learning_rate=lr)
    model_and.train(AND_inputs, AND_labels, epochs=5)
    df_lr_and = df_lr_and._append({'gate': 'AND', 'activation function': heaviside.__name__, 'learning_rate': lr, 'epochs': 5, 'weights': model_and.weights, 'bias': model_and.bias, 'predictions': model_and.predict(AND_inputs), 'expected': AND_labels, 'errors': model_and.predict(AND_inputs) - AND_labels}, ignore_index=True)
    
df_lr_and

Unnamed: 0,gate,activation function,learning_rate,epochs,weights,bias,predictions,expected,errors
0,AND,heaviside,1e-05,5,"[2e-05, 1e-05]",[-2e-05],"[0.0, 0.0, 0.0, 1.0]","[0, 0, 0, 1]","[0.0, 0.0, 0.0, 0.0]"
1,AND,heaviside,0.0001,5,"[0.0002, 0.0001]",[-0.0002],"[0.0, 0.0, 0.0, 1.0]","[0, 0, 0, 1]","[0.0, 0.0, 0.0, 0.0]"
2,AND,heaviside,0.001,5,"[0.002, 0.001]",[-0.002],"[0.0, 0.0, 0.0, 1.0]","[0, 0, 0, 1]","[0.0, 0.0, 0.0, 0.0]"
3,AND,heaviside,0.01,5,"[0.02, 0.01]",[-0.02],"[0.0, 0.0, 0.0, 1.0]","[0, 0, 0, 1]","[0.0, 0.0, 0.0, 0.0]"
4,AND,heaviside,0.1,5,"[0.2, 0.1]",[-0.2],"[0.0, 0.0, 0.0, 1.0]","[0, 0, 0, 1]","[0.0, 0.0, 0.0, 0.0]"
5,AND,heaviside,0.3,5,"[0.6, 0.3]",[-0.6],"[0.0, 0.0, 0.0, 1.0]","[0, 0, 0, 1]","[0.0, 0.0, 0.0, 0.0]"
6,AND,heaviside,0.5,5,"[1.0, 0.5]",[-1.0],"[0.0, 0.0, 0.0, 1.0]","[0, 0, 0, 1]","[0.0, 0.0, 0.0, 0.0]"
7,AND,heaviside,0.7,5,"[1.4, 0.7]",[-1.4],"[0.0, 0.0, 0.0, 1.0]","[0, 0, 0, 1]","[0.0, 0.0, 0.0, 0.0]"
8,AND,heaviside,0.9,5,"[1.8, 0.9]",[-1.8],"[0.0, 0.0, 0.0, 1.0]","[0, 0, 0, 1]","[0.0, 0.0, 0.0, 0.0]"
9,AND,heaviside,1.0,5,"[2.0, 1.0]",[-2.0],"[0.0, 0.0, 0.0, 1.0]","[0, 0, 0, 1]","[0.0, 0.0, 0.0, 0.0]"


In [42]:
# OR
df_lr_or = pd.DataFrame(columns=columns)

for lr in learning_rates:
    model_or = Perceptron(2, learning_rate=lr)
    model_or.train(OR_inputs, OR_labels, epochs=3)
    df_lr_or = df_lr_or._append({'gate': 'OR', 'activation function': heaviside.__name__, 'learning_rate': lr, 'epochs': 5, 'weights': model_or.weights, 'bias': model_or.bias, 'predictions': model_or.predict(OR_inputs), 'expected': OR_labels, 'errors': model_or.predict(OR_inputs) - OR_labels}, ignore_index=True)

df_lr_or

Unnamed: 0,gate,activation function,learning_rate,epochs,weights,bias,predictions,expected,errors
0,OR,heaviside,1e-05,5,"[1e-05, 1e-05]",[0.0],"[0.0, 1.0, 1.0, 1.0]","[0, 1, 1, 1]","[0.0, 0.0, 0.0, 0.0]"
1,OR,heaviside,0.0001,5,"[0.0001, 0.0001]",[0.0],"[0.0, 1.0, 1.0, 1.0]","[0, 1, 1, 1]","[0.0, 0.0, 0.0, 0.0]"
2,OR,heaviside,0.001,5,"[0.001, 0.001]",[0.0],"[0.0, 1.0, 1.0, 1.0]","[0, 1, 1, 1]","[0.0, 0.0, 0.0, 0.0]"
3,OR,heaviside,0.01,5,"[0.01, 0.01]",[0.0],"[0.0, 1.0, 1.0, 1.0]","[0, 1, 1, 1]","[0.0, 0.0, 0.0, 0.0]"
4,OR,heaviside,0.1,5,"[0.1, 0.1]",[0.0],"[0.0, 1.0, 1.0, 1.0]","[0, 1, 1, 1]","[0.0, 0.0, 0.0, 0.0]"
5,OR,heaviside,0.3,5,"[0.3, 0.3]",[0.0],"[0.0, 1.0, 1.0, 1.0]","[0, 1, 1, 1]","[0.0, 0.0, 0.0, 0.0]"
6,OR,heaviside,0.5,5,"[0.5, 0.5]",[0.0],"[0.0, 1.0, 1.0, 1.0]","[0, 1, 1, 1]","[0.0, 0.0, 0.0, 0.0]"
7,OR,heaviside,0.7,5,"[0.7, 0.7]",[0.0],"[0.0, 1.0, 1.0, 1.0]","[0, 1, 1, 1]","[0.0, 0.0, 0.0, 0.0]"
8,OR,heaviside,0.9,5,"[0.9, 0.9]",[0.0],"[0.0, 1.0, 1.0, 1.0]","[0, 1, 1, 1]","[0.0, 0.0, 0.0, 0.0]"
9,OR,heaviside,1.0,5,"[1.0, 1.0]",[0.0],"[0.0, 1.0, 1.0, 1.0]","[0, 1, 1, 1]","[0.0, 0.0, 0.0, 0.0]"


##### Różne ilości epok

In [43]:
# od małych do bardzo dużych wartości
epochs = [1, 2, 3, 4, 5, 10, 20, 30, 40, 50, 100, 200, 300, 400, 500, 1000, 2000, 3000, 4000, 5000]

In [44]:
# AND
df_epochs_and = pd.DataFrame(columns=columns)

for epoch in epochs:
    model_and = Perceptron(2, learning_rate=0.1)
    model_and.train(AND_inputs, AND_labels, epochs=epoch)
    df_epochs_and = df_epochs_and._append({'gate': 'AND', 'activation function': heaviside.__name__, 'learning_rate': 0.1, 'epochs': epoch, 'weights': model_and.weights, 'bias': model_and.bias, 'predictions': model_and.predict(AND_inputs), 'expected': AND_labels, 'errors': model_and.predict(AND_inputs) - AND_labels}, ignore_index=True)
    
df_epochs_and

Unnamed: 0,gate,activation function,learning_rate,epochs,weights,bias,predictions,expected,errors
0,AND,heaviside,0.1,1,"[0.1, 0.1]",[0.1],"[1.0, 1.0, 1.0, 1.0]","[0, 0, 0, 1]","[1.0, 1.0, 1.0, 0.0]"
1,AND,heaviside,0.1,2,"[0.2, 0.1]",[0.0],"[0.0, 1.0, 1.0, 1.0]","[0, 0, 0, 1]","[0.0, 1.0, 1.0, 0.0]"
2,AND,heaviside,0.1,3,"[0.2, 0.1]",[-0.1],"[0.0, 0.0, 1.0, 1.0]","[0, 0, 0, 1]","[0.0, 0.0, 1.0, 0.0]"
3,AND,heaviside,0.1,4,"[0.2, 0.2]",[-0.1],"[0.0, 1.0, 1.0, 1.0]","[0, 0, 0, 1]","[0.0, 1.0, 1.0, 0.0]"
4,AND,heaviside,0.1,5,"[0.2, 0.1]",[-0.2],"[0.0, 0.0, 0.0, 1.0]","[0, 0, 0, 1]","[0.0, 0.0, 0.0, 0.0]"
5,AND,heaviside,0.1,10,"[0.2, 0.1]",[-0.2],"[0.0, 0.0, 0.0, 1.0]","[0, 0, 0, 1]","[0.0, 0.0, 0.0, 0.0]"
6,AND,heaviside,0.1,20,"[0.2, 0.1]",[-0.2],"[0.0, 0.0, 0.0, 1.0]","[0, 0, 0, 1]","[0.0, 0.0, 0.0, 0.0]"
7,AND,heaviside,0.1,30,"[0.2, 0.1]",[-0.2],"[0.0, 0.0, 0.0, 1.0]","[0, 0, 0, 1]","[0.0, 0.0, 0.0, 0.0]"
8,AND,heaviside,0.1,40,"[0.2, 0.1]",[-0.2],"[0.0, 0.0, 0.0, 1.0]","[0, 0, 0, 1]","[0.0, 0.0, 0.0, 0.0]"
9,AND,heaviside,0.1,50,"[0.2, 0.1]",[-0.2],"[0.0, 0.0, 0.0, 1.0]","[0, 0, 0, 1]","[0.0, 0.0, 0.0, 0.0]"


In [45]:
# OR
df_epochs_or = pd.DataFrame(columns=columns)

for epoch in epochs:
    model_or = Perceptron(2, learning_rate=0.1)
    model_or.train(OR_inputs, OR_labels, epochs=epoch)
    df_epochs_or = df_epochs_or._append({'gate': 'OR', 'activation function': heaviside.__name__, 'learning_rate': 0.1, 'epochs': epoch, 'weights': model_or.weights, 'bias': model_or.bias, 'predictions': model_or.predict(OR_inputs), 'expected': OR_labels, 'errors': model_or.predict(OR_inputs) - OR_labels}, ignore_index=True)
    
df_epochs_or

Unnamed: 0,gate,activation function,learning_rate,epochs,weights,bias,predictions,expected,errors
0,OR,heaviside,0.1,1,"[0.0, 0.1]",[0.1],"[1.0, 1.0, 1.0, 1.0]","[0, 1, 1, 1]","[1.0, 0.0, 0.0, 0.0]"
1,OR,heaviside,0.1,2,"[0.1, 0.1]",[0.1],"[1.0, 1.0, 1.0, 1.0]","[0, 1, 1, 1]","[1.0, 0.0, 0.0, 0.0]"
2,OR,heaviside,0.1,3,"[0.1, 0.1]",[0.0],"[0.0, 1.0, 1.0, 1.0]","[0, 1, 1, 1]","[0.0, 0.0, 0.0, 0.0]"
3,OR,heaviside,0.1,4,"[0.1, 0.1]",[0.0],"[0.0, 1.0, 1.0, 1.0]","[0, 1, 1, 1]","[0.0, 0.0, 0.0, 0.0]"
4,OR,heaviside,0.1,5,"[0.1, 0.1]",[0.0],"[0.0, 1.0, 1.0, 1.0]","[0, 1, 1, 1]","[0.0, 0.0, 0.0, 0.0]"
5,OR,heaviside,0.1,10,"[0.1, 0.1]",[0.0],"[0.0, 1.0, 1.0, 1.0]","[0, 1, 1, 1]","[0.0, 0.0, 0.0, 0.0]"
6,OR,heaviside,0.1,20,"[0.1, 0.1]",[0.0],"[0.0, 1.0, 1.0, 1.0]","[0, 1, 1, 1]","[0.0, 0.0, 0.0, 0.0]"
7,OR,heaviside,0.1,30,"[0.1, 0.1]",[0.0],"[0.0, 1.0, 1.0, 1.0]","[0, 1, 1, 1]","[0.0, 0.0, 0.0, 0.0]"
8,OR,heaviside,0.1,40,"[0.1, 0.1]",[0.0],"[0.0, 1.0, 1.0, 1.0]","[0, 1, 1, 1]","[0.0, 0.0, 0.0, 0.0]"
9,OR,heaviside,0.1,50,"[0.1, 0.1]",[0.0],"[0.0, 1.0, 1.0, 1.0]","[0, 1, 1, 1]","[0.0, 0.0, 0.0, 0.0]"


##### Różne funkcje aktywacji

In [46]:
activation_functions = [heaviside, sigmoid, relu, leaky_relu, tanh]

In [47]:
# AND
df_af_and = pd.DataFrame(columns=columns)

for af in activation_functions:
    model_and = Perceptron(2, learning_rate=0.1, activation_function=af)
    model_and.train(AND_inputs, AND_labels, epochs=1000)
    df_af_and = df_af_and._append({'gate': 'AND', 'activation function': af.__name__, 'learning_rate': 0.1, 'epochs': 1000, 'weights': model_and.weights, 'bias': model_and.bias, 'predictions': np.round(model_and.predict(AND_inputs), 2), 'expected': AND_labels, 'errors': np.round(model_and.predict(AND_inputs), 2) - AND_labels}, ignore_index=True)
    
df_af_and

Unnamed: 0,gate,activation function,learning_rate,epochs,weights,bias,predictions,expected,errors
0,AND,heaviside,0.1,1000,"[0.2, 0.1]",[-0.2],"[0.0, 0.0, 0.0, 1.0]","[0, 0, 0, 1]","[0.0, 0.0, 0.0, 0.0]"
1,AND,sigmoid,0.1,1000,"[5.602369772963133, 5.596276171199277]",[-8.567229481512781],"[0.0, 0.05, 0.05, 0.93]","[0, 0, 0, 1]","[0.0, 0.05, 0.05, -0.06999999999999995]"
2,AND,relu,0.1,1000,"[0.9999999976810108, 0.9999999975378018]",[-0.9999999963841002],"[0.0, 0.0, 0.0, 1.0]","[0, 0, 0, 1]","[0.0, 0.0, 0.0, 0.0]"
3,AND,leaky_relu,0.1,1000,"[0.9844054567174376, 0.983430797763332]",[-0.9746588672583987],"[-0.01, 0.01, 0.01, 0.99]","[0, 0, 0, 1]","[-0.01, 0.01, 0.01, -0.010000000000000009]"
4,AND,tanh,0.1,1000,"[0.6458267921134033, 0.6146129071981778]",[-0.3229133960567015],"[-0.31, 0.28, 0.31, 0.73]","[0, 0, 0, 1]","[-0.31, 0.28, 0.31, -0.27]"


In [48]:
# OR
df_af_or = pd.DataFrame(columns=columns)

for af in activation_functions:
    model_or = Perceptron(2, learning_rate=0.1, activation_function=af)
    model_or.train(OR_inputs, OR_labels, epochs=2000)
    df_af_or = df_af_or._append({'gate': 'OR', 'activation function': af.__name__, 'learning_rate': 0.1, 'epochs': 2000, 'weights': model_or.weights, 'bias': model_or.bias, 'predictions': np.round(model_or.predict(OR_inputs), 2), 'expected': OR_labels, 'errors': np.round(model_or.predict(OR_inputs), 2) - OR_labels}, ignore_index=True)
    
df_af_or

Unnamed: 0,gate,activation function,learning_rate,epochs,weights,bias,predictions,expected,errors
0,OR,heaviside,0.1,2000,"[0.1, 0.1]",[0.0],"[0.0, 1.0, 1.0, 1.0]","[0, 1, 1, 1]","[0.0, 0.0, 0.0, 0.0]"
1,OR,sigmoid,0.1,2000,"[8.207698543772604, 8.209640649850687]",[-3.636216228347191],"[0.03, 0.99, 0.99, 1.0]","[0, 1, 1, 1]","[0.03, -0.010000000000000009, -0.0100000000000..."
2,OR,relu,0.1,2000,"[0.44444444444444414, 0.47222222222222193]",[0.2777777777777781],"[0.28, 0.75, 0.72, 1.19]","[0, 1, 1, 1]","[0.28, -0.25, -0.28, 0.18999999999999995]"
3,OR,leaky_relu,0.1,2000,"[0.44444444444444414, 0.47222222222222193]",[0.2777777777777781],"[0.28, 0.75, 0.72, 1.19]","[0, 1, 1, 1]","[0.28, -0.25, -0.28, 0.18999999999999995]"
4,OR,tanh,0.1,2000,"[3.3175174939326086, 3.318746020709793]",[0.0052209457622765226],"[0.01, 1.0, 1.0, 1.0]","[0, 1, 1, 1]","[0.01, 0.0, 0.0, 0.0]"


#### Wnioski
Wartość `learning_rate` służy do określenia jak bardzo wagi i bias są aktualizowane w każdej iteracji. Dla małych wartości `learning_rate` model uczy się wolniej, ale może osiągnąć lepsze wyniki. Dla dużych wartości `learning_rate` model uczy się szybciej, ale może osiągnąć gorsze wyniki. Dla wartości `learning_rate` bliskich 0 model uczy się bardzo wolno, a dla wartości `learning_rate` bliskich 1 model uczy się bardzo szybko. Dla wartości `learning_rate` większych od 1 model może nie nauczyć się niczego, ponieważ aktualizacje wag i bias będą zbyt duże, co doprowadzi do sytuacji, w której nigdy nie dojdziemy do szukanego minimum.

Niemniej jednak w przypadku prostego modelu pojedynczego perceptronu oraz zbioru uczącego AND i OR wartość `learning_rate` wpływa jedynie na ostateczną postać wagi i bias. Parametr ten wpływa na to, że wagi i bias są odpowiedno przeskalowane, ale dla dowolnej wartości `learning_rate` zawsze znajdziemy takie wagi i bias, które pozwolą na poprawne przewidywanie wartości dla bramek logicznych AND i OR.

Wartość `epochs` służy do określenia ile razy model ma przejść przez cały zbiór uczący. Im większa wartość, tych więcej iteracji model ma na nauczenie się relacji zawartej w danych. 

W przypadku perceptronu dla bramek logicznych AND i OR wartość `epochs` nie ma znaczącego wpływu na wyniki, ponieważ model jest na tyle prosty, że jest w stanie nauczyć się relacji w danych w kilku iteracjach. Dla bardziej skomplikowanych modeli i zbiorów danych wartość `epochs` ma większe znaczenie, ponieważ model musi przejść przez zbiór uczący wielokrotnie, aby nauczyć się relacji w danych (i co ważne, nie przeuczyć się - liczba epok musi być tak dopasowana, aby nie wystąpiło zjawisko nadmiernego dopasowania). Z powyższych obserwacji wynika, że wartość `epochs` dla uczenia perceptronu bramek logicznych AND wynosząca 5 jest wystarczająca, by model nauczył się dawać poprawne predykcje, natomiast dla bramek logicznych OR jest to 3 (oczywiście przy powyższej implementacji i założeniach, funkcji aktywacji itd).

### Neuron sigmoidalny

In [49]:
class SigmoidNeuron:
    """
    Sigmoid neuron model.
    """
    
    def __init__(self, input_size, learning_rate=0.1, seed=0, activation_function=sigmoid):
        """
        :param input_size: number of input features
        :param learning_rate: learning rate hyperparameter (default: 0.1)
        :param seed: random seed for reproducibility (default: 0)
        :param activation_function: activation function (default: sigmoid)
        Sigmoid neuron initialization method.
        """
        
        # seed for reproducibility
        self.rng = np.random.RandomState(seed=seed)
        
        # weights
        self.weights = self.rng.uniform(low=-0.1, high=0.1, size=input_size)
        
        # bias
        self.bias = self.rng.uniform(low=-0.1, high=0.1, size=1)
        
        # hyperparameters
        self.learning_rate = learning_rate
        
        # activation function
        self.activation_function = activation_function
    
    def feedforward(self, inputs):
        return self.activation_function(np.matmul(inputs, self.weights.T) + self.bias)
    
    def backpropagation(self, inputs, target):
        # feedforward
        predicted = self.feedforward(inputs)
        
        # error
        error = predicted - target
        
        # derivative of the loss function with respect to the weights and bias
        dC_dw = np.dot(inputs.T, error)
        dC_db = np.sum(error)
        
        # update weights and bias
        self.weights -= self.learning_rate * dC_dw
        self.bias -= self.learning_rate * dC_db
    
    def loss(self, inputs, target):
        # feedforward
        prediction = self.feedforward(inputs)
        
        # loss
        return 0.5 * np.square(prediction - target)
    
    def train(self, training_inputs, training_labels, epochs=1, verbose=False):
        # train loop
        for epoch in range(epochs):
            total_loss = 0
            for inputs, target in zip(training_inputs, training_labels):
                inputs = np.array(inputs, ndmin=2)
                
                # backpropagation
                self.backpropagation(inputs, target)
                
                # epoch loss
                total_loss += self.loss(inputs, target)
                if verbose and (epoch + 1) % 100 == 0:
                    print(f"Epoch {epoch + 1}/{epochs}, total loss: {total_loss}")

In [50]:
# AND
model_and = SigmoidNeuron(2, learning_rate=0.1)
model_and.train(AND_inputs, AND_labels, epochs=1000)

print(f"Wagi: {model_and.weights}")
print(f"Bias: {model_and.bias}")
print(f"Predykcje: {model_and.feedforward(AND_inputs)}")
print(f"Wartości oczekiwane: {AND_labels}\n")

# OR
model_or = SigmoidNeuron(2, learning_rate=0.1)
model_or.train(OR_inputs, OR_labels, epochs=1000)

print(f"Wagi: {model_or.weights}")
print(f"Bias: {model_or.bias}")
print(f"Predykcje: {model_or.feedforward(OR_inputs)}")
print(f"Wartości oczekiwane: {OR_labels}\n")


Wagi: [5.60282667 5.59673575]
Bias: [-8.56791652]
Predykcje: [1.90072190e-04 4.87449428e-02 4.90281494e-02 9.32870694e-01]
Wartości oczekiwane: [0 0 0 1]

Wagi: [6.78425253 6.78810429]
Bias: [-2.91436951]
Predykcje: [0.05144778 0.97964243 0.97956547 0.99997649]
Wartości oczekiwane: [0 1 1 1]


##### Różne wartości learning rate

In [51]:
learning_rates = [0.00001, 0.0001, 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0, 3.0, 5.0, 7.0, 9.0, 10.0, 30.0, 50.0, 70.0, 90.0, 100.0, 300.0, 500.0, 700.0, 900.0, 1000.0]

In [52]:
# AND
df_lr_and = pd.DataFrame(columns=columns)

for lr in learning_rates:
    model_and = SigmoidNeuron(2, learning_rate=lr)
    model_and.train(AND_inputs, AND_labels, epochs=100)
    df_lr_and = df_lr_and._append({'gate': 'AND', 'activation function': sigmoid.__name__, 'learning_rate': lr, 'epochs': 1000, 'weights': model_and.weights, 'bias': model_and.bias, 'predictions': np.round(model_and.feedforward(AND_inputs), 2), 'expected': AND_labels, 'errors': np.round(model_and.feedforward(AND_inputs) - AND_labels, 2)}, ignore_index=True)
    
df_lr_and

Unnamed: 0,gate,activation function,learning_rate,epochs,weights,bias,predictions,expected,errors
0,AND,sigmoid,1e-05,1000,"[0.009737070743234265, 0.04300392884882873]",[0.019506279256317212],"[0.5, 0.52, 0.51, 0.52]","[0, 0, 0, 1]","[0.5, 0.52, 0.51, -0.48]"
1,AND,sigmoid,0.0001,1000,"[0.009531306737583638, 0.042723303690111014]",[0.010137314442084823],"[0.5, 0.51, 0.5, 0.52]","[0, 0, 0, 1]","[0.5, 0.51, 0.5, -0.48]"
2,AND,sigmoid,0.001,1000,"[0.009803004925988828, 0.04224428305354898]",[-0.07897328470309319],"[0.48, 0.49, 0.48, 0.49]","[0, 0, 0, 1]","[0.48, 0.49, 0.48, -0.51]"
3,AND,sigmoid,0.01,1000,"[0.14880944862150583, 0.17397840984276913]",[-0.6791856468956367],"[0.34, 0.38, 0.37, 0.41]","[0, 0, 0, 1]","[0.34, 0.38, 0.37, -0.59]"
4,AND,sigmoid,0.1,1000,"[1.8188862600575684, 1.7977855343202513]",[-2.9408580784034233],"[0.05, 0.24, 0.25, 0.66]","[0, 0, 0, 1]","[0.05, 0.24, 0.25, -0.34]"
5,AND,sigmoid,0.3,1000,"[3.515449937540129, 3.466281649029582]",[-5.350279654965837],"[0.0, 0.13, 0.14, 0.84]","[0, 0, 0, 1]","[0.0, 0.13, 0.14, -0.16]"
6,AND,sigmoid,0.5,1000,"[4.489165052643436, 4.434010001743501]",[-6.780774305227138],"[0.0, 0.09, 0.09, 0.89]","[0, 0, 0, 1]","[0.0, 0.09, 0.09, -0.11]"
7,AND,sigmoid,0.7,1000,"[5.175500994094484, 5.118615919098224]",[-7.799343584666728],"[0.0, 0.06, 0.07, 0.92]","[0, 0, 0, 1]","[0.0, 0.06, 0.07, -0.08]"
8,AND,sigmoid,0.9,1000,"[5.704148832393772, 5.646848714004917]",[-8.587674800812927],"[0.0, 0.05, 0.05, 0.94]","[0, 0, 0, 1]","[0.0, 0.05, 0.05, -0.06]"
9,AND,sigmoid,1.0,1000,"[5.928856455940704, 5.871562208446874]",[-8.923516202598599],"[0.0, 0.05, 0.05, 0.95]","[0, 0, 0, 1]","[0.0, 0.05, 0.05, -0.05]"


In [53]:
# OR
df_lr_or = pd.DataFrame(columns=columns)

for lr in learning_rates:
    model_or = SigmoidNeuron(2, learning_rate=lr)
    model_or.train(OR_inputs, OR_labels, epochs=100)
    df_lr_or = df_lr_or._append({'gate': 'OR', 'activation function': sigmoid.__name__, 'learning_rate': lr, 'epochs': 1000, 'weights': model_or.weights, 'bias': model_or.bias, 'predictions': np.round(model_or.feedforward(OR_inputs), 2), 'expected': OR_labels, 'errors': np.round(model_or.feedforward(OR_inputs) - OR_labels, 2)}, ignore_index=True)
    
df_lr_or

Unnamed: 0,gate,activation function,learning_rate,epochs,weights,bias,predictions,expected,errors
0,OR,sigmoid,1e-05,1000,"[0.0107361932374163, 0.04400305413243955]",[0.021504783722504917],"[0.51, 0.52, 0.51, 0.52]","[0, 1, 1, 1]","[0.51, -0.48, -0.49, -0.48]"
1,OR,sigmoid,0.0001,1000,"[0.019443987547883016, 0.052636263627169756]",[0.029988480462205914],"[0.51, 0.52, 0.51, 0.53]","[0, 1, 1, 1]","[0.51, -0.48, -0.49, -0.47]"
2,OR,sigmoid,0.001,1000,"[0.10149743819530734, 0.13396662073353405]",[0.10684991126297322],"[0.53, 0.56, 0.55, 0.58]","[0, 1, 1, 1]","[0.53, -0.44, -0.45, -0.42]"
3,OR,sigmoid,0.01,1000,"[0.6276554067284316, 0.6549890518403566]",[0.41022906302968604],"[0.6, 0.74, 0.74, 0.84]","[0, 1, 1, 1]","[0.6, -0.26, -0.26, -0.16]"
4,OR,sigmoid,0.1,1000,"[2.507140927039107, 2.5282442628704165]",[-0.5778533936024522],"[0.36, 0.88, 0.87, 0.99]","[0, 1, 1, 1]","[0.36, -0.12, -0.13, -0.01]"
5,OR,sigmoid,0.3,1000,"[4.39339482000521, 4.42344328184911]",[-1.6588114138881587],"[0.16, 0.94, 0.94, 1.0]","[0, 1, 1, 1]","[0.16, -0.06, -0.06, -0.0]"
6,OR,sigmoid,0.5,1000,"[5.420125566852021, 5.454089760500101]",[-2.200454279163118],"[0.1, 0.96, 0.96, 1.0]","[0, 1, 1, 1]","[0.1, -0.04, -0.04, -0.0]"
7,OR,sigmoid,0.7,1000,"[6.118429909495018, 6.154153937975927]",[-2.5614842675798806],"[0.07, 0.97, 0.97, 1.0]","[0, 1, 1, 1]","[0.07, -0.03, -0.03, -0.0]"
8,OR,sigmoid,0.9,1000,"[6.645494792573381, 6.682130245915327]",[-2.8315141819668845],"[0.06, 0.98, 0.98, 1.0]","[0, 1, 1, 1]","[0.06, -0.02, -0.02, -0.0]"
9,OR,sigmoid,1.0,1000,"[6.867245925383133, 6.90417313654303]",[-2.9446435612960706],"[0.05, 0.98, 0.98, 1.0]","[0, 1, 1, 1]","[0.05, -0.02, -0.02, -0.0]"


##### Różne ilości epok

In [54]:
epochs = [1, 2, 3, 4, 5, 10, 20, 30, 40, 50, 100, 200, 300, 400, 500, 1000, 2000, 3000, 4000, 5000]

In [55]:
# AND
df_epochs_and = pd.DataFrame(columns=columns)

for epoch in epochs:
    model_and = SigmoidNeuron(2, learning_rate=0.3)
    model_and.train(AND_inputs, AND_labels, epochs=epoch)
    df_epochs_and = df_epochs_and._append({'gate': 'AND', 'activation function': sigmoid.__name__, 'learning_rate': 0.3, 'epochs': epoch, 'weights': model_and.weights, 'bias': model_and.bias, 'predictions': np.round(model_and.feedforward(AND_inputs), 2), 'expected': AND_labels, 'errors': np.round(model_and.feedforward(AND_inputs) - AND_labels, 2)}, ignore_index=True)
    
df_epochs_and

Unnamed: 0,gate,activation function,learning_rate,epochs,weights,bias,predictions,expected,errors
0,AND,sigmoid,0.3,1,"[0.074938399215815, 0.0950731132379126]",[-0.20922095792653186],"[0.45, 0.47, 0.47, 0.49]","[0, 0, 0, 1]","[0.45, 0.47, 0.47, -0.51]"
1,AND,sigmoid,0.3,2,"[0.1534903736036208, 0.16255439372509362]",[-0.3964912425767355],"[0.4, 0.44, 0.44, 0.48]","[0, 0, 0, 1]","[0.4, 0.44, 0.44, -0.52]"
2,AND,sigmoid,0.3,3,"[0.23823868505762305, 0.23777819357118624]",[-0.5560710354927241],"[0.36, 0.42, 0.42, 0.48]","[0, 0, 0, 1]","[0.36, 0.42, 0.42, -0.52]"
3,AND,sigmoid,0.3,4,"[0.32502700216804586, 0.3162261279138247]",[-0.6970275539066508],"[0.33, 0.41, 0.41, 0.49]","[0, 0, 0, 1]","[0.33, 0.41, 0.41, -0.51]"
4,AND,sigmoid,0.3,5,"[0.4114339591937235, 0.39523572322067424]",[-0.8249992533551076],"[0.3, 0.39, 0.4, 0.5]","[0, 0, 0, 1]","[0.3, 0.39, 0.4, -0.5]"
5,AND,sigmoid,0.3,10,"[0.806809020372371, 0.7634570750444343]",[-1.3587618054860977],"[0.2, 0.36, 0.37, 0.55]","[0, 0, 0, 1]","[0.2, 0.36, 0.37, -0.45]"
6,AND,sigmoid,0.3,20,"[1.4032199598801471, 1.3345344303481284]",[-2.175016819184286],"[0.1, 0.3, 0.32, 0.64]","[0, 0, 0, 1]","[0.1, 0.3, 0.32, -0.36]"
7,AND,sigmoid,0.3,30,"[1.8401708690075322, 1.76501075902905]",[-2.8071339073674966],"[0.06, 0.26, 0.28, 0.69]","[0, 0, 0, 1]","[0.06, 0.26, 0.28, -0.31]"
8,AND,sigmoid,0.3,40,"[2.1887232337610083, 2.1146384896516666]",[-3.3261957974630225],"[0.03, 0.23, 0.24, 0.73]","[0, 0, 0, 1]","[0.03, 0.23, 0.24, -0.27]"
9,AND,sigmoid,0.3,50,"[2.4814314811925224, 2.4111908679818472]",[-3.7685890915123754],"[0.02, 0.2, 0.22, 0.75]","[0, 0, 0, 1]","[0.02, 0.2, 0.22, -0.25]"


In [56]:
# OR
df_epochs_or = pd.DataFrame(columns=columns)

for epoch in epochs:
    model_or = SigmoidNeuron(2, learning_rate=0.3)
    model_or.train(OR_inputs, OR_labels, epochs=epoch)
    df_epochs_or = df_epochs_or._append({'gate': 'OR', 'activation function': sigmoid.__name__, 'learning_rate': 0.3, 'epochs': epoch, 'weights': model_or.weights, 'bias': model_or.bias, 'predictions': np.round(model_or.feedforward(OR_inputs), 2), 'expected': OR_labels, 'errors': np.round(model_or.feedforward(OR_inputs) - OR_labels, 2)}, ignore_index=True)
    
df_epochs_or

Unnamed: 0,gate,activation function,learning_rate,epochs,weights,bias,predictions,expected,errors
0,OR,sigmoid,0.3,1,"[0.2682871679667611, 0.3108065821694219]",[0.28412781082441424],"[0.57, 0.64, 0.63, 0.7]","[0, 1, 1, 1]","[0.57, -0.36, -0.37, -0.3]"
1,OR,sigmoid,0.3,2,"[0.45337491980295896, 0.5013120675693208]",[0.4167328932039728],"[0.6, 0.71, 0.7, 0.8]","[0, 1, 1, 1]","[0.6, -0.29, -0.3, -0.2]"
2,OR,sigmoid,0.3,3,"[0.598815107259879, 0.6499510702742095]",[0.47844546858133036],"[0.62, 0.76, 0.75, 0.85]","[0, 1, 1, 1]","[0.62, -0.24, -0.25, -0.15]"
3,OR,sigmoid,0.3,4,"[0.7208253718268421, 0.7739402500755562]",[0.4993189079706561],"[0.62, 0.78, 0.77, 0.88]","[0, 1, 1, 1]","[0.62, -0.22, -0.23, -0.12]"
4,OR,sigmoid,0.3,5,"[0.8277439664378151, 0.8821285811924395]",[0.49522717598892957],"[0.62, 0.8, 0.79, 0.9]","[0, 1, 1, 1]","[0.62, -0.2, -0.21, -0.1]"
5,OR,sigmoid,0.3,10,"[1.2500538881181296, 1.3064509686802837]",[0.32295955043782015],"[0.58, 0.84, 0.83, 0.95]","[0, 1, 1, 1]","[0.58, -0.16, -0.17, -0.05]"
6,OR,sigmoid,0.3,20,"[1.8788091223718002, 1.9335526845852207]",[-0.11767100316816659],"[0.47, 0.86, 0.85, 0.98]","[0, 1, 1, 1]","[0.47, -0.14, -0.15, -0.02]"
7,OR,sigmoid,0.3,30,"[2.377895665357119, 2.4292523277416946]",[-0.46577260986554864],"[0.39, 0.88, 0.87, 0.99]","[0, 1, 1, 1]","[0.39, -0.12, -0.13, -0.01]"
8,OR,sigmoid,0.3,40,"[2.793950944526616, 2.8415133107432093]",[-0.735283556226876],"[0.32, 0.89, 0.89, 0.99]","[0, 1, 1, 1]","[0.32, -0.11, -0.11, -0.01]"
9,OR,sigmoid,0.3,50,"[3.149177137887739, 3.1930292470869186]",[-0.9528755191775193],"[0.28, 0.9, 0.9, 1.0]","[0, 1, 1, 1]","[0.28, -0.1, -0.1, -0.0]"


##### Różne funkcje aktywacji

In [57]:
activation_functions = [heaviside, relu, leaky_relu, sigmoid, tanh]

In [58]:
# AND
df_af_and = pd.DataFrame(columns=columns)

for af in activation_functions:
    model_and = SigmoidNeuron(2, learning_rate=0.3, activation_function=af)
    model_and.train(AND_inputs, AND_labels, epochs=1000)
    df_af_and = df_af_and._append({'gate': 'AND', 'activation function': af.__name__, 'learning_rate': 0.3, 'epochs': 1000, 'weights': model_and.weights, 'bias': model_and.bias, 'predictions': np.round(model_and.feedforward(AND_inputs), 2), 'expected': AND_labels, 'errors': np.round(model_and.feedforward(AND_inputs) - AND_labels, 2)}, ignore_index=True)
    
df_af_and

Unnamed: 0,gate,activation function,learning_rate,epochs,weights,bias,predictions,expected,errors
0,AND,heaviside,0.3,1000,"[0.6097627007854649, 0.34303787327448393]",[-0.8794473247856713],"[0.0, 0.0, 0.0, 1.0]","[0, 0, 0, 1]","[0.0, 0.0, 0.0, 0.0]"
1,AND,relu,0.3,1000,"[0.9999999999999998, 0.9999999999999997]",[-0.9999999999999997],"[0.0, 0.0, 0.0, 1.0]","[0, 0, 0, 1]","[0.0, 0.0, 0.0, -0.0]"
2,AND,leaky_relu,0.3,1000,"[0.9921414538310408, 0.9891944990176812]",[-0.9823182711198418],"[-0.01, 0.01, 0.01, 1.0]","[0, 0, 0, 1]","[-0.01, 0.01, 0.01, -0.0]"
3,AND,sigmoid,0.3,1000,"[7.833260266400745, 7.827009109782254]",[-11.905421979318573],"[0.0, 0.02, 0.02, 0.98]","[0, 0, 0, 1]","[0.0, 0.02, 0.02, -0.02]"
4,AND,tanh,0.3,1000,"[0.7887429603477558, 0.6762061466965057]",[-0.3943714801738777],"[-0.38, 0.27, 0.38, 0.79]","[0, 0, 0, 1]","[-0.38, 0.27, 0.38, -0.21]"


In [59]:
# OR
df_af_or = pd.DataFrame(columns=columns)

for af in activation_functions:
    model_or = SigmoidNeuron(2, learning_rate=0.3, activation_function=af)
    model_or.train(OR_inputs, OR_labels, epochs=1000)
    df_af_or = df_af_or._append({'gate': 'OR', 'activation function': af.__name__, 'learning_rate': 0.3, 'epochs': 1000, 'weights': model_or.weights, 'bias': model_or.bias, 'predictions': np.round(model_or.feedforward(OR_inputs), 2), 'expected': OR_labels, 'errors': np.round(model_or.feedforward(OR_inputs) - OR_labels, 2)}, ignore_index=True)

df_af_or

Unnamed: 0,gate,activation function,learning_rate,epochs,weights,bias,predictions,expected,errors
0,OR,heaviside,0.3,1000,"[0.309762700785465, 0.3430378732744839]",[-0.27944732478567125],"[0.0, 1.0, 1.0, 1.0]","[0, 1, 1, 1]","[0.0, 0.0, 0.0, 0.0]"
1,OR,relu,0.3,1000,"[0.28571428571428564, 0.3928571428571428]",[0.35714285714285715],"[0.36, 0.75, 0.64, 1.04]","[0, 1, 1, 1]","[0.36, -0.25, -0.36, 0.04]"
2,OR,leaky_relu,0.3,1000,"[0.28571428571428564, 0.3928571428571428]",[0.35714285714285715],"[0.36, 0.75, 0.64, 1.04]","[0, 1, 1, 1]","[0.36, -0.25, -0.36, 0.04]"
3,OR,sigmoid,0.3,1000,"[9.047619812713735, 9.051559097189308]",[-4.058605842307561],"[0.02, 0.99, 0.99, 1.0]","[0, 1, 1, 1]","[0.02, -0.01, -0.01, -0.0]"
4,OR,tanh,0.3,1000,"[3.5296696083207535, 3.5324611952368996]",[0.0034198910381548265],"[0.0, 1.0, 1.0, 1.0]","[0, 1, 1, 1]","[0.0, -0.0, -0.0, -0.0]"


#### Wnioski

W przypadku neuronu sigmoidalnego znacznie lepiej widać istotę parametrów `learning_rate` i `epochs`. Wartość `learning_rate` służy do określenia jak bardzo wagi i bias są aktualizowane w każdej iteracji. Dla małych wartości `learning_rate` model uczy się wolniej (przez co będzie potrzebował więcej epok, aby dobrze się wytrenować), dla większych wartości `learning_rate` model uczy się szybciej (ale istnieje ryzyko, że wartość może być zbyt duża, co doprowadzi do tego, że nie będziemy w stanie dojść do optymalnego rozwiązania). Parametr `epochs` natomiast określa, podobnie jak to było w przypadku perceptronu, ile razy model ma przejść przez cały zbiór uczący. Im większa wartość, tym więcej iteracji model na dopasowanie się do danych. Powyższe obserwacje pokazują, że zbyt mała liczba epok może prowadzić do niedouczenia, co jest zjawiskiem niepożądanym. Oba te parametry są kluczowe dla osiągnięcia dobrych wyników w uczeniu maszynowym i aby efektywnie i szybko uczyć modele, niezbędne jest odpowiednie dobranie ich wartości.