**Podpięcie Google Drive:**

In [0]:
# Mount Google Drive into the Colab runtime; the mount call prompts for an
# authorization code before the drive becomes available.
from google.colab import drive
drive.mount('/content/gdrive')
# Project root on the mounted drive; input data and result files are addressed
# relative to this directory.
BASE_DIR = '/content/gdrive/My Drive/DL2020/Projekt1a'

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive


**Funkcje aktywacji:**

In [0]:
import numpy as np


def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))`` evaluated without overflow.

    Args:
        x: scalar or numpy array of pre-activations.

    Returns:
        Element-wise sigmoid of ``x`` (numpy scalar or array of the same shape).
    """
    # log(1 + exp(-x)) computed through logaddexp stays finite for any x,
    # whereas a direct exp(-x) overflows for large negative inputs.
    return np.exp(-np.logaddexp(0.0, -x))


def sigmoid_derivative(x):
    """Derivative of the logistic sigmoid, ``s(x) * (1 - s(x))``.

    Args:
        x: scalar or numpy array of pre-activations.

    Returns:
        Element-wise derivative with the same shape as ``sigmoid(x)``.
    """
    activation = sigmoid(x)
    complement = 1.0 - activation
    return activation * complement


def relu(x):
    """Rectified linear unit: element-wise maximum of ``x`` and zero.

    Args:
        x: scalar or numpy array of pre-activations.

    Returns:
        ``x`` with every negative entry replaced by zero.
    """
    rectified = np.maximum(x, 0)
    return rectified


def relu_derivative(x):
    """Derivative of ReLU: 1.0 where ``x`` is strictly positive, 0.0 elsewhere.

    Args:
        x: scalar or numpy array of pre-activations.

    Returns:
        Float array of ones and zeros shaped like ``x``.
    """
    is_positive = x > 0
    return np.where(is_positive, 1.0, 0.0)


def identity(x):
    """Identity activation: return the input unchanged."""
    return x


def identity_derivative(x):
    """Derivative of the identity activation.

    Returns the scalar 1.0 regardless of the shape of ``x``.
    """
    return 1.0


def tanh(x):
    """Hyperbolic tangent activation, applied element-wise via ``np.tanh``."""
    return np.tanh(x)


def tanh_derivative(x):
    """Derivative of the hyperbolic tangent, ``1 - tanh(x)**2``.

    Args:
        x: scalar or numpy array of pre-activations.

    Returns:
        Element-wise derivative shaped like ``x``.
    """
    squashed = np.tanh(x)
    return 1.0 - np.square(squashed)


**Ładowanie danych:**

In [0]:
import numpy as np
import pandas as pd
import os


def load_classification(csv_filename, argument_column_names, class_column_name='cls'):
    """Load a classification data set from a CSV file.

    Args:
        csv_filename: path of the CSV file to read.
        argument_column_names: names of the columns used as network inputs.
        class_column_name: name of the column holding the class label.

    Returns:
        A list of ``(x, y)`` tuples, one per CSV row. ``x`` is a float column
        vector of shape ``(len(argument_column_names), 1)`` and ``y`` is the
        vector produced by ``vectorize_class`` for the row's label, whose
        length equals the number of distinct labels in the file.
    """
    df = pd.read_csv(csv_filename)

    output_layer_neuron_count = len(df[class_column_name].unique())

    # Columns are selected by label rather than through itertuples()/getattr,
    # because itertuples renames columns that are not valid Python identifiers.
    inputs = df[list(argument_column_names)].to_numpy(dtype=float)
    labels = df[class_column_name]

    return [(input_row.reshape(-1, 1).copy(),
             vectorize_class(label, output_layer_neuron_count))
            for input_row, label in zip(inputs, labels)]


def load_regression(csv_filename, input_column_names, output_column_names):
    """Load a regression data set from a CSV file.

    Args:
        csv_filename: path of the CSV file to read.
        input_column_names: names of the columns used as network inputs.
        output_column_names: names of the columns used as expected outputs.

    Returns:
        A list of ``(x, y)`` tuples, one per CSV row. ``x`` has shape
        ``(len(input_column_names), 1)`` and ``y`` has shape
        ``(len(output_column_names), 1)``; both are float column vectors.
    """
    df = pd.read_csv(csv_filename)

    # Columns are selected by label rather than through itertuples()/getattr,
    # because itertuples renames columns that are not valid Python identifiers.
    inputs = df[list(input_column_names)].to_numpy(dtype=float)
    outputs = df[list(output_column_names)].to_numpy(dtype=float)

    # Copy each row so every sample owns its own array.
    return [(input_row.reshape(-1, 1).copy(), output_row.reshape(-1, 1).copy())
            for input_row, output_row in zip(inputs, outputs)]


def load_classification_wrapper(path, name, train_size, test_size=None):
    """Load the train/test pair of a classification data set.

    Files are expected at ``<path>/<name>.train.<train_size>.csv`` and
    ``<path>/<name>.test.<test_size>.csv``; the input columns are ``x`` and
    ``y`` and the label column is the ``load_classification`` default.

    Args:
        path: directory containing the CSV files.
        name: data set name used as the file name prefix.
        train_size: size tag of the training file.
        test_size: size tag of the test file; defaults to ``train_size``.

    Returns:
        ``(train, test)`` as returned by ``load_classification``.
    """
    if test_size is None:
        test_size = train_size

    input_columns = ['x', 'y']
    train_file = os.path.join(path, f'{name}.train.{train_size}.csv')
    test_file = os.path.join(path, f'{name}.test.{test_size}.csv')

    train = load_classification(train_file, input_columns)
    test = load_classification(test_file, input_columns)
    return (train, test)


def load_regression_wrapper(path, name, train_size, test_size=None):
    """Load the train/test pair of a regression data set.

    Files are expected at ``<path>/<name>.train.<train_size>.csv`` and
    ``<path>/<name>.test.<test_size>.csv``; column ``x`` is the input and
    column ``y`` is the expected output.

    Args:
        path: directory containing the CSV files.
        name: data set name used as the file name prefix.
        train_size: size tag of the training file.
        test_size: size tag of the test file; defaults to ``train_size``.

    Returns:
        ``(train, test)`` as returned by ``load_regression``.
    """
    if test_size is None:
        test_size = train_size

    train_file = os.path.join(path, f'{name}.train.{train_size}.csv')
    test_file = os.path.join(path, f'{name}.test.{test_size}.csv')

    train = load_regression(train_file, ['x'], ['y'])
    test = load_regression(test_file, ['x'], ['y'])
    return (train, test)


def vectorize_class(class_id, class_length):
    """Encode a class label as a one-hot column vector.

    Labels are assumed to be numbered from 1 to ``class_length``
    (e.g. 1, 2, 3, 4), so label ``k`` sets row ``k - 1``.

    Args:
        class_id: class label of the sample.
        class_length: total number of classes.

    Returns:
        Array of shape ``(class_length, 1)`` that is zero except for a single 1.
    """
    one_hot = np.zeros((class_length, 1))
    zero_based_index = class_id - 1
    one_hot[zero_based_index] = 1
    return one_hot

def load_data(hidden_layer_sizes, hidden_layers_activation_function, hidden_layers_activation_function_prime, output_layer_activation_function, output_layer_activation_function_prime, cost_derivative, is_bias_enabled, path, name, train_size, test_size=None):
    """Load a classification data set and build a matching ``Network``.

    The input and output layer sizes are taken from the first training
    sample; ``hidden_layer_sizes`` is placed between them.

    Args:
        hidden_layer_sizes: neuron counts of the hidden layers, in order.
        hidden_layers_activation_function: activation used by hidden layers.
        hidden_layers_activation_function_prime: its derivative.
        output_layer_activation_function: activation used by the output layer.
        output_layer_activation_function_prime: its derivative.
        cost_derivative: derivative of the cost w.r.t. the output activations.
        is_bias_enabled: forwarded to the ``Network`` constructor.
        path, name, train_size, test_size: forwarded to
            ``load_classification_wrapper``.

    Returns:
        ``(train_data, test_data, network)``.
    """
    train_data, test_data = load_classification_wrapper(
        path, name, train_size, test_size)

    first_input, first_target = train_data[0]
    sizes = [len(first_input), *hidden_layer_sizes, len(first_target)]

    network = Network(sizes,
                      hidden_layers_activation_function,
                      hidden_layers_activation_function_prime,
                      output_layer_activation_function,
                      output_layer_activation_function_prime,
                      cost_derivative,
                      is_bias_enabled)
    return train_data, test_data, network


**Pochodne funkcji straty:**

In [0]:
def quadratic_cost_derivative(output_activations, y):
    """Derivative of the quadratic cost with respect to the output activations.

    Args:
        output_activations: activations produced by the output layer.
        y: expected output.

    Returns:
        The difference ``output_activations - y``.
    """
    residual = output_activations - y
    return residual

def cross_entropy_cost_derivative(output_activations, y):
    """Derivative of the cross-entropy cost with respect to the output activations.

    Computes ``(a - y) / ((1 - a) * a)``. The activations used in the
    denominator are clipped away from 0 and 1 so that a saturated output does
    not produce a division by zero (inf/NaN gradients).

    Args:
        output_activations: activations produced by the output layer.
        y: expected output.

    Returns:
        Element-wise derivative shaped like ``output_activations``.
    """
    epsilon = 1e-12
    # Only the denominator is clipped; values already inside
    # [epsilon, 1 - epsilon] give exactly the unclipped result.
    safe_activations = np.clip(output_activations, epsilon, 1.0 - epsilon)
    return (output_activations - y) / ((1 - safe_activations) * safe_activations)

**Architektura sieci:**

In [0]:
import numpy as np
import json
import codecs

from enum import Enum

class Problem(Enum):
    """Kind of task a network is trained on.

    The training methods store the value in ``Network.problem`` and embed it
    in the name of the initial history snapshot.
    """
    # Training scored with classification accuracy.
    Classification = 1
    # Training scored with mean squared error.
    Regression = 2


class Network:
    """Fully connected feed-forward network trained with mini-batch gradient descent.

    All hidden layers share one activation function; the output layer has its
    own. Weights and biases are stored per layer as lists of numpy arrays.
    Parameter snapshots taken during training are collected in ``history`` and
    can be written to a JSON file with ``save_history``.
    """

    def __init__(self, sizes, hidden_layers_activation_function, hidden_layers_activation_function_prime, output_layer_activation_function, output_layer_activation_function_prime, cost_derivative, is_bias_enabled):
        """Create a network with normally distributed random parameters.

        Args:
            sizes: neuron counts per layer, input layer first.
            hidden_layers_activation_function: activation of the hidden layers.
            hidden_layers_activation_function_prime: its derivative.
            output_layer_activation_function: activation of the output layer.
            output_layer_activation_function_prime: its derivative.
            cost_derivative: callable ``(output_activations, y)`` returning the
                derivative of the cost w.r.t. the output activations.
            is_bias_enabled: when falsy, biases start at zero and are never
                updated by ``run_batch``.
        """
        self.hidden_layers_activation_function = hidden_layers_activation_function
        self.hidden_layers_activation_function_prime = hidden_layers_activation_function_prime
        self.output_layer_activation_function = output_layer_activation_function
        self.output_layer_activation_function_prime = output_layer_activation_function_prime
        self.cost_derivative = cost_derivative
        self.sizes = sizes
        self.num_layers = len(sizes)
        self.is_bias_enabled = is_bias_enabled
        # Training hyper-parameters; filled in when training starts.
        self.eta = None
        self.epochs = None
        self.batch_size = None
        self.training_data_size = None
        self.problem = None
        self.history = []
        # Biases are drawn before weights so the random stream is consumed in
        # the same order whether or not biases are enabled.
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(sizes[:-1], sizes[1:])]
        if not is_bias_enabled:
            # run_batch never updates biases when they are disabled, so random
            # initial values would stay in the forward pass untrained.
            self.biases = [np.zeros((y, 1)) for y in sizes[1:]]
        self.classification_area_probing = []
        # Summed gradients of the previous and the current mini-batch.
        self.prev_nabla_w = None
        self.prev_nabla_b = None
        self.nabla_w = None
        self.nabla_b = None

    def disable_bias(self):
        """Zero all biases and stop updating them during training."""
        self.biases = [np.zeros((y, 1)) for y in self.sizes[1:]]
        self.is_bias_enabled = False

    def change_hidden_layer_activation_function(self, activation_function, activation_function_prime):
        """Replace the hidden-layer activation function and its derivative."""
        self.hidden_layers_activation_function = activation_function
        self.hidden_layers_activation_function_prime = activation_function_prime

    def change_output_layer_activation_function(self, activation_function, activation_function_prime):
        """Replace the output-layer activation function and its derivative."""
        self.output_layer_activation_function = activation_function
        self.output_layer_activation_function_prime = activation_function_prime

    def feed_forward(self, a):
        """Return the network output for the input column vector ``a``."""
        for bias, weight in zip(self.biases[:-1], self.weights[:-1]):
            a = self.hidden_layers_activation_function(
                np.dot(weight, a) + bias)
        return self.output_layer_activation_function(np.dot(self.weights[-1], a) + self.biases[-1])

    def backpropagate(self, x, y):
        """Compute the cost gradient for a single sample.

        Args:
            x: input column vector.
            y: expected output column vector.

        Returns:
            ``(nabla_b, nabla_w)``: per-layer lists of gradients shaped like
            ``self.biases`` and ``self.weights``.
        """
        nabla_b = [np.zeros(bias.shape) for bias in self.biases]
        nabla_w = [np.zeros(weight.shape) for weight in self.weights]

        activation = x
        activations = [x]  # activations of every layer, input included
        zs = []            # weighted inputs of every non-input layer

        # Forward pass through the hidden layers.
        for bias, weight in zip(self.biases[:-1], self.weights[:-1]):
            z = np.dot(weight, activation) + bias
            zs.append(z)
            activation = self.hidden_layers_activation_function(z)
            activations.append(activation)
        # Forward pass through the output layer.
        z = np.dot(self.weights[-1], activation) + self.biases[-1]
        zs.append(z)
        activation = self.output_layer_activation_function(z)
        activations.append(activation)

        # Error of the output layer.
        derivative = self.cost_derivative(activations[-1], y)
        delta = derivative * \
            self.output_layer_activation_function_prime(zs[-1])

        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())

        # Propagate the error backwards; layer_id counts from the end.
        for layer_id in range(2, self.num_layers):
            z = zs[-layer_id]
            afp = self.hidden_layers_activation_function_prime(z)
            delta = np.dot(self.weights[-layer_id+1].transpose(), delta) * afp
            nabla_b[-layer_id] = delta
            nabla_w[-layer_id] = np.dot(delta,
                                        activations[-layer_id - 1].transpose())

        return (nabla_b, nabla_w)

    def run_batch(self, batch, eta, alpha=0):
        """Apply one gradient step computed from a mini-batch.

        Args:
            batch: sequence of ``(x, y)`` samples.
            eta: learning rate; divided by the batch size before use.
            alpha: factor applied to the summed gradient of the previous
                batch (also divided by the batch size); 0 disables the term.
        """
        self.prev_nabla_b = self.nabla_b
        self.prev_nabla_w = self.nabla_w

        self.nabla_b = [np.zeros(bias.shape) for bias in self.biases]
        self.nabla_w = [np.zeros(weight.shape) for weight in self.weights]

        # Sum the per-sample gradients over the batch.
        for x, y in batch:
            delta_nabla_b, delta_nabla_w = self.backpropagate(x, y)

            self.nabla_b = [nb + dnb for nb,
                            dnb in zip(self.nabla_b, delta_nabla_b)]
            self.nabla_w = [nw + dnw for nw,
                            dnw in zip(self.nabla_w, delta_nabla_w)]

        eta_normalized = eta/len(batch)
        alpha_normalized = alpha/len(batch)

        # The first batch has no previous gradient to add.
        if self.prev_nabla_w is None:
            self.weights = [w - eta_normalized * nw
                            for w, nw in zip(self.weights, self.nabla_w)]
        else:
            self.weights = [w - eta_normalized * nw - alpha_normalized * pnw
                            for w, nw, pnw in zip(self.weights, self.nabla_w, self.prev_nabla_w)]

        if self.is_bias_enabled:
            if self.prev_nabla_b is None:
                self.biases = [b - eta_normalized * nb
                               for b, nb in zip(self.biases, self.nabla_b)]
            else:
                self.biases = [b - eta_normalized * nb - alpha_normalized * pnb
                               for b, nb, pnb in zip(self.biases, self.nabla_b, self.prev_nabla_b)]

    def validate_classification(self, validation_data):
        """Return the fraction of samples whose arg-max output matches the target."""
        correct = 0
        for x, y in validation_data:
            expected = np.argmax(y)
            result = np.argmax(self.feed_forward(x))
            if expected == result:
                correct = correct + 1
        return correct / len(validation_data)

    def validate_regression(self, validation_data):
        """Return the mean squared error averaged over all samples."""
        error = 0
        for x, y in validation_data:
            error = error + np.square(self.feed_forward(x)-y).mean()
        return error / len(validation_data)

    def stochastic_gradient_descent_classification(self, training_data, epochs, batch_size, eta,
                                                   test_data=None, probe=None, alpha=0):
        """Train on a classification task, recording a snapshot after every batch.

        Args:
            training_data: list of ``(x, y)`` samples; shuffled in place at
                the start of every epoch.
            epochs: number of passes over the training data.
            batch_size: number of samples per mini-batch.
            eta: learning rate passed to ``run_batch``.
            test_data: optional samples scored after every batch.
            probe: optional inputs classified after every epoch; the results
                are appended to ``classification_area_probing``.
            alpha: previous-gradient factor passed to ``run_batch``.
        """
        self.epochs = epochs
        self.batch_size = batch_size
        self.eta = eta
        self.training_data_size = len(training_data)
        self.take_snapshot(f'Start {Problem.Classification}', None)
        self.problem = Problem.Classification

        for epoch in range(epochs):
            np.random.shuffle(training_data)
            batches = [training_data[i:i+batch_size] for
                       i in range(0, self.training_data_size, batch_size)]
            for batch_id, batch in enumerate(batches, start=1):
                self.run_batch(batch, eta, alpha)
                train_fitness = self.validate_classification(training_data)
                test_fitness = None
                # Identity check: "!= None" is ambiguous for numpy arrays.
                if test_data is not None:
                    test_fitness = self.validate_classification(test_data)

                self.take_snapshot(
                    f'Epoch {epoch}: [{batch_id}/{len(batches)}]', train_fitness=train_fitness, test_fitness=test_fitness)
            if probe is not None:
                self.classification_area_probing.append(
                    self.classificate(probe))

    def stochastic_gradient_descent_regression(self, training_data, epochs, batch_size, eta,
                                               validation_data=None):
        """Train on a regression task; see ``__stochastic_gradient_descent``."""
        self.__stochastic_gradient_descent(
            training_data, epochs, batch_size, eta, Problem.Regression, validation_data)

    def __stochastic_gradient_descent(self, training_data, epochs, batch_size, eta, problem,
                                      validation_data=None):
        """Generic training loop that records a snapshot after every batch.

        Unlike the classification loop, the training data is not shuffled
        between epochs.

        Args:
            training_data: list of ``(x, y)`` samples.
            epochs: number of passes over the training data.
            batch_size: number of samples per mini-batch.
            eta: learning rate passed to ``run_batch``.
            problem: ``Problem`` member selecting the validation metric.
            validation_data: optional samples scored after every batch.
        """
        self.epochs = epochs
        self.batch_size = batch_size
        self.eta = eta
        self.training_data_size = len(training_data)
        self.take_snapshot(f'Start {problem}', None)
        self.problem = problem

        for epoch in range(epochs):
            batches = [training_data[i:i+batch_size] for
                       i in range(0, self.training_data_size, batch_size)]
            for batch_id, batch in enumerate(batches, start=1):
                self.run_batch(batch, eta)
                fitness = None
                if validation_data is not None:
                    if problem == Problem.Classification:
                        fitness = self.validate_classification(validation_data)
                    if problem == Problem.Regression:
                        fitness = self.validate_regression(validation_data)
                # NOTE(review): the validation score is passed positionally and
                # therefore stored under the "train_fitness" key - confirm that
                # downstream readers of the history expect this.
                self.take_snapshot(
                    f'Epoch {epoch}: [{batch_id}/{len(batches)}]', fitness)

    def take_snapshot(self, name, train_fitness=None, test_fitness=None):
        """Append the current parameters and scores to ``history``.

        Args:
            name: label of the snapshot.
            train_fitness: score to store under ``train_fitness``.
            test_fitness: score to store under ``test_fitness``.
        """
        dump = dict(biases=[bias.tolist() for bias in self.biases],
                    weights=[weight.tolist() for weight in self.weights],
                    name=name,
                    train_fitness=train_fitness,
                    test_fitness=test_fitness)
        self.history.append(dump)

    def save_history(self, out_filepath):
        """Write the snapshot history and network metadata to a UTF-8 JSON file.

        Args:
            out_filepath: path of the file to create or overwrite.
        """
        network_info = dict(sizes=self.sizes, eta=self.eta,
                            epochs=self.epochs, batch_size=self.batch_size,
                            training_data_size=self.training_data_size,
                            hidden_layers_activation_function=self.hidden_layers_activation_function.__name__,
                            output_layer_activation_function=self.output_layer_activation_function.__name__)
        payload = dict(history=self.history, network_info=network_info)
        # The context manager closes (and flushes) the file even if dumping fails.
        with codecs.open(out_filepath, 'w', encoding='utf-8') as out_file:
            json.dump(payload, out_file)

    def classificate(self, test_data):
        """Return the arg-max class index predicted for every input in ``test_data``."""
        return [np.argmax(self.feed_forward(x)) for x in test_data]

    def regression_result(self, test_data):
        """Return the raw network output for every input in ``test_data``."""
        return [self.feed_forward(x) for x in test_data]

**Uruchamianie:**

In [0]:
# Load the "data.simple" classification set and build a sigmoid network with a
# single hidden layer of 5 neurons, quadratic cost and biases enabled.
train_data, test_data, net = load_data(
    hidden_layer_sizes=[5],
    hidden_layers_activation_function=sigmoid,
    hidden_layers_activation_function_prime=sigmoid_derivative,
    output_layer_activation_function=sigmoid,
    output_layer_activation_function_prime=sigmoid_derivative,
    cost_derivative=quadratic_cost_derivative,
    is_bias_enabled=True,
    path=BASE_DIR + r'/data/classification',
    name='data.simple',
    train_size=100,
)
# Train for 100 epochs with mini-batches of 10 samples and learning rate 0.03,
# scoring the test set after every batch.
net.stochastic_gradient_descent_classification(
    train_data, epochs=100, batch_size=10, eta=0.03, test_data=test_data)
# Persist the per-batch snapshots for later plotting.
net.save_history(BASE_DIR + '/results/classification.json')


**Klasyfikacja - wykresy błędu i wizualizacja rezultatów**

**Regresja - wykresy błędu i wizualizacja rezultatów**