<a href="https://colab.research.google.com/github/Xeena2812/71f3914c00cec6ea10b81e91d3262177#file-deep-learning-homework-2-ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
from tensorboardX import SummaryWriter
from datetime import datetime
from sklearn import preprocessing

logdir = "logs"

In [2]:
def activation(x):
    return 1.0 / (1.0 + np.exp(-x))

def dactivation(x):
    return np.exp(-x)/(1.0 + np.exp(-x))**2

In [21]:
#  (2,3,1)
#There are reshapes of matrices, these are needed because numpy often outputs
# (n, ) instead of (n, 1) as a result of an aoperation, but do not impact the outcome
#
#  The main change is at line 66 
class MLP:

    def __init__(self, *args):
        np.random.seed(123)
        self.shape = args
        n = len(args)
        
        self.layers = []

        self.layers.append(np.ones(self.shape[0]+1))

        for i in range(1, n):
            self.layers.append(np.ones(self.shape[i]))

        self.weights = []
        for i in range(n-1):
            self.weights.append(
                np.zeros((self.layers[i].size, self.layers[i+1].size)))

        self.dw = [0, ]*len(self.weights)

        self.reset()

    def reset(self):
        for i in range(len(self.weights)):
            Z = np.random.random((self.weights[i].shape[0], self.weights[i].shape[1]))

            self.weights[i][...] = (2*Z-1)*1

    def propagate_forward(self, data):
        data = np.append(data, [[1]] * data.shape[0], axis=1)
        
        self.layers[0] = data

        for i in range(1, len(self.shape)):
            s_i = np.dot(self.layers[i-1], self.weights[i-1])
            self.layers[i] = activation(s_i).reshape(s_i.shape[0], s_i.shape[1])

        return self.layers[-1]

    def propagate_backward(self, target, lrate=0.1):
        deltas = []
        
        target = target.reshape(-1, 1)
        derror = -(target-self.layers[-1])
        s_last = np.dot(self.layers[-2], self.weights[-1])
        delta_last = derror * dactivation(s_last).reshape(s_last.shape[0], s_last.shape[1])
        
        deltas.append(delta_last)
        
        for i in range(len(self.shape)-2, 0, -1):
            s_i = np.dot(self.layers[i-1], self.weights[i-1])
            delta_i = np.dot(deltas[0], self.weights[i].T)
            delta_i *= (dactivation(s_i).reshape(s_i.shape[0], s_i.shape[1]))
            deltas.insert(0, delta_i)

        for i in range(len(self.weights)):
            layer = np.atleast_2d(self.layers[i])
            delta = np.atleast_2d(deltas[i])
            #All the differences from the batches are added up in 
            #the dot product so we need to divide by the batch size
            dw = -lrate*np.dot(layer.T, delta) / target.shape[0]
            self.weights[i] += dw
            
            self.dw[i] = dw

        error = (target - self.layers[-1])**2

        return error.sum()
 

def learn(network, X, Y, valid_split, test_split, write, epochs=20, lrate=0.1, batch_size=1):

    nb_samples = len(Y)
    X_train = X[0:int(nb_samples*(1-valid_split-test_split))]
    Y_train = Y[0:int(nb_samples*(1-valid_split-test_split))]
    X_valid = X[int(nb_samples*(1-valid_split-test_split))                :int(nb_samples*(1-test_split))]
    Y_valid = Y[int(nb_samples*(1-valid_split-test_split))                :int(nb_samples*(1-test_split))]
    X_test = X[int(nb_samples*(1-test_split)):]
    Y_test = Y[int(nb_samples*(1-test_split)):]

    scaler = preprocessing.StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_valid = scaler.fit_transform(X_valid)
    X_test = scaler.fit_transform(X_test)

    #Also randomizing the validation and test data yields better result in validation and test error
    randperm = np.random.permutation(len(X_train))
    X_train, Y_train = X_train[randperm], Y_train[randperm]
    randperm = np.random.permutation(len(X_valid))
    X_valid, Y_valid = X_valid[randperm], Y_valid[randperm]
    randperm = np.random.permutation(len(X_test))
    X_test, Y_test = X_test[randperm], Y_test[randperm]

    for i in range(epochs):
        train_err = 0
        end = X_train.shape[0] - (X_train.shape[0] % batch_size)
        
        for k in range(0, end, batch_size):
            network.propagate_forward(X_train[k:k+batch_size])
            train_err += network.propagate_backward(Y_train[k:k+batch_size], lrate)

        if(X_train.shape[0] % batch_size != 0):
            network.propagate_forward(X_train[end:])
            train_err += network.propagate_backward(Y_train[end:], lrate)
        
        train_err /= X_train.shape[0]

        valid_err = 0
        o_valid = np.zeros((X_valid.shape[0], 1))
        #Remainder in case the training data is not divisible by batch size
        end = X_valid.shape[0] - (X_valid.shape[0] % batch_size)
        for k in range(0, end, batch_size):
            o_valid[k:k+batch_size] = network.propagate_forward(X_valid[k:k+batch_size])
            valid_err += ((o_valid[k:k+batch_size]-Y_valid[k:k+batch_size])**2).sum()

        if(X_valid.shape[0] % batch_size != 0):
            o_valid[end:] = network.propagate_forward(X_valid[end:])
            valid_err += ((o_valid[end:]-Y_valid[end:])**2).sum()

        valid_err /= X_valid.shape[0]

        write.add_scalar('train', scalar_value=train_err, global_step=i)
        write.add_scalar('validation', scalar_value=valid_err, global_step=i)
        print("{} epoch, train_err: {}, valid_err: {}".format(
            i, train_err, valid_err))

    print("TESZT")
    test_err = 0
    o_test = np.zeros((X_test.shape[0], 1))
    end = X_test.shape[0] - (X_test.shape[0] % batch_size)
    for k in range(0, end, batch_size):
        o_test[k:k+batch_size] = network.propagate_forward(X_test[k:k+batch_size])
        test_err += ((o_test[k:k+batch_size]-Y_test[k:k+batch_size])**2).sum()

    if(X_test.shape[0] % batch_size != 0):
        o_test[end:] = network.propagate_forward(X_test[end:])
        test_err += ((o_test[end:]-Y_test[end:])**2).sum()
 
    test_err /= X_test.shape[0]
    print(test_err)


In [22]:
network = MLP (2,10,1)


In [23]:
nb_samples=1000
X = np.zeros((nb_samples,2))
Y = np.zeros(nb_samples)
for i in range(0,nb_samples,4):
    noise = np.random.normal(0,1,8)
    X[i], Y[i] = (-2+noise[0],-2+noise[1]), 0
    X[i+1], Y[i+1] = (2+noise[2],-2+noise[3]), 1
    X[i+2], Y[i+2] = (-2+noise[4],2+noise[5]), 1
    X[i+3], Y[i+3] = (2+noise[6],2+noise[7]), 0

In [26]:
# a pontos időt lekérdezzük majd string-re alakítjuk
now = datetime.now()
date_time = now.strftime("%Y%m%d_%H-%M-%S")
writer = SummaryWriter(logdir+"/"+date_time, flush_secs=1)


# Tanítás/Tesztelés indítása
network.reset()
valid_split = 0.2; test_split = 0.1
#I cannot for the life of me figure otu whats wrong and why
#the validation and training error increases instead of decreasing
learn(network, X, Y, valid_split, test_split, writer, 100, lrate=0.1, batch_size=1000)

0 epoch, train_err: 0.2516715411958866, valid_err: 50.69295608356027
1 epoch, train_err: 0.25154554746715463, valid_err: 50.66987724766405
2 epoch, train_err: 0.2514236794440746, valid_err: 50.64761100098168
3 epoch, train_err: 0.2513057953628182, valid_err: 50.62612966723297
4 epoch, train_err: 0.2511917578041762, valid_err: 50.60540641831608
5 epoch, train_err: 0.25108143359892976, valid_err: 50.58541525550812
6 epoch, train_err: 0.25097469373228537, valid_err: 50.56613099052046
7 epoch, train_err: 0.2508714132476767, valid_err: 50.54752922646563
8 epoch, train_err: 0.25077147115021087, valid_err: 50.52958633878711
9 epoch, train_err: 0.2506747503100154, valid_err: 50.51227945619965
10 epoch, train_err: 0.25058113736571686, valid_err: 50.49558644168275
11 epoch, train_err: 0.2504905226282642, valid_err: 50.4794858735668
12 epoch, train_err: 0.250402799985289, valid_err: 50.463957026747096
13 epoch, train_err: 0.25031786680617507, valid_err: 50.448979854057626
14 epoch, train_err: 0.2

In [7]:
%tensorboard --logdir logs


UsageError: Line magic function `%tensorboard` not found.
