In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
def g_sigmoid(z):
    return 1 / (1 + np.exp(-z))

In [None]:
def g_sigmoid_derivative(z):
    return g_sigmoid(z) * (1 - g_sigmoid(z))

In [7]:
def identity(x):
    return x

In [8]:
def hypothesis(w, x):
    if len(x.shape) == 1:
        wx = w.transpose().dot(x)[0]
    elif len(x.shape) == 2:
        wx = w.transpose().dot(x)[0][0]
    else:
        raise ValueError(f'Nekorektne dimenzije (shape) elementa {x}')
    
    return g_sigmoid(wx)

In [92]:
def unroll_matrix(matrix):
    return matrix.ravel()

def unroll_matrix_array(matrix_vec):
    unrolled = np.array([])
    for matrix in matrix_vec:
        unrolled = np.concatenate((unrolled, unroll_matrix(matrix)), axis=None)
        
    return np.array(unrolled).ravel()

def roll_vec_to_matrix(vec, matrix_size):
    rows = matrix_size[0]
    cols = matrix_size[1]
    
    if vec.shape[0] != rows * cols:
        raise ValueError(f'Nekorektno razvijanje vektora velicine {vec.shape[0]} u matricu'
                         f' dimenzija {rows}x{cols} (matrica od {rows*cols} elemenata)')
    
    matrix = np.zeros(matrix_size)
    for i in range(rows):
        matrix[i] = vec[i*cols : (i + 1)*cols]
        
    return matrix

def roll_vec_to_matrix_array(long_vec, matrix_sizes):
    matrix_array = []
    prev_rows = 0
    prev_cols = 0
    
    for matrix_size in matrix_sizes:
        rows = matrix_size[0]
        cols = matrix_size[1]
        
        i = prev_rows * prev_cols
        j = rows * cols
        matrix_array.append(roll_vec_to_matrix(long_vec[i : i+j], matrix_size))
        
        prev_rows = rows
        prev_cols = cols
        
    return np.array(matrix_array)

In [99]:
class NeuralNetwork:
    def __init__(self, input_layer_size, hidden_layer_options, output_layer_size, eps=10e-3):
        self.input_layer_size = input_layer_size
        self.hidden_layers_num = len(hidden_layer_options)
        self.layers_num = self.hidden_layers_num + 2
        self.output_layer_size = output_layer_size
        self.layer_sizes = [input_layer_size] + hidden_layer_options + [output_layer_size]
        self.layer_indices = range(self.layers_num)
        
        # NAPOMENA: velicina slojeva neuralne mreze (layer_sizes) se racuna kao broj jedinica
        # BEZ bias jedinice, ali prilikom postavljanja slojeva mreza ocekuje bias jedinice.
        # Dakle, svaki sloj je velicine za jedan vise, osim poslednjeg sloja.
        layers = [np.ones(input_layer_size + 1)]
        for i in range(self.hidden_layers_num):
            layers.append(np.ones(hidden_layer_options[i] + 1))
            
        layers.append(np.ones(output_layer_size))
        self.neural_network = np.array(layers)
        
        # Cuvam dimenzije matrica Wij koje mapiraju slojeve j -> j + 1
        # s(j+1) x (s(j) + 1)
        mappers = {}
        for l in range(self.layers_num - 1):
           rows = self.layer_sizes[l + 1]
           cols = self.layer_sizes[l] + 1
           
           mappers[l] = (rows, cols)
        
        self.layer_mapper_sizes = mappers
        
        # inicijalizacija modela, tj. matrica W_i za svaki sloj
        Ws = []
        for l in range(self.layers_num - 1):
            W_l = 2*eps * np.random.random(self.layer_mapper_sizes[l]) - eps
            Ws.append(W_l)
            
        self.model = np.array(Ws)
        
        # Analogno kao za slojeve, delte ocekuju bias jedinice, koje se NECE koristiti
        # u proracunima backpropagation algoritma. Prvi sloj mora da ima
        # podrazumevano za delte sve nule, jer ulazni podaci nemaju gresku.
        deltas = []
        for l in self.layer_indices:
            if l == self.layers_num - 1:
                delta_l = np.zeros(self.layer_sizes[l])
            else:
                delta_l = np.zeros(self.layer_sizes[l] + 1)
                
            deltas.append(delta_l)
            
        self.deltas = np.array(deltas)
        
    def __str__(self):
        self.print_network()
        
    def __repr__(self):
        self.print_network()
        
    def __layer_index_check(self, layer_index):
        if layer_index < 0 or layer_index >= self.layers_num:
            raise IndexError(f'Nekorektan indeks sloja neuralne mreze {layer_index}: '
                             f'dostupni indeksi 0-{self.layers_num - 1}')
        
    def set_layer(self, layer_index, units_vec):
        self.__layer_index_check(layer_index)
        
        # poslednji sloj NEMA bias unit
        # -1 jer mreza inicijalno ne sadrzi bias unit, a ja ocekujem vektor koji sadrzi bias unit
        if layer_index != self.layers_num - 1 and \
                units_vec.shape[0] != self.layer_sizes[layer_index] + 1:
            error_message = f'Nekorektna dimenzija vektora {units_vec.shape[0]} za sloj {layer_index}: ' \
                f'ocekivana {self.layer_sizes[layer_index] + 1}'
            
            raise ValueError(error_message)
        elif layer_index == self.layers_num - 1 and \
                units_vec.shape[0] != self.layer_sizes[layer_index]: # ovde je layer_index poslednji sloj
            error_message = f'Nekorektna dimenzija vektora {units_vec.shape[0]} za izlazni sloj {layer_index}: ' \
                f'ocekivana {self.layer_sizes[layer_index]}'
            
            raise ValueError(error_message)
        
        self.neural_network[layer_index] = units_vec
        
    def set_all_layers(self, all_layers):
        for l in range(self.layers_num):
            self.set_layer(l, all_layers[l])
            
    def set_mapper(self, layer_index, mapper):
        self.__layer_index_check(layer_index)
        
        if mapper.shape != self.model[layer_index].shape:
            error_message = f'Nekorektna dimenzija matrice {mapper.shape[0]}x{mapper.shape[1]} ' \
                f'za preslikavanje sloja {layer_index} -> {layer_index + 1}: ocekivana ' \
                f'{self.model[layer_index].shape[0]}x{self.model[layer_index].shape[1]}'
            
            raise ValueError(error_message)
        
        self.model[layer_index] = mapper
        
    # Funkcija po analogiji za slojeve, radi doslednosti
    def set_all_mappers(self, all_mappers):
        for l in range(self.layers_num - 1):
            self.set_mapper(l, all_mappers[l])
            
    def set_model(self, all_mappers):
        self.set_all_mappers(all_mappers)
        
    def set_delta(self, layer_index, delta_vec):
        self.__layer_index_check(layer_index)
        
        if layer_index != self.layers_num - 1 and \
                delta_vec.shape[0] != self.layer_sizes[layer_index] + 1:
            error_message = f'Nekorektna dimenzija vektora {delta_vec.shape[0]}:' \
                f'ocekivana {self.layer_sizes[layer_index] + 1}'
            
            raise ValueError(error_message)
        elif layer_index == self.layers_num - 1 and \
                delta_vec.shape[0] != self.layer_sizes[layer_index]:
            error_message = f'Nekorektna dimenzija vektora {delta_vec.shape[0]} za poslednji sloj: ' \
                f'ocekivana {self.layer_sizes[layer_index]}' 
            
            raise ValueError(error_message)
            
        self.deltas[layer_index] = delta_vec
        
    def set_all_deltas(self, all_deltas):
        for l in self.layer_indices:
            self.set_delta(l, all_deltas[l])
        
    def unroll_mapper(self, layer_index):
        self.__layer_index_check(layer_index)
        
        return self.model[layer_index].ravel()
    
    def unroll_model(self):
        unrolled_Ws = np.array([])
        for l in range(self.layers_num - 1):
            W_l = self.unroll_mapper(l)
            unrolled_Ws = np.concatenate((unrolled_Ws, W_l), axis=None)
            
        return np.array(unrolled_Ws).ravel()
    
    @staticmethod
    def __activate_layer(z_vec, activation_func):
        return activation_func(z_vec)
    
    def forward_propagation(self, input_layer_data):
        a_l = input_layer_data
        self.set_layer(0, a_l)
        
        for l in range(self.layers_num - 1):
            W_l = self.model[l]
            z_lp1 = W_l.dot(a_l)
            a_l = g_sigmoid(z_lp1)
            
            # Dodavanje bias jedinice u a_l vektor
            if l != self.layers_num - 2:
                a_l = np.concatenate((np.array([1]), a_l))
            
            self.set_layer(l + 1, a_l)
            
    def backward_propagation_deltas(self, y_data):
        delta_output = self.neural_network[self.layers_num - 1] - y_data
        self.set_delta(self.layers_num - 1, delta_output)
        
        for l in range(self.layers_num - 2, 0, -1):
            z_l = self.model[l - 1].dot(self.neural_network[l - 1]) # z(l) = W(l-1)*a(l-1)
            g_prim_vec = np.concatenate((np.array([1]), g_sigmoid_derivative(z_l)))
            
            if l + 1 == self.layers_num - 1:
                delta_lp1 = self.deltas[l + 1] # ne postoji bias jedinica za poslednji sloj
            else:
                delta_lp1 = self.deltas[l + 1][1:] # ignorisem bias jedinicu delta vektora
                
            delta_l = self.model[l].transpose().dot(delta_lp1) * g_prim_vec
            self.set_delta(l, delta_l)
            
        # delta_0 je uvek nula-vektor, postavljen jos prilikom inicijalizacije same mreze
        
    # Funkcija koja racuna uporedo parcijalne izvode (gradijent) i funkciju gubitka
    def backward_propagation(self, X_training, y_training, lambda_param=0):
        accs = []
        for l in range(self.layers_num - 1):
            delta_acc_l = np.zeros(self.layer_mapper_sizes[l])
            accs.append(delta_acc_l)
            
        # Delta_l akumulatori za parcijalne izvode i inicijalni parcijalni izvodi
        delta_accumulators = np.array(accs)
        gradient = np.array(accs)
        loss = 0
        
        N = X_training.shape[0]
        for i in range(N):
            self.forward_propagation(X_training[i]) # a_0 = X[i] ...
            self.backward_propagation_deltas(y_training[i])
            self.__accumulate_deltas(delta_accumulators)
            
            # OVDE je problem: akumulatori u jednoj iteraciji svi postanu nan
            print(f'Delta accumulators:')
            print(delta_accumulators)
            
            # Hocu da y_predict i y budu realne vrednosti a ne vektori velicine 1
            y_predict = self.neural_network[self.layers_num - 1][0]
            y_i = y_training[i][0]
            loss += y_i*np.log(y_predict) + (1 - y_i)*np.log(1 - y_predict)
            
            # print(f'y_predict = {y_predict}, y_{i} = {y_i}')
            # print(f'Loss_{i}: {loss}')
            
        self.__set_partial_derivatives(gradient, delta_accumulators, lambda_param)
        return -loss / N, gradient
            
    def __accumulate_deltas(self, delta_accumulators):
        for l in range(self.layers_num - 1):
            if l + 1 == self.layers_num - 1:
                delta_lp1 = self.deltas[l + 1].reshape(-1, 1)
            else:
                delta_lp1 = self.deltas[l + 1][1:].reshape(-1, 1)
                
            a_l = self.neural_network[l].reshape(-1, 1).transpose()

            delta_accumulators[l] += delta_lp1.dot(a_l)
            
    def __set_partial_derivatives(self, partial_derivatives, delta_accumulators, lambda_param):
        for l in range(self.layers_num - 1):
            for i in range(self.model[l].shape[0]):
                for j in range(self.model[l].shape[1]):
                    if j == 0:
                        partial_derivatives[l][i][j] = delta_accumulators[l][i][j] / N
                    else:
                        partial_derivatives[l][i][j] = delta_accumulators[l][i][j] / N + \
                                                       lambda_param * self.model[l][i][j]                                        
        
    def print_layer(self, layer_index):
        self.__layer_index_check(layer_index)
        
        print(f'Layer {layer_index}:')
        print(self.neural_network[layer_index])
        
    def print_network(self):
        print('~ Neural network ~')
        
        for i in range(self.neural_network.shape[0]):
            print(f'Layer {i + 1}:')
            print(self.neural_network[i])
            
    def print_layer_mapper_sizes(self):
        print('Dimenzije matrica modela koji mapiraju slojeve:')
        
        for k, v in self.layer_mapper_sizes.items():
            print(f'{k} -> {k + 1}: {v[0]} x {v[1]}')
        
    def print_mapper(self, layer_index):
        self.__layer_index_check(layer_index)
        
        print(f'W_{layer_index}: {layer_index} -> {layer_index + 1}')
        print(self.model[layer_index])           
            
    def print_model(self):
        print('Model:')
        
        for l in range(self.layers_num - 1):
            print(f'W_{l}: {l} -> {l + 1}')
            print(self.model[l])
            
    def print_delta(self, layer_index):
        self.__layer_index_check(layer_index)
        
        print(self.deltas[layer_index])
            
    def print_deltas(self):
        print('Deltas:')
        
        for l in range(self.layers_num):
            print(f'delta_{l}:')
            print(self.deltas[l])
        

In [11]:
def gradient_descent(X, y, w_init, alpha=0.01, num_iter=1000, eps=10e-4):
    loss_history = np.zeros((num_iter, 1))
    it_break = num_iter
    
    w = w_init
    for i in range(num_iter):
        # loss, gradient = loss_function(X, y, w)
        # w = w - alpha * gradient
        # loss_history[i] = loss
        
        
        if i > 0 and abs(loss_history[i] - loss_history[i-1]) <= eps:
            it_break = i
            break
            
    return loss_history, it_break, w
# ---------------------------------------------------------------------------------------------------------

# UCITAVANJE PODATAKA I TESTIRANJE

In [12]:
df = pd.read_csv('../datasets/auto-mpg.csv')
df.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model-year
0,18.0,8,307.0,130.0,3504,12.0,70
1,15.0,8,350.0,165.0,3693,11.5,70
2,18.0,8,318.0,150.0,3436,11.0,70
3,16.0,8,304.0,150.0,3433,12.0,70
4,17.0,8,302.0,140.0,3449,10.5,70


In [None]:
N = df.shape[0]
print(f'Ukupan broj podataka: {N}')

m = df.shape[1] - 1
print(f'Broj atributa svakog podatka: {m}')

attributes = list(df.drop('mpg', axis=True).columns)
print(f'Atributi: {attributes}')

Ukupan broj podataka: 398
Broj atributa svakog podatka: 6
Atributi: ['cylinders', 'displacement', 'horsepower', 'weight', 'acceleration', 'model-year']


In [14]:
X = np.ones((N, m + 1))
for i, attr in enumerate(attributes):
    X[:, i + 1] = df[attr]
    
print('~ Podaci ~')
print(X)

y = df['mpg']
y = np.array(y).reshape(-1, 1)

print('~ Ciljne promenljive ~')
print(y[:5])
print(' ...')

~ Podaci ~
[[1.000e+00 8.000e+00 3.070e+02 ... 3.504e+03 1.200e+01 7.000e+01]
 [1.000e+00 8.000e+00 3.500e+02 ... 3.693e+03 1.150e+01 7.000e+01]
 [1.000e+00 8.000e+00 3.180e+02 ... 3.436e+03 1.100e+01 7.000e+01]
 ...
 [1.000e+00 4.000e+00 1.350e+02 ... 2.295e+03 1.160e+01 8.200e+01]
 [1.000e+00 4.000e+00 1.200e+02 ... 2.625e+03 1.860e+01 8.200e+01]
 [1.000e+00 4.000e+00 1.190e+02 ... 2.720e+03 1.940e+01 8.200e+01]]
~ Ciljne promenljive ~
[[18.]
 [15.]
 [18.]
 [16.]
 [17.]]
 ...


In [100]:
nn = NeuralNetwork(m, [3, 4], 1)
nn.print_network()
print()

nn.print_layer_mapper_sizes()
print()

nn.print_model()

~ Neural network ~
Layer 1:
[1. 1. 1. 1. 1. 1. 1.]
Layer 2:
[1. 1. 1. 1.]
Layer 3:
[1. 1. 1. 1. 1.]
Layer 4:
[1.]

Dimenzije matrica modela koji mapiraju slojeve:
0 -> 1: 3 x 7
1 -> 2: 4 x 4
2 -> 3: 1 x 5

Model:
W_0: 0 -> 1
[[-0.00900768  0.00940961  0.00330907  0.00610733  0.00615164  0.00584938
  -0.00362416]
 [ 0.00380569 -0.00566026 -0.00893182 -0.00738214 -0.0015806  -0.00707129
  -0.0015423 ]
 [-0.0095262  -0.00583721 -0.00447285 -0.00957816 -0.0068259  -0.00046939
   0.0070976 ]]
W_1: 1 -> 2
[[ 0.00818788  0.00192153  0.00338092  0.00037704]
 [ 0.00889514 -0.00795702  0.00829218  0.00035336]
 [-0.00523314  0.00884447  0.00116489  0.00745759]
 [-0.00505942  0.00971626  0.0057214   0.00460395]]
W_2: 2 -> 3
[[ 0.0091985   0.00650294  0.0095527   0.00211727 -0.00136243]]


In [None]:
# W_0 = np.array([[1, 1, 1, 1, 1, 1, 1], [2, 2, 2, 2, 2, 2, 2], [3, 3, 3, 3, 3, 3, 3]])
# W_1 = np.array([[-1, -1, -1, -1], [-0.1, -0.2, -0.3, -0.4], [7, 7, 4, 4], [0, 2, 0, -3]])
# W_2 = np.array([[5, 2, -5, -5, 10]])
# # W_0 = np.array([[10, 1]])
# # W_1 = np.array([[7, -10]])
# 
# Ws = np.array([W_0, W_1, W_2])
# nn.set_model(Ws)
# nn.print_model()

In [102]:
unrolled_model = nn.unroll_model()
print('Unrolled model:')
print(unrolled_model)

Unrolled model:
[-0.00900768  0.00940961  0.00330907  0.00610733  0.00615164  0.00584938
 -0.00362416  0.00380569 -0.00566026 -0.00893182 -0.00738214 -0.0015806
 -0.00707129 -0.0015423  -0.0095262  -0.00583721 -0.00447285 -0.00957816
 -0.0068259  -0.00046939  0.0070976   0.00818788  0.00192153  0.00338092
  0.00037704  0.00889514 -0.00795702  0.00829218  0.00035336 -0.00523314
  0.00884447  0.00116489  0.00745759 -0.00505942  0.00971626  0.0057214
  0.00460395  0.0091985   0.00650294  0.0095527   0.00211727 -0.00136243]


In [103]:
y_train = np.random.randint(0, 2, (N, 1))
print(y_train)

[[0]
 [1]
 [0]
 [1]
 [1]
 [0]
 [0]
 [0]
 [0]
 [1]
 [0]
 [0]
 [1]
 [1]
 [1]
 [0]
 [0]
 [1]
 [0]
 [0]
 [0]
 [1]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [1]
 [1]
 [1]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [1]
 [0]
 [0]
 [0]
 [1]
 [0]
 [0]
 [0]
 [0]
 [0]
 [1]
 [0]
 [1]
 [1]
 [0]
 [0]
 [1]
 [0]
 [0]
 [0]
 [0]
 [1]
 [0]
 [1]
 [1]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [1]
 [0]
 [0]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [1]
 [0]
 [0]
 [1]
 [1]
 [1]
 [0]
 [1]
 [1]
 [1]
 [0]
 [1]
 [0]
 [1]
 [0]
 [0]
 [1]
 [0]
 [0]
 [0]
 [0]
 [1]
 [0]
 [0]
 [1]
 [1]
 [0]
 [0]
 [0]
 [0]
 [1]
 [0]
 [1]
 [0]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [1]
 [1]
 [0]
 [0]
 [0]
 [1]
 [0]
 [1]
 [0]
 [0]
 [1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [0]
 [1]
 [1]
 [0]
 [0]
 [1]
 [0]
 [0]
 [1]
 [0]
 [1]
 [0]
 [1]
 [0]
 [0]
 [0]
 [0]
 [1]
 [0]
 [0]
 [1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [0]
 [0]
 [0]
 [1]
 [1]
 [0]
 [0]
 [0]
 [0]
 [1]
 [1]
 [1]
 [0]
 [0]
 [1]
 [0]
 [1]
 [1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [0]
 [0]
 [0]
 [0]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [0]
 [1]
 [0]
 [1]


In [104]:
loss, gradient = nn.backward_propagation(X, y_train)
print(f'\nLoss = {loss}')
print(f'\nGradient:')
print(gradient)

Delta accumulators:
[array([[-5.85999298e-16, -4.68799438e-15, -1.79901784e-13,
        -7.61799087e-14, -2.05334154e-12, -7.03199157e-15,
        -4.10199508e-14],
       [ 9.28156983e-10,  7.42525586e-09,  2.84944194e-07,
         1.20660408e-07,  3.25226207e-06,  1.11378838e-08,
         6.49709888e-08],
       [ 8.93350095e-18,  7.14680076e-17,  2.74258479e-15,
         1.16135512e-15,  3.13029873e-14,  1.07202011e-16,
         6.25345067e-16]])
 array([[ 8.20009174e-04,  8.20009173e-04,  6.29612488e-08,
         3.78590208e-15],
       [ 1.20460834e-03,  1.20460834e-03,  9.24912155e-08,
         5.56155876e-15],
       [ 2.66989545e-04,  2.66989545e-04,  2.04997648e-08,
         1.23266459e-15],
       [-1.71803649e-04, -1.71803649e-04, -1.31912821e-08,
        -7.93200630e-16]])
 array([[0.50440557, 0.25347762, 0.25232116, 0.25265819, 0.25279007]])]
Delta accumulators:
[array([[-4.59541078e-16, -3.67632862e-15, -1.35641407e-13,
        -5.53143024e-14, -1.58633133e-12, -5.5777220

In [86]:
v = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])

rolled_v = roll_vec_to_matrix(v, (12, 1))
print(rolled_v)

[[ 1.]
 [ 2.]
 [ 3.]
 [ 4.]
 [ 5.]
 [ 6.]
 [ 7.]
 [ 8.]
 [ 9.]
 [10.]
 [11.]
 [12.]]


In [None]:
long_v = np.ones((23,))
matrix_sizes = [(2, 3), (3, 4), (5, 1)]

matrix_array = roll_vec_to_matrix_array(long_v, matrix_sizes)
print(matrix_array)

[array([[1., 1., 1.],
       [1., 1., 1.]])
 array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])
 array([[1.],
       [1.],
       [1.],
       [1.],
       [1.]])]


In [94]:
print(unroll_matrix_array(matrix_array))


[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
