# Method 3: Neural network

In [2]:
import pandas as pd
import plotly as py
import plotly.graph_objs as go
import plotly.express as px
import kaleido
import os
from PIL import Image

import matplotlib.pyplot as plt
import seaborn as sns

import numpy as np
import numdifftools as nd
from numpy.random import uniform
import math as m
import time
from numba import jit

from scipy import stats

from sklearn import datasets
from sklearn.model_selection import train_test_split , KFold
from sklearn.preprocessing import Normalizer
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
import warnings

from collections import Counter
# setup offline mode
py.offline.init_notebook_mode(connected=True)

pd.set_option('display.max_rows', 20)
warnings.filterwarnings('ignore')

In [15]:
CZ_ALPHABET = ['a', 'á', 'b', 'c', 'č', 'd', 'ď',
               'e', 'é', 'ě', 'f', 'g', 'h', 'ch',
               'i', 'í', 'j', 'k', 'l', 'm', 'n',
               'ň', 'o', 'ó', 'p', 'q', 'r', 'ř',
               's', 'š', 't', 'ť', 'u', 'ú','ů',
               'v', 'w', 'x', 'y', 'ý','z', 'ž', ' ']

def word_to_vector(word: str) -> np.array:
    array = np.zeros(len(CZ_ALPHABET))
    for i, alph in enumerate(CZ_ALPHABET):
        array[i] = word.count(alph)
    return pd.Series(array)

def calculate_hidden_layer_size(Ns: int, Ni: int, No: int, alpha: int) -> float:
    Nh = Ns / (alpha * (Ni - No))
    return Nh

def sigmoid(x):
    y = 1 / (1 + np.e ** (-x))
    return y

def sigmoid_derivative(x):
    y = sigmoid(x) * (1 - sigmoid(x))
    return y

def ReLU(x):
    y = x if x >= 0 else 0
    return y

array([0.09003057, 0.24472847, 0.66524096])

In [4]:
data = pd.read_csv('data.csv', encoding = 'ansi', usecols=['Obec', 'Kraj'])
data['Obec'] = data['Obec'].str.lower()
output_dict = list(np.sort(data['Kraj'].unique()).flatten())

data[CZ_ALPHABET] = data['Obec'].apply(word_to_vector)
data['expected'] = data['Kraj'].apply(lambda x: (np.array(output_dict) == x).astype(int))

data.head()

Unnamed: 0,Obec,Kraj,a,á,b,c,č,d,ď,e,...,ů,v,w,x,y,ý,z,ž,Unnamed: 20,expected
0,abertamy,Karlovarský kraj,0.632456,0.0,0.316228,0.0,0.0,0.0,0.0,0.316228,...,0.0,0.0,0.0,0.0,0.316228,0.0,0.0,0.0,0.0,"[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
1,adamov,Jihočeský kraj,0.707107,0.0,0.0,0.0,0.0,0.353553,0.0,0.0,...,0.0,0.353553,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
2,adamov,Jihomoravský kraj,0.707107,0.0,0.0,0.0,0.0,0.353553,0.0,0.0,...,0.0,0.353553,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
3,adamov,Středočeský kraj,0.707107,0.0,0.0,0.0,0.0,0.353553,0.0,0.0,...,0.0,0.353553,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]"
4,adršpach,Královéhradecký kraj,0.603023,0.0,0.0,0.301511,0.0,0.301511,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]"


Using the following to get the size of the hidden layer

$$
N_\mathrm{h} = \frac{N_\mathrm{s}}{\alpha (N_\mathrm{i} + N_\mathrm{o})}
$$
- $N_\mathrm{s}$ - number of samples in training data
- $N_\mathrm{i}$ - number of input neurons (43)
- $N_\mathrm{o}$ - number of output neurons (14)
- $\alpha$ - an arbitrary number usually between 2 and 10

In [40]:
class NN:
    def __init__(self, size: np.array, func: str = 'sigmoid', x_train: np.ndarray = None,
                 y_train: np.array = None, eta: float = 0.01, backpropagation: bool = False):

        self.train = x_train
        self.expected = y_train
        self.size = size
        self.func = func
        self.eta = eta
        self.backpropagation = backpropagation
        self.layers = np.array([uniform(size=n) for n in size], dtype=object)

        self.n_layers = len(size)

        self.n_weights = np.dot(size[:-1], size[1:])
        self.n_biases = np.sum(size[1:])

        self.n_params = self.n_weights + self.n_biases
        self.params = uniform(size=self.n_params)
        # initialize weights
        self.W = None
        self.update_weights()
        # initialize biases
        self.B = None
        self.update_biases()

    def a(self, L: int, k: int):
        if L == 0:
            a = layers[0, k]
        else:
            a = sigmoid(self.z(L=L, k=k))
        return a

    def z(self, L: int, k: int):
        z = np.dot(self.W[L-1, k] * self.layers[L-1]) + self.B[L-1, k]
        return z

    def w(self, L: int, i: int, j: int):
        w = self.W[L-1][i, j]
        return w

    def b(self, L: int, k: int):
        b = self.B[L-1, k]
        return b

    def update_weights(self):
        weights = self.params[:self.n_weights]
        lengths = [self.size[i] * self.size[i+1] for i in range(self.n_layers-1)]
        slices = self.slice_from_lengths(lengths=lengths)
        weights = np.split(weights, slices)[:-1]

        for i in range(len(weights)):
            newshape = (self.size[i+1], self.size[i])
            weights[i] = weights[i].reshape(newshape)
        self.W = weights

    def update_biases(self):
        biases = self.params[:self.n_biases]
        lengths = self.size[1:]
        slices = self.slice_from_lengths(lengths=lengths)
        biases = np.split(biases, slices)
        self.B = biases

    def propagate_forward(self):
        self.update_weights()
        self.update_biases()
        f = ReLU if self.func == 'ReLU' else sigmoid
        for i in range(self.n_layers-1):
            self.layers[i+1] = f(np.dot(self.W[i], self.layers[i]) + self.B[i])

    def feed_input(self, layer0):
        self.layers[0] = layer0

    def get_output(self):
        return self.layers[-1]

    def slice_from_lengths(self, lengths):
        slices = [np.sum(lengths[:i]) for i in range(self.n_layers)][1:]
        return slices

    def predict(self, vector: np.array):
        if vector.shape[0] != self.size[0]:
            print(f'Vector shape {vector.shape} does not match layer shape {self.size[0]}')
            raise TypeError
        self.feed_input(vector)
        self.propagate_forward()
        prediction = self.get_output()
        return prediction

    def predict_word(self, word: str):
        vector = word_to_vector(word)
        return self.predict(vector)

    def cost(self, vector: np.array, expected: np.array):
        prediction = self.predict(vector)
        c = np.sum((prediction - expected) ** 2)
        return c

    def cost_of_sample(self, params: np.ndarray):
        sample_size = len(self.train)
        self.params = params
        costs = np.array([self.cost(self.train[i], self.expected[i]) for i in range(sample_size)])
        return np.mean(costs)

    def neg_grad(self):
        if self.backpropagation:
            grad = 0
        else:
            grad = nd.Gradient(self.cost_of_sample)(self.params)
        return -grad

    def d_l(self, l: int):
        if l == self.n_layers - 1:
            d_l = np.multiply(self.dE_dA(l), self.dA_dZ(l))
        else:
            d_l = np.dot(self.d_l(l+1).T, self.W[l])
            d_l = np.multiply(d_l, self.dA_dZ(l))
        return d_l

    def X(self, l: int):
        return self.layers[l]

    def dE_dA(self, l: int):
        if l == self.n_layers - 1:
            pass

    def dA_dZ(self, l: int):
        pass

    def dE_dW(self, l: int):
        dE_dW = np.multiply(self.d_l(l), self.X(l-1))


    def train_network(self):
        neg_grad = self.neg_grad()
        norm_factor = self.eta * np.linalg.norm(self.params) / np.linalg.norm(neg_grad)
        self.params += norm_factor * neg_grad

    def compute_accuracy(self, x_test: np.ndarray, y_test: np.ndarray):
        pass

In [41]:
# normalizovat input!!!

x = np.array(data[CZ_ALPHABET])
y = np.array(data['expected'])

X_TRAIN, X_TEST, Y_TRAIN, Y_TEST= train_test_split(x, y, test_size = 0.2, shuffle = True, random_state = 0)

# normalize input?
Ns = len(X_TRAIN)
Ni = len(CZ_ALPHABET)
No = len(output_dict)
ALPHA = 5
FUNC = 'sigmoid'
ETA = 0.05
N_ITER = 8

X_TRAIN

array([[0.19245009, 0.        , 0.        , ..., 0.        , 0.        ,
        0.19245009],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.37796447, 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.40824829, 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ]])

In [42]:
np.random.seed(0)

hidden_layer_size = int(np.ceil(calculate_hidden_layer_size(Ns=Ns, Ni=Ni, No=No, alpha=ALPHA)))
network_size = np.array([Ni, hidden_layer_size, No], dtype=object)


print(f'Training the neural network with a sample of {Ns} names.')
print(f'alpha = {ALPHA} → optimal hidden layer size: {hidden_layer_size}')
print(f'Initialising a neural network with shape {(Ni, hidden_layer_size, No)}')
print(f'Activation function is set to {FUNC}')
print(f'Learning rate is set to {ETA}')


network = NN(size=network_size, func=FUNC, x_train=X_TRAIN[:700], y_train=Y_TRAIN[:700], eta=ETA, backpropagation=False)

print(network.W[0].shape)
start = time.time()
print('Starting learning session')
# network.cost_of_sample(vector_sample=vector_sample, expected_sample=expected_sample)
print('Cost before', network.cost_of_sample(network.params))

for i in range(N_ITER):
    s = time.time()
    print(f'Currently at i = {i}', end='\r')
    network.train_network()
    e = time.time()
    print(f'Iteration took {e - s:.2f} seconds')

print('Cost after', network.cost_of_sample(network.params))
end = time.time()
print(f'Finished in {end-start:.2f} seconds')

# print(network.cost_of_sample(vector_sample=vector_sample, expected_sample=expected_sample))


Training the neural network with a sample of 5007 names.
Initialising a neural network with shape (43, 20, 14)
Activation function is set to sigmoid
Learning rate is set to 0.05
Network size: [43 20 14]
[array([0.12019656, 0.2961402 , 0.11872772, 0.31798318, 0.41426299,
       0.0641475 , 0.69247212, 0.56660145, 0.26538949, 0.52324805,
       0.09394051, 0.5759465 , 0.9292962 , 0.31856895, 0.66741038,
       0.13179786, 0.7163272 , 0.28940609, 0.18319136, 0.58651293]), array([0.02010755, 0.82894003, 0.00469548, 0.67781654, 0.27000797,
       0.73519402, 0.96218855, 0.24875314, 0.57615733, 0.59204193,
       0.57225191, 0.22308163, 0.95274901, 0.44712538])]
Starting learning session
Cost before 1.0039495219832373
Currently at i = 0

KeyboardInterrupt: 

In [172]:
data['prediction'] = data['Obec'].apply(lambda x: output_dict[np.argmax(network.predict_word(word=x))])
data['is_correct'] = data['Kraj'] == data['prediction']

data['is_correct'].mean()

0.034350535229269855

In [34]:
def function(u: np.array, v:np.array):
    return np.cross(u, v)

function(np.array([1, 0]), np.array([0, 1]))

u = np.array([1, 0, 0])
v = np.array([0, 1, 0])
f(u, v)

30.923986673355103


In [11]:
def f(u, v):
    return u + v

u = np.array([0, 0, 0])
v = np.array([1, 2, 3])

np.diagonal(nd.Gradient(f)(u, v))

array([1., 1., 1.])

In [13]:
arr = np.tile([1, 2, 3], (3, 1))
arr.shape

(3, 3)

In [None]:
;