# Method 3: Neural network

In [1]:
import pandas as pd
import plotly as py
import plotly.graph_objs as go
import plotly.express as px
import kaleido
import os
from PIL import Image

import matplotlib.pyplot as plt
import seaborn as sns

import numpy as np
import numdifftools as nd
from numpy.random import uniform
import math as m
import time

from scipy import stats

from sklearn import datasets
from sklearn.model_selection import train_test_split , KFold
from sklearn.preprocessing import Normalizer
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

from collections import Counter
# setup offline mode
py.offline.init_notebook_mode(connected=True)

pd.set_option('display.max_rows', 20)

In [2]:
CZ_ALPHABET = ['a', 'á', 'b', 'c', 'č', 'd', 'ď',
               'e', 'é', 'ě', 'f', 'g', 'h', 'ch',
               'i', 'í', 'j', 'k', 'l', 'm', 'n',
               'ň', 'o', 'ó', 'p', 'q', 'r', 'ř',
               's', 'š', 't', 'ť', 'u', 'ú','ů',
               'v', 'w', 'x', 'y', 'ý','z', 'ž', ' ']

def word_to_vector(word: str) -> np.array:
    array = np.zeros(len(CZ_ALPHABET))
    for i, alph in enumerate(CZ_ALPHABET):
        array[i] = word.count(alph)
    return pd.Series(array)

def calculate_hidden_layer_size(Ns: int, Ni: int, No: int, alpha: int) -> float:
    Nh = Ns / (alpha * (Ni - No))
    return Nh

def sigmoid(x):
    y = 1 / (1 + np.e ** (-x))
    return y

def ReLU(x):
    y = x if x >= 0 else 0
    return y

In [3]:
data = pd.read_csv('data.csv', encoding = 'ansi', usecols=['Obec', 'Kraj'])
data['Obec'] = data['Obec'].str.lower()
output_dict = list(np.sort(data['Kraj'].unique()).flatten())

data[CZ_ALPHABET] = data['Obec'].apply(word_to_vector)

data.head()

Unnamed: 0,Obec,Kraj,a,á,b,c,č,d,ď,e,...,ú,ů,v,w,x,y,ý,z,ž,Unnamed: 21
0,abertamy,Karlovarský kraj,2.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,adamov,Jihočeský kraj,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,adamov,Jihomoravský kraj,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,adamov,Středočeský kraj,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,adršpach,Královéhradecký kraj,2.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Using the following to get the size of the hidden layer

$$
N_\mathrm{h} = \frac{N_\mathrm{s}}{\alpha (N_\mathrm{i} + N_\mathrm{o})}
$$
- $N_\mathrm{s}$ - number of samples in training data
- $N_\mathrm{i}$ - number of input neurons (43)
- $N_\mathrm{o}$ - number of output neurons (14)
- $\alpha$ - an arbitrary number usually between 2 and 10

In [13]:
class NN:
    def __init__(self, size: np.array, func: str = 'sigmoid', train_sample: np.ndarray = None,
                 expected_sample: np.array = None):
        if train_sample is None or expected_sample is None:
            print('Check training and expected data: None was passed!')
        self.train = train_sample
        self.expected = expected_sample
        self.size = size
        self.func = func
        self.layers = np.array([uniform(size=n) for n in size], dtype=object)

        self.n_layers = len(size)

        self.n_weights = np.dot(size[:-1], size[1:])
        self.n_biases = np.sum(size[1:])

        self.n_params = self.n_weights + self.n_biases
        self.params = uniform(size=self.n_params)
        # initialize weights
        self.W = None
        self.weights()
        # initialize biases
        self.b = None
        self.biases()
        self.biases = np.array([uniform(size=n) for n in size[1:]], dtype=object)

    def weights(self):
        weights = self.params[:self.n_weights]
        lengths = [self.size[i] * self.size[i+1] for i in range(self.n_layers-1)]
        slices = self.slice_from_lengths(lengths=lengths)
        weights = np.split(weights, slices)[:-1]

        for i in range(len(weights)):
            newshape = (self.size[i+1], self.size[i])
            weights[i] = weights[i].reshape(newshape)
        self.W = weights

    def biases(self):
        biases = self.params[:self.n_biases]
        lengths = self.size[1:]
        slices = self.slice_from_lengths(lengths=lengths)
        biases = np.split(biases, slices)
        self.b = biases

    def propagate_forward(self):
        f = ReLU if self.func == 'ReLU' else sigmoid
        for i in range(self.n_layers-1):
            self.layers[i+1] = f(np.dot(self.W[i], self.layers[i]) + self.b[i])

    def feed_input(self, layer0):
        self.layers[0] = layer0

    def get_output(self):
        return self.layers[-1]

    def slice_from_lengths(self, lengths):
        slices = [np.sum(lengths[:i]) for i in range(self.n_layers)][1:]
        return slices

    def predict(self, vector: np.array):
        if vector.shape[0] != self.size[0]:
            print(f'Vector shape {vector.shape} does not match layer shape {self.size[0]}')
            raise TypeError
        self.feed_input(vector)
        self.propagate_forward()
        prediction = self.get_output()
        return prediction

    def predict_word(self, word: str):
        vector = word_to_vector(word)
        return self.predict(vector)

    def cost(self, vector: np.array, expected: np.array):
        prediction = self.predict(vector)
        c = np.sum((prediction - expected) ** 2)
        return c

    def cost_of_sample(self, params: np.ndarray):
        sample_size = len(self.train)
        self.params = params
        costs = np.array([self.cost(self.train[i], self.expected[i]) for i in range(sample_size)])
        return np.mean(costs)

    def neg_grad(self):
        grad = nd.Gradient(self.cost_of_sample)(self.params)
        return -grad

    def train(self, vector: np.array, expected: np.array):
        c = self.cost(vector=vector, expected=expected)
        pass

In [14]:
x = data['Obec']
y = data[CZ_ALPHABET]

x_train, x_test, y_train, y_test= train_test_split(x, y,
                                                   test_size = 0.2,
                                                   shuffle = True,
                                                   random_state = 0)

In [15]:
np.random.seed(0)

Ns = len(x_train)
Ni = len(CZ_ALPHABET)
No = len(output_dict)
ALPHA = 5
FUNC = 'sigmoid'
SAMPLE_SIZE = 10

hidden_layer_size = int(np.ceil(calculate_hidden_layer_size(Ns=Ns, Ni=Ni, No=No, alpha=ALPHA)))
network_size = np.array([Ni, hidden_layer_size, No], dtype=object)
train_sample = np.array(data[CZ_ALPHABET])[:SAMPLE_SIZE]
expected_sample = uniform(size=(6259, 14))[:SAMPLE_SIZE]

print(f'Training the neural network with a sample of {Ns} names.')
print(f'alpha = {ALPHA} → optimal hidden layer size: {hidden_layer_size}')
print(f'Initialising a neural network with shape {(Ni, hidden_layer_size, No)}')
print(f'Activation function is set to {FUNC}')

network = NN(size=network_size, func=FUNC, train_sample=train_sample, expected_sample=expected_sample)

WORD = 'Zdařilá Víska'
word_vector = word_to_vector(WORD)
#network.predict_word(word='Zdařilá Víska')

print('train_sample.shape', train_sample.shape)
print('expected_sample.shape', expected_sample.shape)
start = time.time()
print('starting')
# network.cost_of_sample(vector_sample=vector_sample, expected_sample=expected_sample)
print(network.neg_grad())
end = time.time()
print('Finished in', end-start)
print('Average time per row:', (end-start) / SAMPLE_SIZE)
# print(network.cost_of_sample(vector_sample=vector_sample, expected_sample=expected_sample))

Training the neural network with a sample of 5007 names.
alpha = 5 → optimal hidden layer size: 35
Initialising a neural network with shape (43, 35, 14)
Activation function is set to sigmoid
train_sample.shape (10, 43)
expected_sample.shape (10, 14)
starting
[-0. -0. -0. ... -0. -0. -0.]
Finished in 15.315056085586548
Average time per row: 1.5315056085586547


In [6]:
def cost(vector, expected):
    return np.sum((vector - expected) ** 2)

def cost_of_sample(vector_sample: np.ndarray, expected_sample: np.ndarray):
    costs = np.array(list(map(cost, vector_sample, expected_sample)))
    return np.mean(costs)

vector_sample = np.ones(shape=(5, 2))
expected_sample = np.array(uniform(size=(5,2)))

cost_of_sample(vector_sample=vector_sample, expected_sample=expected_sample)

0.5136586341627692

In [8]:
def function(u: np.array, v:np.array):
    return np.cross(u, v)

function(np.array([1, 0]), np.array([0, 1]))

u = np.array([1, 0, 0])
v = np.array([0, 1, 0])
f(u, v)

AxisError: axisa: axis -1 is out of bounds for array of dimension 0

In [11]:
def f(u, v):
    return u + v

u = np.array([0, 0, 0])
v = np.array([1, 2, 3])

np.diagonal(nd.Gradient(f)(u, v))

array([1., 1., 1.])