**Melhorias**

- Mostrar estrutura da rede em um grafo

https://github.com/amir7d0/classification-neural-network/blob/main/ANN.ipynb

In [1]:
import numpy as np

In [2]:
def mse(y, y_hat):
    return np.sum((y_hat - y)**2)/y.size

In [7]:
y = np.array([0,0,1,0,2,2,1])
y_hat = np.array([0.5, 0.7, 0.5, 0.4, 1.2, 1.3, 0.3])

In [12]:
y = y.reshape(1,7)
y_hat = y_hat.reshape(1,7)

In [13]:
mse(y, y_hat)

0.39571428571428563

In [14]:
CategoricalCrossentropy(y_hat, y)

0.1439639269309892

In [4]:
def CategoricalCrossentropy(y_hat, y_true):
        
    m = y_true.shape[1]

    epsilon = 1e-07
    # y_hat and y_true (1, m)
    cce = np.sum(y_true * np.log(y_hat + epsilon), axis=0)

    cce = (-1.0 / m) * np.sum(cce)
    return cce

In [16]:

# Activation Functions
# TODO: IMPLEMENTAR CATEGORICAL CROSS ENTROPY
def tanh(x):
    return np.tanh(x)

def d_tanh(x):
    return 1 - np.square(np.tanh(x))

def sigmoid(x):
    return 1/(1 + np.exp(-x))

def softmax(x):
    return np.exp(x)/np.exp(x).sum()

def d_sigmoid(x):
    return (1 - sigmoid(x)) * sigmoid(x)

# Loss Functions 
def logloss(y, a):
    return -(y*np.log(a) + (1-y)*np.log(1-a))

def d_logloss(y, a):
    return (a - y)/(a*(1 - a))

In [17]:
# The layer class
class Layer:

    activationFunctions = {
        'tanh': (tanh, d_tanh),
        'sigmoid': (sigmoid, d_sigmoid),
        'softmax':(softmax, d_sigmoid)
    }
    learning_rate = 0.1

    def __init__(self, inputs, neurons, activation):
        self.W = np.random.randn(neurons, inputs)
        self.b = np.zeros((neurons, 1))
        self.act, self.d_act = self.activationFunctions.get(activation)
        self.shpW = self.W.shape
        self.shpb = self.b.shape
        

    def feedforward(self, A_prev):
        self.A_prev = A_prev
        self.Z = np.dot(self.W, self.A_prev) + self.b
        self.A = self.act(self.Z)
        return self.A

    def backprop(self, dA):
        dZ = np.multiply(self.d_act(self.Z), dA)
        dW = 1/dZ.shape[1] * np.dot(dZ, self.A_prev.T)
        db = 1/dZ.shape[1] * np.sum(dZ, axis=1, keepdims=True)
        dA_prev = np.dot(self.W.T, dZ)

        self.W = self.W - self.learning_rate * dW
        self.b = self.b - self.learning_rate * db

        return dA_prev

# ES

In [6]:
def mutation(filho, a):
    for i in range(len(filho)):
        filho[i] += np.random.normal(0, a, size=filho[i].shape)
    return filho

def marriage(pais,p):
    """
    Escolhe os pais para o casamento
    
    Params:
        - pais: população de pais
        - p: numero de pais escolhidos para o casamento
    return:
        - pais_escolhidos: lista de pais escolhidos
        - indices_pais: indices dos pais escolhidos
    """
    # indice dos pais
    pais_ids = np.arange(0,len(pais))
    # escolha p idices aleatorioamente
    indices_pais = np.random.choice(pais_ids,size=p, replace=False)
    # seleciona os p pais referentes aos indices
    pais_escolhidos = [pais[i] for i in indices_pais]

    return pais_escolhidos

def recombination(pais):
    filho = [np.zeros_like(p) for p in pais[0]]
    n_layers = len(pais[0])
    for l in range(n_layers):

        j = np.random.randint(0, len(pais))
        pai = pais[j][l] 
        # se for a ultima camada       
        if (l==n_layers-1):
            # percorre vetores de pesos da ultima camada
            for c in range(len(filho[l])):
                # percorre valores, exceto bias
                for ci in range(len(filho[l][c])-1):
                    a = pai[c][:-1]
                    filho[l][c][ci] = np.random.choice(a)
                filho[l][c][-1] = pai[c][-1]
        else:
            for i in range(len(filho[l])-1):
                # a ultima coluna é o bias
                k = np.random.randint(0, filho[l].shape[1]-1)
                filho[l][:, i] = pai[:, k]
            filho[l][:, -1] = pai[:, -1]
    return filho

In [27]:
#layers = [Layer(2, 3, 'tanh'), Layer(1, 3, 'softmax')]
layers = [Layer(2, 3, 'tanh')]

In [26]:
softmax(np.array([[0.1, 0.04, 0.8],[0.1, 0.04, 0.8]]))

array([[0.12640572, 0.11904442, 0.25454986],
       [0.12640572, 0.11904442, 0.25454986]])

In [261]:
def initialize_population(layers,n):
    population = []
    for i in range(n):
        network = []
        for layer in layers:
            neurons, inputs =  layer.shpW
            W = np.random.randn(neurons, inputs)
            b = np.zeros((neurons, 1))
            p = np.concatenate([W,b], axis=1)
            network.append(p)
        population.append(network)
    return population

def eval_individual(individual, net_layers, x_train, y_train):
    A = x_train
    lim_w = net_layers[0].W.shape[1]
    lim_wo = net_layers[-1].W.shape[1]
    # atualiza w da camada
    #assert(layers[0].shpW[0] == x_train.shape[0])
    for i in range(len(net_layers)):
        layer = net_layers[i]        
        # se for a ultima camada
        if i == len(net_layers)-1:
            layer.W = individual[i][:, :lim_wo]            
            layer.b = individual[i][:, lim_wo:]
            A = layer.feedforward(A)
        else:
            layer.W = individual[i][:, :lim_w]
            layer.b = individual[i][:, -1:]
            A = layer.feedforward(A)
        
    
    #cost = 1/m * np.sum(logloss(y_train, A))
    cost = mse(y_train, A)
    # #costs.append(cost)
    return cost  

In [10]:
dataset = np.array([[2.7810836,2.550537003,0],
	[1.465489372,2.362125076,0],
	[3.396561688,4.400293529,0],
	[1.38807019,1.850220317,0],
	[3.06407232,3.005305973,0],
	[7.627531214,2.759262235,1],
	[5.332441248,2.088626775,1],
	[6.922596716,1.77106367,1],
	[8.675418651,-0.242068655,1],
	[7.673756466,3.508563011,1]])

In [11]:
x_train = dataset[:, :2]
x_train = x_train.T
yy_train = dataset[:,-1]

In [12]:
y_train = np.array([yy_train]) # 1 x m

In [228]:
yy_train.shape

(10,)

# training

In [9]:

def train_nn(x_train=None, y_train=None, epochs=50, layers=[], mu=20, lam=60, p=2, a=0.15, lograte=10):
    costs = [] # to plot graph
    best, best_score = None, np.inf
    population = initialize_population(layers, mu)
    for epoch in range(epochs):
        children = list()
        scores_children = list()
        for l in range(lam):
            pais_marriage = marriage(population, p)
            filho = recombination(pais_marriage)
            filho = mutation(filho, a)
            score_filho = eval_individual(filho, layers, x_train, y_train)
            if (score_filho<best_score):
                best_score = score_filho
                best = filho
            scores_children.append(score_filho)
            children.append(filho)
        ranks = np.argsort(scores_children)
        population = [children[ranks[k]] for k,_ in enumerate(ranks[:mu])]
        if epoch%lograte==0:
            print ("Epoch={} bestscore={:.3f}".format(epoch, best_score))
    return best
        
def set_weights(weights, net_layers):
    lim_w = net_layers[0].W.shape[1]
    lim_wo = net_layers[-1].W.shape[1]
    for i in range(len(net_layers)):
        layer = net_layers[i]        
        # se for a ultima camada
        if i == len(net_layers)-1:
            layer.W = weights[i][:, :lim_wo]            
            layer.b = weights[i][:, lim_wo:]
        else:
            layer.W = weights[i][:, :lim_w]
            layer.b = weights[i][:, -1:]

def predict(x_train, layers, weights=None):
    # Making predictions
    A = x_train
    if (weights):
        set_weights(weights, layers)
    for layer in layers:
        A = layer.feedforward(A)
    return A

In [304]:
np.argmax(softmax([0, 0.5, 1]))

2

# Multicategorical

- usar relu na primeira camada
- usar softmax na ultima camada com n_outputs=n_classes
- a saida da ultima camada deve ser transposta

In [32]:
layers = [Layer(2, 3, 'tanh'), Layer(3, 3, 'softmax')]
y_hat = np.argmax(predict(x_train, layers).T, axis=1)

In [40]:
mse(y_hat, y_train)

0.5

In [274]:
m = 10
# definição da estrutura da rede
layers = [Layer(2, 3, 'tanh'), Layer(3, 1, 'sigmoid')]


In [251]:
best = train_nn(x_train=x_train, y_train=y_train, epochs=150, layers=layers, mu=20, lam=80, p=2, a=0.15)

Epoch=0 bestscore=0.183
Epoch=10 bestscore=0.174
Epoch=20 bestscore=0.107
Epoch=30 bestscore=0.107
Epoch=40 bestscore=0.069
Epoch=50 bestscore=0.056
Epoch=60 bestscore=0.056
Epoch=70 bestscore=0.051
Epoch=80 bestscore=0.024
Epoch=90 bestscore=0.013
Epoch=100 bestscore=0.013
Epoch=110 bestscore=0.013
Epoch=120 bestscore=0.013
Epoch=130 bestscore=0.013
Epoch=140 bestscore=0.013


In [167]:
predict(x_train, layers, best)

[[0.05374989 0.00358972 0.00865499 0.00738564 0.04367041 0.9695276
  0.89824152 0.97560667 0.99034849 0.94558455]]


In [278]:
import copy
# mantem os valores originais
temp_layers = copy.deepcopy(layers)
temp_layers[0].W

array([[ 0.59123459,  0.45565665],
       [-0.21885318, -0.85219604],
       [ 0.76345889, -0.0843669 ]])

In [259]:
y = np.array([1, 2, 3])
y_hat = np.array([1,2,3])

In [260]:
mse(y, y_hat)

0.0

In [212]:
import pandas as pd

In [214]:
wheat_seeds = pd.read_csv("wheat-seeds.csv", header=None)
wheat_seeds.head()

Unnamed: 0,0,1,2,3,4,5,6,7
0,15.26,14.84,0.871,5.763,3.312,2.221,5.22,1
1,14.88,14.57,0.8811,5.554,3.333,1.018,4.956,1
2,14.29,14.09,0.905,5.291,3.337,2.699,4.825,1
3,13.84,13.94,0.8955,5.324,3.379,2.259,4.805,1
4,16.14,14.99,0.9034,5.658,3.562,1.355,5.175,1


In [245]:
wheat_seeds.describe()

Unnamed: 0,0,1,2,3,4,5,6,7
count,210.0,210.0,210.0,210.0,210.0,210.0,210.0,210.0
mean,14.847524,14.559286,0.870999,5.628533,3.258605,3.700201,5.408071,2.0
std,2.909699,1.305959,0.023629,0.443063,0.377714,1.503557,0.49148,0.818448
min,10.59,12.41,0.8081,4.899,2.63,0.7651,4.519,1.0
25%,12.27,13.45,0.8569,5.26225,2.944,2.5615,5.045,1.0
50%,14.355,14.32,0.87345,5.5235,3.237,3.599,5.223,2.0
75%,17.305,15.715,0.887775,5.97975,3.56175,4.76875,5.877,3.0
max,21.18,17.25,0.9183,6.675,4.033,8.456,6.55,3.0


In [215]:
wheat_seeds[7].value_counts()

1    70
2    70
3    70
Name: 7, dtype: int64

In [289]:
dataset = wheat_seeds.values[:, :-1]
labels = wheat_seeds.values[:, -1]

#y_train = np.array([y_train])

m = dataset.shape[1]

In [320]:
# definição da estrutura da rede
layers = [Layer(7, 3, 'sigmoid'), Layer(3, 1, 'softmax')]

In [286]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [311]:
X_train, X_test, y_train, y_teste = train_test_split(dataset, labels, test_size=0.1, random_state=42)

In [312]:
X_train = MinMaxScaler().fit_transform(X_train)
X_train = X_train.T

In [313]:
X_test = MinMaxScaler().fit_transform(X_test)
X_test = X_test.T

In [314]:
y_train = y_train - 1
y_train = np.array([y_train])

In [315]:
y_train

array([[0., 2., 1., 1., 2., 0., 1., 0., 2., 2., 0., 1., 0., 1., 1., 2.,
        1., 1., 2., 2., 2., 2., 1., 2., 0., 2., 1., 1., 0., 0., 2., 1.,
        0., 0., 1., 1., 0., 0., 0., 2., 0., 1., 1., 1., 2., 2., 0., 0.,
        0., 0., 0., 1., 1., 2., 0., 1., 2., 1., 0., 2., 1., 1., 0., 2.,
        2., 1., 0., 1., 0., 1., 1., 1., 1., 2., 0., 1., 0., 0., 1., 1.,
        0., 0., 2., 2., 0., 0., 2., 2., 2., 0., 0., 1., 2., 1., 2., 1.,
        0., 0., 2., 2., 2., 0., 2., 2., 1., 2., 0., 0., 1., 0., 1., 0.,
        2., 2., 1., 0., 2., 0., 1., 2., 0., 2., 0., 1., 0., 1., 2., 2.,
        2., 0., 1., 0., 0., 1., 1., 1., 2., 2., 1., 0., 0., 0., 2., 1.,
        0., 2., 1., 2., 1., 2., 2., 0., 0., 1., 0., 2., 2., 2., 0., 0.,
        1., 0., 0., 2., 2., 1., 0., 2., 2., 0., 0., 2., 1., 2., 1., 1.,
        1., 1., 1., 1., 2., 0., 2., 1., 1., 0., 1., 2., 1.]])

In [323]:
best = train_nn(x_train=X_train, y_train=y_train, epochs=150, layers=layers, mu=20, lam=80, p=2, a=0.3)

Epoch=0 bestscore=1.617
Epoch=10 bestscore=1.614
Epoch=20 bestscore=1.611
Epoch=30 bestscore=1.609
Epoch=40 bestscore=1.609
Epoch=50 bestscore=1.609
Epoch=60 bestscore=1.609
Epoch=70 bestscore=1.609
Epoch=80 bestscore=1.609
Epoch=90 bestscore=1.609
Epoch=100 bestscore=1.609
Epoch=110 bestscore=1.609
Epoch=120 bestscore=1.609
Epoch=130 bestscore=1.609
Epoch=140 bestscore=1.609


In [331]:
y_pred = predict(X_test, layers, best)

In [337]:
y_pred[0][2]

5.277854969268684e-08

In [335]:
y_teste

array([1., 3., 2., 3., 1., 3., 1., 3., 1., 3., 2., 3., 3., 2., 1., 2., 3.,
       1., 3., 2., 2.])