In [196]:
import os
import sys
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
#import seaborn as sn
import math
from sklearn.preprocessing import OneHotEncoder

In [197]:
def get_data(filename):
    """Load a space-separated data file into a DataFrame of string fields.

    Every line of the file is read as one raw string, stripped of surrounding
    whitespace and split on single spaces; the eight resulting tokens become
    the columns ``target``, ``attr1`` ... ``attr6`` and ``id``.

    Args:
        filename: Path (or anything ``pandas.read_csv`` accepts) of the file.

    Returns:
        DataFrame with the eight columns listed above, one row per line.
        The values are left as strings; no numeric conversion happens here.
    """
    field_names = ['target', 'attr1', 'attr2', 'attr3', 'attr4', 'attr5', 'attr6', 'id']

    # Read each line whole into a single throw-away column.
    raw = pd.read_csv(filename, names=['all'])
    stripped = raw['all'].str.strip()

    # One token array per line, then a single frame built straight from them.
    tokens = [np.array(line.split(' ')) for line in stripped]
    return pd.DataFrame(tokens, index=raw.index, columns=field_names)

In [198]:
# Folder holding the monks-1 files. Set the MONKS_DATA_DIR environment variable
# to run the notebook on another machine; the fallback is the folder the
# notebook was originally written against, so existing setups keep working.
DATA_DIR = os.environ.get("MONKS_DATA_DIR", r"\Users\s512fj-ej021t\OneDrive\Desktop\ML")

df = get_data(os.path.join(DATA_DIR, "monks-1.train"))
df_test = get_data(os.path.join(DATA_DIR, "monks-1.test"))

In [199]:
# Features: every column except the label and the sample id.
X_train = df.drop(columns=['target', 'id'])
X_test = df_test.drop(columns=['target', 'id'])

# Labels: the 'target' strings converted to Python ints, element by element.
y_train = df['target'].map(int)
y_test = df_test['target'].map(int)

In [200]:
# Fit the encoder on the training attributes only, then encode both splits.
ohe = OneHotEncoder()
ohe.fit(X_train, y_train)

# The training matrix is transposed after encoding, i.e. stored as
# (encoded features, samples).
X_train = ohe.transform(X_train).toarray().T
# NOTE(review): X_test is left as (samples, encoded features) -- it is NOT
# transposed like X_train; confirm that downstream cells expect this.
X_test = ohe.transform(X_test).toarray()

# y_train is a pandas Series here, for which .T returns the Series itself.
y_train = y_train.T

In [201]:



# Identity ("linear") activation
def linear(x):
    """Identity activation: hand the pre-activation back unchanged.

    Args:
        x: Pre-activation value.

    Returns:
        The argument itself, untouched.
    """
    return x

def d_linear(x):
    """Derivative of the identity activation.

    Args:
        x: Pre-activation value; it is not used.

    Returns:
        The integer constant 1, whatever ``x`` is.
    """
    return 1

# Sigmoid (logistic) activation
def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, evaluated without overflow.

    The textbook form calls ``exp(-x)`` directly, which overflows and emits a
    RuntimeWarning once ``x`` is strongly negative (below about -709 in double
    precision). Writing the same quantity as ``exp(-log(1 + exp(-x)))`` and
    letting ``np.logaddexp`` evaluate the logarithm keeps every intermediate
    value finite. Results agree with the textbook form up to rounding.

    Args:
        x: Scalar or NumPy array of pre-activations.

    Returns:
        The logistic function of ``x`` (same shape as ``x``), in [0, 1].
    """
    # log(1 + exp(-x)) == logaddexp(0, -x), computed stably by NumPy.
    return np.exp(-np.logaddexp(0, -x))

def d_sigmoid(x):
    """Derivative of the logistic function at ``x``.

    Relies on the identity ``s'(x) = s(x) * (1 - s(x))``, so ``sigmoid`` is
    evaluated only once.

    Args:
        x: Pre-activation value passed straight to ``sigmoid``.

    Returns:
        ``sigmoid(x) * (1 - sigmoid(x))``.
    """
    s = sigmoid(x)
    return s * (1 - s)

# ReLU activation
def relu(x):
    """Rectified linear unit: ``max(x, 0)``.

    The result is always floating point. Returning the integer ``0`` on the
    non-positive branch (as a plain ``if/else`` would) is dangerous when the
    function is wrapped in ``np.vectorize``: the output dtype is inferred from
    the first element, so a non-positive first entry would make the whole
    output integer and silently truncate every later activation.

    Args:
        x: Scalar or NumPy array of pre-activations.

    Returns:
        ``x`` where it is positive and ``0.0`` elsewhere, as float(s).
    """
    # The float literal forces a float result even for integer input.
    return np.maximum(x, 0.0)

def d_relu(x):
    """Derivative of ReLU for a scalar ``x``.

    Args:
        x: Scalar pre-activation.

    Returns:
        1 when ``x`` is strictly positive, otherwise 0 (so 0 at ``x == 0``).
    """
    return 1 if x > 0 else 0
    

# Hyperbolic tangent activation
def TanH(x):
    """Hyperbolic tangent activation; a thin wrapper around ``np.tanh``.

    Args:
        x: Pre-activation value (anything ``np.tanh`` accepts).

    Returns:
        ``np.tanh(x)``.
    """
    return np.tanh(x)

def d_TanH(x):
    """Derivative of the hyperbolic tangent: ``1 - tanh(x)**2``.

    Implemented with NumPy arithmetic only, so it accepts arrays as well as
    scalars, like ``TanH`` does. (``math.pow`` only takes scalars and raises
    on arrays.)

    Args:
        x: Scalar or NumPy array of pre-activations.

    Returns:
        ``1 - tanh(x)**2`` with the same shape as ``x``.
    """
    t = np.tanh(x)
    return 1 - t * t

# Lookup table: activation name -> activation function
act_func = dict(
    lin=linear,
    sigm=sigmoid,
    relu=relu,
    tanh=TanH,
)

# Companion lookup table: activation name -> derivative of that activation
d_act_func = dict(
    lin=d_linear,
    sigm=d_sigmoid,
    relu=d_relu,
    tanh=d_TanH,
)

def MSE(layer, target):
    """Element-wise residual scaled by the number of target columns.

    NOTE(review): despite the name this is not the loss value itself; it looks
    like the gradient of a (half) mean-squared error with respect to the
    output, which is how ``Layer.backward`` consumes it -- confirm.

    Args:
        layer: Network output; must broadcast against ``target``.
        target: 2-D array of desired outputs; ``target.shape[1]`` is the divisor.

    Returns:
        ``(layer - target) / target.shape[1]``.
    """
    n_columns = target.shape[1]
    residual = layer - target
    return residual / n_columns

def binary_crossentropy(layer, target, eps=1e-12):
    """Gradient-style term for binary cross-entropy, safe at saturated outputs.

    Computes ``(p - target) / (p * (1 - p) * target.shape[1])`` where ``p`` is
    ``layer`` clipped to ``[eps, 1 - eps]``. Without the clipping, an output
    that is exactly 0 or 1 makes the denominator zero and yields ``nan`` or
    ``inf``, which then spreads through every weight of the network. For
    outputs strictly inside ``[eps, 1 - eps]`` the result is unchanged.

    NOTE(review): like ``MSE`` this is used by ``Layer.backward`` as the
    derivative of the loss with respect to the output, not as a loss value.

    Args:
        layer: Predicted probabilities; must broadcast against ``target``.
        target: 2-D array of 0/1 labels; ``target.shape[1]`` is the divisor.
        eps: Clipping margin that keeps ``p`` away from 0 and 1.

    Returns:
        Array of the broadcast shape of ``layer`` and ``target``; finite for
        finite inputs.
    """
    p = np.clip(layer, eps, 1 - eps)
    return (p - target) / (p * (1 - p) * target.shape[1])

class Layer:
    """Fully connected layer chained to the layer that feeds it.

    Layers form a singly linked list through ``prev_layer``: calling
    ``forward``, ``backward`` or ``update_weights`` on the last layer recurses
    all the way down to the ``Input`` layer. A layer whose ``target`` is
    ``None`` is treated as hidden; a layer with a target is treated as the
    output layer.

    Activations are stored column-wise: arrays have shape
    ``(units, batch)``.
    """

    def __init__(self, input, prev_layer, dim_layer, act_function, target):
        """Store the wiring and initialise the parameters.

        Args:
            input: Only forwarded to ``init_params`` (unused by this class;
                the ``Input`` subclass stores it).
            prev_layer: Layer feeding this one.
            dim_layer: Number of units in this layer.
            act_function: Key into ``act_func`` / ``d_act_func``.
            target: Desired outputs for an output layer, ``None`` for a
                hidden layer.
        """
        self.prev_layer = prev_layer
        self.target = target
        self.init_params(dim_layer, act_function, input)

    def init_params(self, dim_layer, act_function, input):
        """Draw random weights/biases and resolve the activation functions."""
        self.dim_batch = self.prev_layer.dim_batch
        self.dim_layer = dim_layer
        # Weight matrix, uniform in [-0.5, 0.5): one row per unit of this layer.
        self.W = np.random.uniform(-0.5, 0.5, (dim_layer, self.prev_layer.dim_layer))
        # Bias column vector, same distribution.
        self.b = np.random.uniform(-0.5, 0.5, (dim_layer, 1))
        # Placeholder for the activations; overwritten by forward().
        self.layer = np.empty((dim_layer, self.dim_batch))
        # otypes pins the output dtype. Without it np.vectorize infers the
        # dtype from the first element, so an activation that returns an int
        # there would truncate the rest of the array to integers.
        self.act_function = np.vectorize(act_func[act_function], otypes=[float])
        self.d_act_function = np.vectorize(d_act_func[act_function], otypes=[float])

    def forward(self, mode='train'):
        """Propagate the current input through the chain up to this layer.

        Args:
            mode: ``'train'`` caches the pre-activation (``self.z``) and the
                activation (``self.layer``) for the backward pass;
                ``'predict'`` leaves the cached values untouched. The mode is
                handed on to the previous layers as well.

        Returns:
            Activations of this layer, shape ``(units, batch)``.

        Raises:
            ValueError: If ``mode`` is neither ``'train'`` nor ``'predict'``.
        """
        if mode not in ('train', 'predict'):
            raise ValueError(f"unknown mode {mode!r}; expected 'train' or 'predict'")

        z = self.W.dot(self.prev_layer.forward(mode)) + self.b
        if mode == 'predict':
            return self.act_function(z)

        self.z = z
        self.layer = self.act_function(z)
        return self.layer

    def backward(self, next_delta=None, next_weights=None, lossfunc=None):
        """Back-propagate the error and store the gradients ``d_W`` / ``d_b``.

        Must be called after a ``forward()`` in train mode, which provides
        ``self.z`` and ``self.layer``.

        Args:
            next_delta: Delta of the following layer (hidden layers only).
            next_weights: Weight matrix of the following layer (hidden layers
                only).
            lossfunc: Callable ``(output, target)`` giving the loss derivative
                with respect to the output (output layer only).

        Returns:
            The cached activations of this layer, which the following layer
            uses as its input when forming its own weight gradient.
        """
        if self.target is None:
            # Hidden layer: error signal pulled back through the next weights.
            upstream = next_weights.T.dot(next_delta)
        else:
            # Output layer: error signal comes straight from the loss.
            upstream = lossfunc(self.layer, self.target)

        delta = self.d_act_function(self.z) * upstream
        # Recursing first lets the earlier layers compute their gradients with
        # the not-yet-updated weights; the call returns their activations.
        prev_activation = self.prev_layer.backward(delta, self.W)
        self.d_W = delta.dot(prev_activation.T)
        self.d_b = delta.sum(axis=1, keepdims=True)
        return self.layer

    def update_weights(self, eta, lam):
        """Apply one gradient step to this layer and all previous ones.

        Args:
            eta: Learning rate.
            lam: Weight-decay coefficient; shrinks ``W`` only, not the bias.
        """
        self.W = self.W - eta * self.d_W - lam * self.W
        # The bias gradient is computed in backward(); apply it too.
        self.b = self.b - eta * self.d_b
        self.prev_layer.update_weights(eta, lam)

    def err(self):
        """Mean over the batch of the Euclidean distance output - target.

        Works for any number of output units.
        """
        diff = self.layer - self.target
        return np.sqrt((diff ** 2).sum(axis=0)).mean()

    def rel_err(self):
        """Like ``err`` but each component is divided by its target first.

        Undefined (inf/nan) wherever a target component is 0.
        """
        rel = (self.layer - self.target) / self.target
        return np.sqrt((rel ** 2).sum(axis=0)).mean()

    def err_i(self, i):
        """Mean absolute error of output unit ``i`` over the batch."""
        return np.abs(self.layer[i] - self.target[i]).mean()

    def rel_err_i(self, i):
        """Mean over the batch of ``|output_i - target_i| / target_i``.

        The divisor is the signed target, so the result can be negative; it is
        undefined (inf/nan) wherever the target is 0.
        """
        return (np.abs(self.layer[i] - self.target[i]) / self.target[i]).mean()
    
class Input(Layer):
    """First element of the layer chain: it only holds the data.

    It has no weights and no previous layer; every recursive call that
    travels down the chain (forward, backward, update_weights) stops here and
    simply returns the stored data.
    """

    def __init__(self, input, input_dim):
        """Create the input layer.

        Args:
            input: Data array; ``input.shape[1]`` is taken as the batch size.
            input_dim: Stored as ``dim_layer`` (number of input features).
        """
        # No previous layer and no target; 'lin' is passed but never looked up
        # because init_params is overridden below.
        super().__init__(input, None, input_dim, 'lin', None)

    def init_params(self, dim_layer, act_function, input):
        """Store the data and its dimensions; no parameters are created."""
        self.dim_layer = dim_layer
        self.dim_batch = input.shape[1]
        self.layer = input

    def forward(self, mode='train'):
        """Return the stored data; ``mode`` is ignored."""
        return self.layer

    def backward(self, next_delta=None, next_weights=None):
        """End of back-propagation: return the stored data, compute nothing."""
        return self.layer

    def update_weights(self, eta, lam):
        """Nothing to update; returns the stored data."""
        return self.layer
        



In [202]:
# Lookup table: loss name -> function used as the loss term in back-propagation
lossfunc = dict(
    MSE=MSE,
    binary_crossentropy=binary_crossentropy,
)

In [203]:
class NeuralNetwork:
    """Thin driver around a chain of layers: prediction and mini-batch training.

    The network is described by its two ends; the layers in between are
    reached through the ``prev_layer`` links of the output layer.
    """

    def __init__(self, input_layer, output_layer, loss):
        """Store the two ends of the chain and resolve the loss.

        Args:
            input_layer: First layer of the chain (holds the data).
            output_layer: Last layer of the chain.
            loss: Key into the module-level ``lossfunc`` dictionary.
        """
        self.input_layer = input_layer
        self.output_layer = output_layer
        self.lossfunc = lossfunc[loss]

    def predict(self, input):
        """Run the network on ``input`` and return its output.

        Args:
            input: Array of shape ``(features, samples)``.

        Returns:
            Whatever the output layer's ``forward(mode='predict')`` returns.
            Note that ``input`` stays stored in the input layer afterwards.
        """
        self.input_layer.layer = input
        return self.output_layer.forward(mode='predict')

    def train(self, input, target, epoche, eta, lam, n_batch, validation_split):
        """Train with mini-batch gradient descent.

        Args:
            input: Array of shape ``(features, samples)``.
            target: Array of shape ``(outputs, samples)``.
            epoche: Number of passes over the training samples.
            eta: Learning rate handed to ``update_weights``.
            lam: Weight-decay coefficient handed to ``update_weights``.
            n_batch: Mini-batch size. The last batch of an epoch also takes
                the leftover samples; if fewer than ``n_batch`` samples are
                available they form one single batch.
            validation_split: Fraction of the samples (taken from the end)
                that is held out from training. The held-out part is not used
                for anything yet.

        Prints the epoch counter every 100 epochs.
        """
        n_samples = input.shape[1]
        n_val = int(np.floor(validation_split * n_samples))
        n_train = n_samples - n_val

        train_input = input[:, :n_train]
        train_target = target[:, :n_train]

        # At least one batch whenever there is data, so a batch size larger
        # than the training set does not silently skip training.
        n_batches = max(1, n_train // n_batch) if n_train else 0

        # Sample (column) indices, reshuffled in place at every epoch.
        index = np.arange(n_train)

        for j in range(epoche):
            if j % 100 == 0:
                print(j)

            np.random.shuffle(index)
            input_new = train_input[:, index]
            target_new = train_target[:, index]

            for k in range(n_batches):
                start = k * n_batch
                # The last batch runs to the end to absorb the remainder.
                stop = None if k == n_batches - 1 else start + n_batch

                # Input.forward() reads `.layer`, so the batch must go there.
                self.input_layer.layer = input_new[:, start:stop]
                self.output_layer.target = target_new[:, start:stop]

                self.output_layer.forward()
                self.output_layer.backward(lossfunc=self.lossfunc)
                self.output_layer.update_weights(eta, lam)

            


    

In [204]:
# Weight initialisation and mini-batch shuffling both draw from NumPy's global
# RNG; seeding it makes a Restart & Run All reproduce the same run.
np.random.seed(0)

# Targets as a (1, samples) row, built once and shared by the output layer and
# the training call.
targets = y_train.to_numpy().reshape((1, -1))

# Layer sizes are derived from the data instead of being hard-coded:
# X_train is stored as (encoded features, samples).
input_layer = Input(X_train, X_train.shape[0])
hidden_layer = Layer(None, input_layer, 8, 'tanh', None)
output_layer = Layer(None, hidden_layer, 1, 'sigm', targets)

model = NeuralNetwork(input_layer, output_layer, 'binary_crossentropy')
model.train(X_train, targets, epoche=1000, eta=0.01, lam=0.01, n_batch=32, validation_split=0.3)

0
100
200
300
400
500
600
700
800
900
