In [4]:
# General imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

diabetes_binary = pd.read_csv('features10csv.csv')
target = diabetes_binary['Diabetes_binary']
features = diabetes_binary.drop(columns=['Diabetes_binary'])

# Applying linear reg
regressor = LinearRegression().fit(features, target)
# Checking the accuracy
print(r2_score(regressor.predict(features), target))

-4.607246333221022


In [10]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=92)


In [14]:
# General imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, accuracy_score

diabetes_binary = pd.read_csv('features10csv.csv')
target = diabetes_binary['Diabetes_binary']
features = diabetes_binary.drop(columns=['Diabetes_binary'])

# Applying linear reg
regressor = LinearRegression().fit(X_train, y_train)
prediction = regressor.predict(X_test)
# Checking the accuracy
print(r2_score(prediction, y_test))

-4.754074112173753


In [6]:
# General imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import r2_score, accuracy_score

diabetes_binary = pd.read_csv('features10csv.csv')
target = diabetes_binary['Diabetes_binary']
features = diabetes_binary.drop(columns=['Diabetes_binary'])

# Applying linear reg
regressor = MLPClassifier().fit(features, target)
# Checking the accuracy
print(r2_score(regressor.predict(features), target))
print(accuracy_score(regressor.predict(features), target))

-2.598259667892939
0.8657100809418691


In [19]:
import time
class MLP():

    
    def __init__(self,L=1,N_l=128):
        self.L = L
        self.N_l = N_l
        self.__init_weights()
        self.train_loss = list()
        self.train_acc = list()
        self.val_loss = list()
        self.val_acc = list()
        self.train_time = list()
        self.tot_time = list()
        self.metrics = [self.train_loss,self.train_acc,self.val_loss,self.val_acc,self.train_time,self.tot_time]
        
    def __sigmoid(self,x):
        # VCompute the sigmoid
        return 1./(1.+np.exp(-x))
    
    def __softmax(self,x):
        # Compute softmax along the rows of the input
        exponent = np.exp(x)
        return exponent/exponent.sum(axis=1,keepdims=True)
    
    def __loss(self,y_pred,y):
        # Compute the loss along the rows, averaging along the number of samples
        return ((-np.log(y_pred))*y).sum(axis=1).mean()
    def __accuracy(self,y_pred,y):  
        # Compute the accuracy along the rows, averaging along the number of samples
        return np.all(y_pred==y,axis=1).mean()
    
    def __sigmoid_prime(self,h):
        # Compute the derivative of sigmoid where h=sigmoid(x)
        return h*(1-h)
    
    def __to_categorical(self,x):  
        # Transform probabilities into categorical predictions row-wise, by simply taking the max probability
        categorical = np.zeros((x.shape[0],self.Y.shape[1]))
        categorical[np.arange(x.shape[0]),x.argmax(axis=1)] = 1
        return categorical
    
    def __init_weights(self):
        # Initialize the weights of the network given the sizes of the layers
        self.weights = list()
        for i in range(self.layer_sizes.shape[0]-1):
            self.weights.append(np.random.uniform(-1,1,size=[self.layer_sizes[i],self.layer_sizes[i+1]]))
        self.weights = np.asarray(self.weights)
    
    def __init_layers(self,batch_size):
        # Initialize and allocate arrays for the hidden layer activations 
        self.__h = [np.empty((batch_size,layer)) for layer in self.layer_sizes]
    
    def __feed_forward(self,batch):
        # Perform a forward pass of `batch` samples (N_samples x N_features)
        h_l = batch
        self.__h[0] = h_l
        for i,weights in enumerate(self.weights):
            h_l = self.__sigmoid(h_l.dot(weights))
            self.__h[i+1]=h_l
        self.__out = self.__softmax(self.__h[-1])

    def __back_prop(self,batch_y):
        # Update the weights of the network through back-propagation
        delta_t = (self.__out - batch_y)*self.__sigmoid_prime(self.__h[-1])
        for i in range(1,len(self.weights)+1):
            self.weights[-i]-=self.lr*(self.__h[-i-1].T.dot(delta_t))/self.batch_size
            delta_t = self.__sigmoid_prime(self.__h[-i-1])*(delta_t.dot(self.weights[-i].T))
            
    def predict(self,X):
        # Generate a categorical, one-hot, prediction given an input X
        X = np.concatenate((X,np.ones((X.shape[0],1))),axis=1)
        self.__init_layers(X.shape[0])
        self.__feed_forward(X)
        return self.__to_categorical(self.__out)
    
    def evaluate(self,X,Y):
        # Evaluate the performance (accuracy) predicting on X with true labels Y
        prediction = self.predict(X)
        return self.__accuracy(prediction,Y)
    
    def train(self,batch_size=8,epochs=25,lr=1.0):
        # Train the model with a given batch size, epochs, and learning rate. Store and print relevant metrics.
        self.lr = lr
        self.batch_size=batch_size
        for epoch in range(epochs):
            start = time.time()
            
            self.__init_layers(self.batch_size)
            shuffle = np.random.permutation(self.n_samples)
            train_loss = 0
            train_acc = 0
            X_batches = np.array_split(self.X[shuffle],self.n_samples/self.batch_size)
            Y_batches = np.array_split(self.Y[shuffle],self.n_samples/self.batch_size)
            for batch_x,batch_y in zip(X_batches,Y_batches):
                self.__feed_forward(batch_x)  
                train_loss += self.__loss(self.__out,batch_y)
                train_acc += self.__accuracy(self.__to_categorical(self.__out),batch_y)
                self.__back_prop(batch_y)
                
            train_loss = (train_loss/len(X_batches))
            train_acc = (train_acc/len(X_batches))
            self.train_loss.append(train_loss)
            self.train_acc.append(train_acc)
            
            train_time = round(time.time()-start,3)
            self.train_time.append(train_time)
            
            self.__init_layers(self.X_val.shape[0])
            self.__feed_forward(self.X_val)
            val_loss = self.__loss(self.__out,self.Y_val)
            val_acc = self.__accuracy(self.__to_categorical(self.__out),self.Y_val)
            self.val_loss.append(val_loss)
            self.val_acc.append(val_acc)
            
            tot_time = round(time.time()-start,3)
            self.tot_time.append(tot_time)
            
            print(f"Epoch {epoch+1}: loss = {train_loss.round(3)} | acc = {train_acc.round(3)} | val_loss = {val_loss.round(3)} | val_acc = {val_acc.round(3)} | train_time = {train_time} | tot_time = {tot_time}")
    

In [20]:
diabetes_binary = pd.read_csv('features10csv.csv')
target = diabetes_binary['Diabetes_binary']
features = diabetes_binary.drop(columns=['Diabetes_binary'])

# Applying linear reg
regressor = model = MLP(L=1,N_l=128)
model.train(batch_size=8,epochs=25,lr=1.0)
# Checking the accuracy
print(r2_score(regressor.predict(features), target))
print(accuracy_score(regressor.predict(features), target))

AttributeError: 'MLP' object has no attribute 'layer_sizes'

In [30]:
#import required libaries
import numpy as np
from matplotlib import pyplot as plt

class MLP():
    
    """
    This is the MLP class used to feedforward and backpropagate the network across a defined number 
    of iterations and produce predictions. After iteration the predictions are assessed using 
    Binary Cross Entropy Cost function.  
    """
    
    print('Running...')
    
    def __init__(self, design_matrix, Y, iterations=100000, lr=1e-1, input_layer = 2, hidden_layer = 3,output_layer =1):
        self.design_matrix = design_matrix #design matrix attibute
        self.iterations = iterations #iterations attibute
        self.lr = lr #learning rate attibute
        self.input_layer = input_layer #input layer attibute 
        self.hidden_layer = hidden_layer #hidden layer attibute
        self.output_layer = output_layer #output layer attibute
        self.weight_matrix_1 = np.random.randn(self.input_layer, self.hidden_layer) #weight attribute connecting to the hidden layer
        self.weight_matrix_2 = np.random.randn(self.hidden_layer, self.output_layer)#weight attribute connecting to the output layer
        self.cost = [] #cost list attribute 
        self.p_hats = [] #predictions list attribute
        self.Y = Y

    def sigmoid(self, x): # sigmoid function used at the hidden layer and output layer
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x): # sigmoid derivative used for backpropgation 
        return self.sigmoid(x) * (1 - self.sigmoid(x))

    def forward_propagation(self):#define function to feedforward the network 
        z = np.dot(self.design_matrix, self.weight_matrix_1) #linear transformation to the hidden layer
        activation_func = self.sigmoid(z)#hidden layer activation function
        zh = np.dot(activation_func, self.weight_matrix_2)#linear transformation to the output layer
        p_hat = self.sigmoid(zh)#output layer prediction
        return z, activation_func, zh, p_hat

    def BCECost(self, y, p_hat): # binary cross entropy cost function
        bce_cost = -(np.sum(y * np.log(p_hat) + (1 - y) * np.log(1 - p_hat))) / len(y)
        return bce_cost

    def backword_prop(self, z_1, activation_func, z_2, p_hat): #backpropagation
        del_2_1 = p_hat - self.Y
        partial_deriv_2 = np.dot(activation_func.T, del_2_1) #∂loss/∂p *∂p/∂zh * ∂zh/∂wh
        del_1_1 = del_2_1 
        del_1_2 = np.multiply(del_1_1, self.weight_matrix_2.T) 
        del_1_3 = np.multiply(del_1_2, self.sigmoid_derivative(z_1))
        partial_deriv_1 = np.dot(self.design_matrix.T, del_1_3) #∂loss/∂p * ∂p/∂zh * ∂zh/∂h * ∂h/∂z * ∂z/∂w
        return partial_deriv_2, partial_deriv_1

    def train(self):#train the network
        for i in range(self.iterations): #loop based on number of iterations
            z_1, activation_func, z_2, p_hat = self.forward_propagation()# feedforward
            partial_deriv_2, partial_deriv_1 = self.backword_prop(z_1, activation_func, z_2, p_hat)#backpropgate
            self.weight_matrix_1 = self.weight_matrix_1 - self.lr * partial_deriv_1#update weights connecting to the hidden layer (gradient descent)
            self.weight_matrix_2 = self.weight_matrix_2 - self.lr * partial_deriv_2#update weights connecting to the output layer (gradient descent )
            self.cost.append(self.BCECost(self.Y, p_hat))#store BCE cost in list
            self.p_hats.append(p_hat)#store predictions in list
        
        
        print('Training Complete')
        print('----------------------------------------------------------------------------')


Running...


In [31]:
mlp = MLP(X_train,y_train, input_layer=10) #Pass data to the model (design matrix and y label)
mlp.train() #Train the model


MemoryError: Unable to allocate 110. GiB for an array with shape (121766, 121766) and data type float64

# IRIS DATA
https://abtinmy.github.io/CS-SBU-NeuralNetwork/lectures/introduction/MLP-Scratch-Iris

In [64]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def sigmoid_derivative(z):
    s = sigmoid(z)
    return s * (1 - s)

class MLP:
    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.01):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.learning_rate = learning_rate
        
        # initialize weights randomly
        self.weights1 = np.random.randn(self.input_size, self.hidden_size)
        self.weights2 = np.random.randn(self.hidden_size, self.output_size)
        
        # initialize biases to 0
        self.bias1 = np.zeros((1, self.hidden_size))
        self.bias2 = np.zeros((1, self.output_size))
    
    def fit(self, X, y, epochs=1000):
        for epoch in range(epochs):
            # feedforward
            layer1 = X.dot(self.weights1) + self.bias1
            activation1 = sigmoid(layer1)
            layer2 = activation1.dot(self.weights2) + self.bias2
            activation2 = sigmoid(layer2)
            
            # backpropagation
            error = activation2 - y
            d_weights2 = activation1.T.dot(error * sigmoid_derivative(layer2))
            d_bias2 = np.sum(error * sigmoid_derivative(layer2), axis=0, keepdims=True)
            error_hidden = error.dot(self.weights2.T) * sigmoid_derivative(layer1)
            d_weights1 = X.T.dot(error_hidden)
            d_bias1 = np.sum(error_hidden, axis=0, keepdims=True)
            
            # update weights and biases
            self.weights2 -= self.learning_rate * d_weights2
            self.bias2 -= self.learning_rate * d_bias2
            self.weights1 -= self.learning_rate * d_weights1
            self.bias1 -= self.learning_rate * d_bias1
    
    def predict(self, X):
        layer1 = X.dot(self.weights1) + self.bias1
        activation1 = sigmoid(layer1)
        layer2 = activation1.dot(self.weights2) + self.bias2
        activation2 = sigmoid(layer2)
        return (activation2 > 0.5).astype(int)


In [63]:
import numpy as np
from sklearn import datasets

# load iris dataset
iris = datasets.load_iris()
X = iris["data"][:, (2, 3)]  # petal length, petal width
y = (iris["target"] == 2).astype(int)  # 1 if Iris-Virginica, else 0
y = y.reshape([150,1])

In [65]:
# create an instance of the MLP class
mlp = MLP(input_size=2, hidden_size=4, output_size=1)

# train the MLP on the training data
mlp.fit(X, y)

# make predictions on the test data
y_pred = mlp.predict(X)

# evaluate the accuracy of the MLP
accuracy = np.mean(y_pred == y)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.95


### Own data

In [70]:
# General imports
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score, accuracy_score

diabetes_binary = pd.read_csv('features10csv.csv')
sample = diabetes_binary.sample(200, random_state=92)
target = sample['Diabetes_binary']
features = sample.drop(columns=['Diabetes_binary'])
x_t, x_te, y_t, y_te = train_test_split(features, target, test_size=0.2, random_state=92)

x_t.shape

(160, 10)

In [62]:
# create an instance of the MLP class
mlp = MLP(input_size=10, hidden_size=1, output_size=1)

# train the MLP on the training data
mlp.fit(x_t, y_t)

# make predictions on the test data
y_pred = mlp.predict(x_te)

# evaluate the accuracy of the MLP
print(f"Accuracy: {accuracy_score(y_pred,y_te)}")

ValueError: Dot product shape mismatch, (160, 161) vs (1, 1)

# YT VID
https://github.com/musikalkemist/DeepLearningForAudioWithPython/blob/master/8-%20Training%20a%20neural%20network%3A%20Implementing%20back%20propagation%20from%20scratch/code/mlp.py

In [72]:
import numpy as np
from random import random


class MLP(object):
    """A Multilayer Perceptron class.
    """

    def __init__(self, num_inputs=3, hidden_layers=[3, 3], num_outputs=2):
        """Constructor for the MLP. Takes the number of inputs,
            a variable number of hidden layers, and number of outputs

        Args:
            num_inputs (int): Number of inputs
            hidden_layers (list): A list of ints for the hidden layers
            num_outputs (int): Number of outputs
        """

        self.num_inputs = num_inputs
        self.hidden_layers = hidden_layers
        self.num_outputs = num_outputs

        # create a generic representation of the layers
        layers = [num_inputs] + hidden_layers + [num_outputs]

        # create random connection weights for the layers
        weights = []
        for i in range(len(layers) - 1):
            w = np.random.rand(layers[i], layers[i + 1])
            weights.append(w)
        self.weights = weights

        # save derivatives per layer
        derivatives = []
        for i in range(len(layers) - 1):
            d = np.zeros((layers[i], layers[i + 1]))
            derivatives.append(d)
        self.derivatives = derivatives

        # save activations per layer
        activations = []
        for i in range(len(layers)):
            a = np.zeros(layers[i])
            activations.append(a)
        self.activations = activations


    def forward_propagate(self, inputs):
        """Computes forward propagation of the network based on input signals.

        Args:
            inputs (ndarray): Input signals
        Returns:
            activations (ndarray): Output values
        """

        # the input layer activation is just the input itself
        activations = inputs

        # save the activations for backpropogation
        self.activations[0] = activations

        # iterate through the network layers
        for i, w in enumerate(self.weights):
            # calculate matrix multiplication between previous activation and weight matrix
            net_inputs = np.dot(activations, w)

            # apply sigmoid activation function
            activations = self._sigmoid(net_inputs)

            # save the activations for backpropogation
            self.activations[i + 1] = activations

        # return output layer activation
        return activations


    def back_propagate(self, error):
        """Backpropogates an error signal.
        Args:
            error (ndarray): The error to backprop.
        Returns:
            error (ndarray): The final error of the input
        """

        # iterate backwards through the network layers
        for i in reversed(range(len(self.derivatives))):

            # get activation for previous layer
            activations = self.activations[i+1]

            # apply sigmoid derivative function
            delta = error * self._sigmoid_derivative(activations)

            # reshape delta as to have it as a 2d array
            delta_re = delta.reshape(delta.shape[0], -1).T

            # get activations for current layer
            current_activations = self.activations[i]

            # reshape activations as to have them as a 2d column matrix
            current_activations = current_activations.reshape(current_activations.shape[0],-1)

            # save derivative after applying matrix multiplication
            self.derivatives[i] = np.dot(current_activations, delta_re)

            # backpropogate the next error
            error = np.dot(delta, self.weights[i].T)


    def train(self, inputs, targets, epochs, learning_rate):
        """Trains model running forward prop and backprop
        Args:
            inputs (ndarray): X
            targets (ndarray): Y
            epochs (int): Num. epochs we want to train the network for
            learning_rate (float): Step to apply to gradient descent
        """
        # now enter the training loop
        for i in range(epochs):
            sum_errors = 0

            # iterate through all the training data
            for j, input in enumerate(inputs):
                target = targets[j]

                # activate the network!
                output = self.forward_propagate(input)

                error = target - output

                self.back_propagate(error)

                # now perform gradient descent on the derivatives
                # (this will update the weights
                self.gradient_descent(learning_rate)

                # keep track of the MSE for reporting later
                sum_errors += self._mse(target, output)

            # Epoch complete, report the training error
            print("Error: {} at epoch {}".format(sum_errors / len(items), i+1))

        print("Training complete!")
        print("=====")


    def gradient_descent(self, learningRate=1):
        """Learns by descending the gradient
        Args:
            learningRate (float): How fast to learn.
        """
        # update the weights by stepping down the gradient
        for i in range(len(self.weights)):
            weights = self.weights[i]
            derivatives = self.derivatives[i]
            weights += derivatives * learningRate


    def _sigmoid(self, x):
        """Sigmoid activation function
        Args:
            x (float): Value to be processed
        Returns:
            y (float): Output
        """

        y = 1.0 / (1 + np.exp(-x))
        return y


    def _sigmoid_derivative(self, x):
        """Sigmoid derivative function
        Args:
            x (float): Value to be processed
        Returns:
            y (float): Output
        """
        return x * (1.0 - x)


    def _mse(self, target, output):
        """Mean Squared Error loss function
        Args:
            target (ndarray): The ground trut
            output (ndarray): The predicted values
        Returns:
            (float): Output
        """
        return np.average((target - output) ** 2)

In [76]:
# General imports
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score, accuracy_score

diabetes_binary = pd.read_csv('features10csv.csv')
sample = diabetes_binary.sample(200, random_state=92)
target = sample['Diabetes_binary'].astype(int)
features = sample.drop(columns=['Diabetes_binary'])
x_t, x_te, y_t, y_te = train_test_split(features, target, test_size=0.2, random_state=92)

In [78]:
features.keys()

Index(['HighBP', 'HighChol', 'CholCheck', 'BMI', 'PhysActivity', 'Fruits',
       'Veggies', 'AnyHealthcare', 'GenHlth', 'DiffWalk'],
      dtype='object')

In [77]:
# create a Multilayer Perceptron with one hidden layer
mlp = MLP(10, [3], 2)

# train network
mlp.train(features, target, 50, 0.1)

# get a prediction
output = mlp.forward_propagate(y_te)

print()
print("Our network believes that {} + {} is equal to {}".format(input[0], input[1], output[0]))

KeyError: 0