# Manual implementation of a Neural Network

## Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import codecarbon
import time
import pprint
np.random.seed(0)

In [2]:
class DNN():
    def __init__(self, input_size, output_size, layers = [], activations = [], learning_rate = 0.1, loss = 'mean_squared_error'):
        self.input_size = input_size
        self.output_size = output_size
        self.layers = layers
        self.activations_functions = []
        self.loss_function = lambda x, y: np.mean(np.square(x - y))
        self.learning_rate = learning_rate
        self.weights = []
        self.biases = []
        self.num_layers = len(layers)
        self.initialize_weights_and_biases()
        self.set_activations_and_loss(activations, loss)

    def set_activations_and_loss(self, activations, loss):
        if len(activations) == 0:
            activations = ['sigmoid'] * (len(self.layers) + 1)
        elif len(activations) != len(self.layers) + 1:
            raise ValueError('Number of activations must be equal to the number of layers + 1')
        
        self.activations = activations
        self.activations_functions = [None] * len(activations)
        for i in range(len(activations)):
            if isinstance(activations[i], str):
                if activations[i] == 'sigmoid':
                    self.activations_functions[i] = lambda x: 1 / (1 + np.exp(-x))
                elif activations[i] == 'relu':
                    self.activations_functions[i] = lambda x: np.maximum(0, x)
                elif activations[i] == 'tanh':
                    self.activations_functions[i] = lambda x: np.tanh(x)
                elif activations[i] == 'none':
                    self.activations_functions[i] = lambda x: x
                elif activations[i] == 'softmax':
                    self.activations_functions[i] = lambda x: np.exp(x) / np.sum(np.exp(x), axis=0)
                else:
                    raise ValueError('Activation function must be a string, supported functions are: sigmoid, relu, tanh, softmax, none')
            else:
                raise ValueError('Activation function must be a string, supported functions are: sigmoid, relu, tanh, softmax, none')
        
        self.derivatives = [None] * len(activations)
        for i in range(len(activations)):
            if activations[i] == 'sigmoid':
                self.derivatives[i] = lambda x: (1 / (1 + np.exp(-x))) * (1 - (1 / (1 + np.exp(-x))))
            elif activations[i] == 'relu':
                self.derivatives[i] = lambda x: [1 if j > 0 else 0 for j in x]
            elif activations[i] == 'tanh':
                self.derivatives[i] = lambda x: 1 - np.tanh(x)**2
            elif activations[i] == 'none':
                self.derivatives[i] = lambda x: 1
            elif activations[i] == 'softmax':
                self.derivatives[i] = lambda x: x * (1 - x)
            else:
                raise ValueError('Activation function must be a string, supported functions are: sigmoid, relu, tanh, softmax, none')

        if isinstance(loss, str):
            if loss == 'mean_squared_error':
                self.loss_function = lambda x, y: np.mean(np.square(x - y))
                self.loss_derivative = lambda x, y: 2 * (x - y)
            elif loss == 'binary_crossentropy':
                self.loss_function = lambda x, y: -np.mean(y * np.log(x) + (1 - y) * np.log(1 - x))
                self.loss_derivative = lambda x, y: (x - y) / (x * (1 - x))
            elif loss == 'categorical_crossentropy':
                self.loss_function = lambda x, y: -np.mean(y * np.log(x))
                self.loss_derivative = lambda x, y: x - y
            elif loss == 'crossentropy':
                self.loss_function = lambda x, y: -np.mean(y * np.log(x))
                self.loss_derivative = lambda x, y: x - y   
            elif loss == 'mean_absolute_error':
                self.loss_function = lambda x, y: np.mean(np.abs(x - y))
                self.loss_derivative = lambda x, y: np.sign(x - y)
            else:
                raise ValueError('Loss function must be a string, supported functions are: mean_squared_error, binary_crossentropy, categorical_crossentropy, mean_absolute_error, crossentropy')
        else:
            raise ValueError('Loss function must be a string, supported functions are: mean_squared_error, binary_crossentropy, categorical_crossentropy, mean_absolute_error, crossentropy')
        
        self.loss = loss

    def initialize_weights_and_biases(self):
        for i in range(len(self.layers)):
            if i == 0:
                #random in range -0.5, 0.5
                self.weights.append(np.random.rand(self.input_size, self.layers[i]) - 0.5)
                self.biases.append(np.random.rand(self.layers[i]) - 0.5)
            else:
                self.weights.append(np.random.rand(self.layers[i-1], self.layers[i]) - 0.5)
                self.biases.append(np.random.rand(self.layers[i]) - 0.5)
        self.weights.append(np.random.rand(self.layers[-1], self.output_size) - 0.5)
        self.biases.append(np.random.rand(self.output_size) - 0.5)

    def forward(self, X):
        self.z = []
        self.a = []
        for i in range(len(self.weights)):
            if i == 0:
                self.z.append(np.dot(X, self.weights[i]) + self.biases[i])
            else:
                self.z.append(np.dot(self.a[i-1], self.weights[i]) + self.biases[i])
            self.a.append(self.activations_functions[i](self.z[i]))
        return self.a[-1]
    
    def backward(self, X, y, y_pred):
        self.deltas = [None] * (self.num_layers + 1)
        error = y_pred - np.asarray(y).reshape(-1, 1)
        mean_error = np.mean(error, axis=0)
        for i in range(self.num_layers, -1, -1): # need to handle batches
            self.deltas[i] = (mean_error * self.derivatives[i](self.z[i].mean(axis = 0))) if i == self.num_layers else self.compute_deltas(i, X)
    
    def compute_deltas(self, index, X):
        return (np.dot(self.deltas[index+1], self.weights[index+1].T) * self.derivatives[index](self.z[index].mean(axis = 0)))

    def update_weights_and_biases(self, inputs):
        for k in range(len(self.weights)-1):
            for i in range(self.weights[k].shape[0]-1):
                for j in range(self.weights[k].shape[1]-1):                
                    self.weights[k][i][j] -= (self.learning_rate * self.deltas[k].T[j] * self.a[k-1][:,i].mean() if k != 0 else self.learning_rate * self.deltas[k].T[j] * inputs.T[i].mean(axis=0))
                    self.biases[k][j] -= self.learning_rate * self.deltas[k].T[j]
        
    def fit(self, X, y, epochs = 30, batch_size = 32):
        self.batch_size = batch_size
        self.epochs = epochs
        for epoch in range(self.epochs):
            for i in range(0, len(X), self.batch_size):
                try:
                    X_batch = X[i:i+self.batch_size]
                    y_batch = y[i:i+self.batch_size]
                except:
                    X_batch = X[i:]
                    y_batch = y[i:]
                y_pred = self.forward(X_batch)
                self.backward(X_batch, y_batch, y_pred)
                self.update_weights_and_biases(X_batch)
            print(f'Epoch {epoch+1}/{self.epochs} - Loss: {self.evaluate(X, y)}')

    def predict(self, X):
        return self.forward(X)
    
    def evaluate(self, X, y):
        y_pred = self.forward(X)
        y = np.asarray(y).reshape(-1, 1)
        return self.loss_function(y_pred, y)
    
    def summary(self):
        print('Model Summary')
        print('Input Size:', self.input_size)
        print('Output Size:', self.output_size)
        print('Layers:', self.layers)
        print('Activations:', [self.activations[i] for i in range(len(self.activations))])
        print('Loss:', self.loss)
        print('Learning Rate:', self.learning_rate)
        

## Load spotify data & preprocess it

In [3]:
spotify_songs = pd.read_csv('/Users/camille.hascoet/Documents/Green AI/Datasets/tracks.csv')
X = spotify_songs.drop(['popularity', 'id', 'name', 'artists', 'id_artists', 'release_date'], axis=1)
y = spotify_songs['popularity']
correlation = X.corrwith(y).sort_values(ascending=False)
correlation

loudness            0.327028
energy              0.302315
explicit            0.211758
danceability        0.187000
time_signature      0.086759
tempo               0.071364
duration_ms         0.027681
key                 0.015299
valence             0.004643
mode               -0.033655
speechiness        -0.047357
liveness           -0.048740
instrumentalness   -0.236487
acousticness       -0.370882
dtype: float64

In [4]:
training_size = 20000
test_size = 5000

In [5]:
X_drop = X.drop(['mode', 'valence', 'key', 'duration_ms', 'speechiness', 'liveness'], axis=1)
X_drop = X_drop.fillna(X_drop.mean())
from sklearn.preprocessing import StandardScaler
X_scaler = StandardScaler()
X_scaled = X_scaler.fit_transform(X_drop)
y_scaled = y / 100

from sklearn.utils import shuffle
X_shuffled, y_shuffled = shuffle(X_scaled, y_scaled, random_state=282)

X_train, X_test = X_shuffled[:training_size], X_shuffled[training_size:training_size+test_size]
y_train, y_test = y_shuffled[:training_size], y_shuffled[training_size:training_size+test_size]

In [6]:
print(y_train.mean(), y_scaled.mean())

0.274065 0.2757005277224752


In [7]:
model = DNN(input_size=8, output_size=1, layers=[50, 10], activations=['relu', 'relu', 'none'], learning_rate=0.01, loss='mean_absolute_error')

In [8]:
start_time = time.time()
model.fit(X_train, y_train, epochs=30, batch_size=10)
end_time = time.time()
print('Training Time:', end_time - start_time)

Epoch 1/30 - Loss: 0.2135477996751101
Epoch 2/30 - Loss: 0.19907510299888218
Epoch 3/30 - Loss: 0.19217312692425134
Epoch 4/30 - Loss: 0.18814100910319356
Epoch 5/30 - Loss: 0.18533838501061087
Epoch 6/30 - Loss: 0.1830499638776352
Epoch 7/30 - Loss: 0.1812824083293911
Epoch 8/30 - Loss: 0.17974270783642352
Epoch 9/30 - Loss: 0.1784245804181317
Epoch 10/30 - Loss: 0.17718902124583916
Epoch 11/30 - Loss: 0.17589182866881173
Epoch 12/30 - Loss: 0.1747352431291355
Epoch 13/30 - Loss: 0.17352006202713843
Epoch 14/30 - Loss: 0.17252868912938238
Epoch 15/30 - Loss: 0.17153665941058052
Epoch 16/30 - Loss: 0.17055643673725618
Epoch 17/30 - Loss: 0.16965660594962007
Epoch 18/30 - Loss: 0.16878463315764772
Epoch 19/30 - Loss: 0.16792394257072327
Epoch 20/30 - Loss: 0.167115288676454
Epoch 21/30 - Loss: 0.16637580859637532
Epoch 22/30 - Loss: 0.16563635193270887
Epoch 23/30 - Loss: 0.16506924063506542
Epoch 24/30 - Loss: 0.16461624049904125
Epoch 25/30 - Loss: 0.16399824705134516
Epoch 26/30 - Lo