# Neural Networks

In this task you are supposed to implement two types of multilayer perceptrons:

1. Using only Python. 
2. Using a high level library

Download the Ecoli dataset: https://archive.ics.uci.edu/ml/datasets/Ecoli
* Predict the two classes: cp and im (remove the rest of the dataset).
* Make the necessary adjustments to the data.
* Implement and test a Multilayer Perceptron from scratch using only Python and standard libraries (do not train this).
* Implement, train and test a Multilayer Perceptron using a high level library (e.g., Torch, Keras, TensorFlow).
* Choose the network architecture with care.
* Train and validate all algorithms.
* Make the necessary assumptions.

# Load the data

In [59]:
import pandas as pd

# The raw file is whitespace-separated and has no header row. `sep=r'\s+'` is the
# replacement for `delim_whitespace=True`, which is deprecated since pandas 2.2.
df = pd.read_csv('ecoli/ecoli.data', header=None, sep=r'\s+')

# Keep only the rows whose label (column 8) is 'cp' or 'im', and drop the
# non-numeric column 0. The label column is kept as the last column.
df = df[df[8].isin(['cp', 'im'])].drop(columns=[0])
df

Unnamed: 0,1,2,3,4,5,6,7,8
0,0.49,0.29,0.48,0.5,0.56,0.24,0.35,cp
1,0.07,0.40,0.48,0.5,0.54,0.35,0.44,cp
2,0.56,0.40,0.48,0.5,0.49,0.37,0.46,cp
3,0.59,0.49,0.48,0.5,0.52,0.45,0.36,cp
4,0.23,0.32,0.48,0.5,0.55,0.25,0.35,cp
...,...,...,...,...,...,...,...,...
215,0.10,0.49,0.48,0.5,0.41,0.67,0.21,im
216,0.30,0.51,0.48,0.5,0.42,0.61,0.34,im
217,0.61,0.47,0.48,0.5,0.00,0.80,0.32,im
218,0.63,0.75,0.48,0.5,0.64,0.73,0.66,im


# Shuffle the data and split into training and test data

In [60]:
# Shuffle the rows with a fixed seed so that the split, and every result that
# depends on it, is reproducible. The shuffled frame gets its own name so that
# re-running this cell always starts from the same `df` and yields the same order.
RANDOM_STATE = 42
df_shuffled = df.sample(frac=1, random_state=RANDOM_STATE).reset_index(drop=True)
y = df_shuffled.iloc[:, -1]   # last column holds the class label
X = df_shuffled.iloc[:, :-1]  # all remaining columns are the features

# Split the data into training and test data 80/20
split_at = int(len(X) * 0.8)
X_train, X_test = X.iloc[:split_at], X.iloc[split_at:]
y_train, y_test = y.iloc[:split_at], y.iloc[split_at:]

# Implement and test a Multilayer Perceptron from scratch using only Python and standard libraries (do not train this).

In [61]:
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score


class MLP:
    """Feed-forward multilayer perceptron with one hidden layer (inference only).

    Both the hidden layer and the output layer use a sigmoid activation. The
    class has no training routine: weights are either drawn at random in
    ``__init__`` or injected through the ``set_weights_*`` methods.
    """

    def __init__(self, input_size, hidden_size, output_size, seed=None):
        """Create the network with random weights and zero biases.

        Args:
            input_size: Number of input features.
            hidden_size: Number of units in the hidden layer.
            output_size: Number of output units.
            seed: Optional seed for the weight initialisation. With ``None``
                (the default) the weights are drawn from NumPy's global random
                state, exactly as before this parameter existed.
        """
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # A dedicated RandomState keeps a seeded model from disturbing (or being
        # disturbed by) the global NumPy random state.
        rng = np.random if seed is None else np.random.RandomState(seed)
        self.weights_input_hidden = rng.randn(input_size, hidden_size)
        self.weights_hidden_output = rng.randn(hidden_size, output_size)
        self.bias_hidden = np.zeros((1, hidden_size))
        self.bias_output = np.zeros((1, output_size))

    @staticmethod
    def _as_weight_matrix(weights, expected_shape):
        """Return a float copy of ``weights`` after checking its shape.

        Accepts any array-like (nested lists included). Copying prevents later
        changes to the caller's array from silently altering the model.

        Raises:
            ValueError: If the shape differs from ``expected_shape``.
        """
        matrix = np.array(weights, dtype=float)
        if matrix.shape != tuple(expected_shape):
            raise ValueError(f'Weights must be of shape ({expected_shape[0]}, {expected_shape[1]})')
        return matrix

    def set_weights_input_hidden(self, weights):
        """Replace the input-to-hidden weights.

        Args:
            weights: Array-like of shape ``(input_size, hidden_size)``.

        Raises:
            ValueError: If ``weights`` does not have that shape.
        """
        self.weights_input_hidden = self._as_weight_matrix(
            weights, (self.input_size, self.hidden_size))

    def set_weights_hidden_output(self, weights):
        """Replace the hidden-to-output weights.

        Args:
            weights: Array-like of shape ``(hidden_size, output_size)``.

        Raises:
            ValueError: If ``weights`` does not have that shape.
        """
        self.weights_hidden_output = self._as_weight_matrix(
            weights, (self.hidden_size, self.output_size))

    def forward(self, inputs):
        """Run one forward pass and return the output-layer activations.

        The intermediate values are stored on the instance as ``hidden_input``,
        ``hidden_output``, ``final_input`` and ``final_output``.

        Args:
            inputs: Array whose last dimension equals ``input_size``.

        Returns:
            Sigmoid activations of the output layer.
        """
        # Compute the input to the hidden layer
        self.hidden_input = np.dot(inputs, self.weights_input_hidden) + self.bias_hidden
        # Apply the sigmoid activation function
        self.hidden_output = self._sigmoid(self.hidden_input)
        # Compute the input to the output layer
        self.final_input = np.dot(self.hidden_output, self.weights_hidden_output) + self.bias_output
        # Apply the sigmoid activation function
        self.final_output = self._sigmoid(self.final_input)
        return self.final_output

    def predict(self, inputs):
        """Return hard 0/1 predictions: 1 where the forward output exceeds 0.5."""
        predictions = self.forward(inputs) > 0.5
        return predictions.astype(int)

    @staticmethod
    def _sigmoid(x):
        """Numerically stable logistic function ``1 / (1 + exp(-x))``.

        Only ``exp`` of non-positive numbers is evaluated, so very negative
        inputs no longer overflow. Always returns a NumPy array (0-d for a
        scalar input).
        """
        x = np.asarray(x, dtype=float)
        decay = np.exp(-np.abs(x))  # always in (0, 1], cannot overflow
        # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- the same function.
        return np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))


# Initialize MLP
input_size = 7  # Number of features in the dataset
hidden_size = 5  # You can choose another size for the hidden layer
output_size = 1  # Binary classification, one output neuron is enough

mlp = MLP(input_size, hidden_size, output_size)

# Fixed (untrained) weights, so the from-scratch network gives the same
# predictions on every run instead of depending on the random initialisation.
weights_i_h = np.array([[-1.03901234, 1.34852808, -1.59770746, -0.98452313, 1.80660242],
               [0.57697243, -1.01469221, -1.0597239, -0.3189779, -1.61257962],
               [0.71232018, 1.35518689, 1.09432925, -0.39497866, 0.81997304],
               [-1.18617227, -1.98978287, -0.5950365, -0.34994781, -0.45480538],
               [-0.74376669, -1.65325417, 0.24469606, -1.49469729, -0.42877386],
               [0.79975916, 1.30670578, -0.26306507, -0.11331429, 1.03342756],
               [-0.50877096, 1.19815737, -0.34944548, 1.07487525, -1.67910252]])
weights_h_o = np.array([[0.64319517],
               [0.60016733],
               [-0.80560469],
               [-0.25233522],
               [-1.33179876]])

# Set the weights
mlp.set_weights_input_hidden(weights_i_h)
mlp.set_weights_hidden_output(weights_h_o)

# Encode the labels to 0 ('cp') and 1 ('im')
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Score the complete data set (not only the test split): the network is not
# trained, so no sample has to be held out. `forward` returns the raw sigmoid
# activations; the 0.5 threshold is applied exactly once, below.
y_pred_probs = mlp.forward(X.values)  # Output-neuron activations
y_pred = (y_pred_probs > 0.5).astype(int).flatten()  # Apply threshold and flatten

# Decode the predicted classes to original labels
y_pred_labels = label_encoder.inverse_transform(y_pred)

pred_df = pd.DataFrame({'Expected': y, 'Predicted': y_pred_labels})

pred_df


Unnamed: 0,Expected,Predicted
0,cp,cp
1,cp,cp
2,cp,cp
3,cp,cp
4,cp,cp
...,...,...
215,cp,cp
216,cp,cp
217,im,cp
218,cp,cp


In [62]:
from sklearn.metrics import classification_report, confusion_matrix
# Compare the encoded ground truth with the from-scratch MLP's predictions.
print("Confusion matrix:", confusion_matrix(y_encoded, y_pred), sep="\n")

Confusion matrix:
[[143   0]
 [ 69   8]]


In [63]:
# Per-class precision / recall / F1 for the from-scratch MLP's predictions.
print("Classification report:", classification_report(y_encoded, y_pred), sep="\n")

Classification report:
              precision    recall  f1-score   support

           0       0.67      1.00      0.81       143
           1       1.00      0.10      0.19        77

    accuracy                           0.69       220
   macro avg       0.84      0.55      0.50       220
weighted avg       0.79      0.69      0.59       220


# Use sklearn to create an MLP Classifier

In [64]:
from sklearn.model_selection import GridSearchCV
import sklearn.neural_network as nn
from sklearn.metrics import classification_report, confusion_matrix

# Use cross validation to find the best parameters
param_grid = {'hidden_layer_sizes': [(2, 3), (3, 3), (3, 4), (4, 4), (4, 5), (5, 5), (5, 6), (6, 6), (6, 7),
                                     (7, 7), (7, 8), (8, 8), (8, 9), (9, 9), (9, 10), (10, 10)],
              'activation': ['identity', 'tanh', 'relu'],
              'solver': ['lbfgs', 'sgd'],
              'max_iter': [100000]}
# A fixed random_state makes the weight initialisation (and the batch sampling
# of the 'sgd' solver) repeatable, so the selected parameters do not change
# from one run of the notebook to the next.
grid = GridSearchCV(nn.MLPClassifier(random_state=42), param_grid, cv=5, n_jobs=-1)
grid.fit(X_train, y_train)

print("Best parameters set found via cross-validation:")
print(grid.best_params_)

Best parameters set found via cross-validation:
{'activation': 'identity', 'hidden_layer_sizes': (3, 4), 'max_iter': 100000, 'solver': 'sgd'}


In [65]:
# Predict the held-out test split with the fitted grid search and compare the
# result with the true labels.
y_pred = grid.predict(X_test)
print("Confusion matrix:", confusion_matrix(y_test, y_pred), sep="\n")

Confusion matrix:
[[26  0]
 [ 3 15]]


In [66]:
# Per-class precision / recall / F1 on the held-out test split.
print("Classification report:", classification_report(y_test, y_pred), sep="\n")

Classification report:
              precision    recall  f1-score   support

          cp       0.90      1.00      0.95        26
          im       1.00      0.83      0.91        18

    accuracy                           0.93        44
   macro avg       0.95      0.92      0.93        44
weighted avg       0.94      0.93      0.93        44
