In [49]:
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pandas as pd


class FeedForwardNN:
    def __init__(self, input_size, hidden_layers, activations, output_size, learning_rate=0.001, max_iter=1000,
                 tol=1e-4, batch_size=32, random_state=None):
        self.input_size = input_size
        self.hidden_layers = hidden_layers
        self.activations = activations
        self.output_size = output_size
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.tol = tol
        self.batch_size = batch_size
        if random_state:
            np.random.seed(random_state)

        self.weights = []
        self.biases = []
        self.activation_functions = []
        self.activation_derivatives = []

        self._initialize_weights_and_biases()
        self._initialize_activation_functions()

    def _initialize_weights_and_biases(self):
        layer_sizes = [self.input_size] + self.hidden_layers + [self.output_size]
        for i in range(1, len(layer_sizes)):
            # Initialize weights with random values from a normal distribution
            weight_matrix = np.random.randn(layer_sizes[i - 1], layer_sizes[i])
            # Initialize biases with random values from a normal distribution
            bias_vector = np.random.randn(layer_sizes[i])
            self.weights.append(weight_matrix)
            self.biases.append(bias_vector)

    def _initialize_activation_functions(self):
        for activation in self.activations:
            if activation == 'relu':
                self.activation_functions.append(self._relu)
                self.activation_derivatives.append(self._relu_derivative)
            elif activation == 'sigmoid':
                self.activation_functions.append(self._sigmoid)
                self.activation_derivatives.append(self._sigmoid_derivative)
            elif activation == 'linear':
                self.activation_functions.append(self._linear)
                self.activation_derivatives.append(self._linear_derivative)
            elif activation == 'softmax':
                self.activation_functions.append(self._softmax)
                self.activation_derivatives.append(self._softmax_derivative)

    @staticmethod
    def _relu(x):
        return np.maximum(0, x)

    @staticmethod
    def _sigmoid(x):
        return 1 / (1 + np.exp(-x))

    @staticmethod
    def _linear(x):
        return x

    @staticmethod
    def _softmax(x):
        exp_scores = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

    def _relu_derivative(self, x):
        x[x <= 0] = 0
        x[x > 0] = 1
        return x

    def _sigmoid_derivative(self, x):
        return self._sigmoid(x) * (1 - self._sigmoid(x))

    def _linear_derivative(self, x):
        return np.ones_like(x)

    def _softmax_derivative(self, x):
        s = self._softmax(x)
        return s * (1 - s)

    def _feedforward(self, X):
        activations = [X]
        for i in range(len(self.weights)):
            z = np.dot(activations[-1], self.weights[i]) + self.biases[i]
            activations.append(self.activation_functions[i](z))
        return activations

    def _backpropagation(self, X_batch, y_batch, activations_batch):
        m = X_batch.shape[0]
        gradient_weights = [np.zeros_like(w) for w in self.weights]
        gradient_biases = [np.zeros_like(b) for b in self.biases]

        # Compute error in output layer
        error = activations_batch[-1] - y_batch

        # Backpropagate the error
        for i in range(len(self.weights) - 1, -1, -1):
            gradient_biases[i] = np.sum(error, axis=0) / m
            gradient_weights[i] = np.dot(activations_batch[i].T, error) / m
            if i > 0:
                error = np.dot(error, self.weights[i].T) * self.activation_derivatives[i](activations_batch[i])

        return gradient_weights, gradient_biases

    def _update_weights_and_biases(self, gradient_weights, gradient_biases):
        for i in range(len(self.weights)):
            self.weights[i] = self.weights[i] + self.learning_rate * -(gradient_weights[i])
            self.biases[i] = self.biases[i] + self.learning_rate * -(gradient_biases[i])

    def fit(self, X, y):
        one_hot_encoder = OneHotEncoder()
        y_encoded = one_hot_encoder.fit_transform(y.reshape(-1, 1)).toarray()

        for _ in range(self.max_iter):
            # Shuffle data
            indices = np.arange(X.shape[0])
            np.random.shuffle(indices)
            X_shuffled = X.iloc[indices]
            y_shuffled = y_encoded[indices]

            # Split data into batches
            for batch_start in range(0, X.shape[0], self.batch_size):
                X_batch = X_shuffled.iloc[batch_start:batch_start+self.batch_size]
                y_batch = y_shuffled[batch_start:batch_start+self.batch_size]

                activations_batch = self._feedforward(X_batch.values)
                gradient_weights, gradient_biases = self._backpropagation(X_batch.values, y_batch, activations_batch)
                self._update_weights_and_biases(gradient_weights, gradient_biases)

            # Calculate loss (MSE) for the entire dataset
            activations = self._feedforward(X.values)
            
            if self.activations[-1] != 'softmax':
                loss = np.mean(np.square(activations[-1] - y_encoded))
            else:
                loss = -np.mean(np.sum(y_encoded * np.log(activations[-1]), axis=1))

            if loss < self.tol:
                break

    def predict(self, X):
        activations = self._feedforward(X.values)
        return np.argmax(activations[-1], axis=1)

In [47]:
from sklearn.preprocessing import OneHotEncoder
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import numpy as np

# Load data from CSV
data = pd.read_csv('models/iris.csv')

# Reset index to ensure alignment after shuffling
data.reset_index(drop=True, inplace=True)

# Preprocess data
X = data.drop(columns=['Species', 'Id'])
y = data['Species']

# Encode categorical labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define network architecture
input_size = X_train.shape[1]
hidden_layers = [10, 5]  # Example: 2 hidden layers with 10 and 5 neurons
output_size = len(np.unique(y_train))  # Number of classes in the output layer
activations = ['relu', 'relu', 'softmax']  # Example: ReLU activation for hidden layers, Softmax for output layer

# Train and evaluate the model
model = FeedForwardNN(input_size=input_size, hidden_layers=hidden_layers, activations=activations,
                      output_size=output_size, learning_rate=0.001, max_iter=1000, tol=1e-4, batch_size=100,
                      random_state=42)

model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

print(y_pred)

Accuracy: 0.97
[1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 1 2 2 2 0 0]


In [26]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

# Load data from CSV
data = pd.read_csv('models/iris.csv')

# Preprocess data
X = data.drop(columns=['Species'])
y = data['Species']

# Encode categorical labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Set hyperparameters
hidden_layer_sizes = (4,3,2) # 2 hidden layers, 10 neurons in the first layer, 5 neurons in the second layer
activation_functions = 'relu'
learning_rate = 0.001
error_threshold = 0.0001
max_iterations = 1000
batch_size = 100

# Train model
model = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes, activation=activation_functions, learning_rate_init=learning_rate, tol=error_threshold, max_iter=max_iterations, batch_size=batch_size, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train the model on the training data
model.fit(X_train, y_train)

# Print the weights matrices
print(model.coefs_)
 
# Make predictions on the test data
y_pred = model.predict(X_test)
 
# Calculate the accuracy of the model
accuracy = (y_pred == y_test).mean()
print(f'Accuracy: {accuracy:.2f}')

# Evaluate model
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

[array([[ 5.53833006e-01,  8.82165144e-01,  8.22300431e-01,
        -7.06161479e-01],
       [-2.02272885e-01, -3.95715375e-01, -8.54365851e-02,
        -3.38212766e-01],
       [ 4.43767470e-02,  3.16145392e-01, -3.32323997e-01,
         1.21725028e+00],
       [ 8.34625303e-01, -2.69601867e-01,  5.37637370e-04,
        -1.50138873e+00],
       [ 2.35781372e-01,  2.27771134e-01,  4.97546745e-01,
        -1.31345241e+00]]), array([[-0.83603583,  0.07835199, -0.95442628],
       [-0.52551007,  0.03165069, -0.87777477],
       [-0.17176614, -0.17532494, -0.54611671],
       [ 2.35990022,  2.32848588,  1.53583121]]), array([[ 1.23614441e+00, -1.75417434e-02],
       [ 1.09572712e+00, -6.15946830e-02],
       [ 2.02640968e+00, -2.17999655e-04]]), array([[ 4.20790952e-01,  3.03805380e-01, -1.97721465e+00],
       [ 6.43628311e-02,  1.07635724e-03,  4.50708769e-02]])]
Accuracy: 1.00
                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00      

