<a href="https://colab.research.google.com/github/iamudyavar/banana_quality_neural_network/blob/main/banana_quality_neural_network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Identifying the quality of a banana with a neural network


## Dataset Information:

Independent variables (features):

1. **Size** - size of fruit (continuous)
2. **Weight** - weight of fruit (continuous)
3. **Sweetness** - sweetness of fruit (continuous)
4. **Softness** - softness of fruit (continuous)
5. **HarvestTime** - amount of time passed from harvesting of the fruit (continuous)
6. **Ripeness** - ripeness of fruit (continuous)
7. **Acidity** - acidity of fruit (continuous)


Dependent variable (target):
1. **Quality** - quality of fruit (Good, Bad)

In [1]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

class NeuralNetwork:
    """Feed-forward network with one hidden layer, trained one sample at a time.

    The same activation function is applied to the hidden layer and to the
    output layer. Weight updates use gradient descent with momentum; bias
    updates use a plain gradient step.

    ``activation`` maps pre-activation values to activations and
    ``activation_derivative`` is expected to return the derivative of
    ``activation`` evaluated at a pre-activation value. Both must work
    element-wise on NumPy arrays.
    """

    # Class-level defaults; train() replaces these on the instance
    weights_input_hidden = weights_hidden_output = None     # Weights
    bias_hidden = bias_output = None                        # Biases

    # Hyperparameters
    activation = None               # Activation function
    activation_derivative = None    # Derivative of activation function
    learning_rate = 0               # Learning rate
    num_epochs = 0                  # Number of epochs
    momentum_constant = 0           # Momentum constant

    def __init__(self, activation, activation_derivative, learning_rate, num_epochs, momentum_constant):
        """Store the activation pair and the training hyperparameters."""
        self.activation = activation
        self.activation_derivative = activation_derivative
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs
        self.momentum_constant = momentum_constant

    def preprocess(self, dataset):
        """Clean ``dataset`` and split it into train and test sets.

        Missing values are forward-filled, object-typed columns are label
        encoded, and the ``Quality`` column is separated out as the target.
        The caller's DataFrame is left unmodified.

        Returns:
            The ``(X_train, X_test, y_train, y_test)`` result of
            ``train_test_split`` with 30% of the rows held out for testing.
        """
        # Fill in missing values (if they exist). This runs before encoding so
        # that a missing category is filled rather than encoded as a class of
        # its own, and because ffill() returns a new frame the column
        # assignments below do not touch the caller's DataFrame.
        dataset = dataset.ffill()

        # Encode categorical features
        categorical_cols = dataset.select_dtypes(include=['object']).columns
        for col in categorical_cols:
            dataset[col] = LabelEncoder().fit_transform(dataset[col])

        # Split data into X and y
        X = dataset.drop('Quality', axis=1)
        y = dataset['Quality']

        # Split data into train and test sets
        return train_test_split(X, y, test_size=0.3)

    def train(self, X_train, y_train, input_size, hidden_size, output_size):
        """Initialise the parameters and fit them to the training data.

        Args:
            X_train: Feature rows; converted to a float64 array.
            y_train: Target values; converted to an int64 array.
            input_size: Number of features per row.
            hidden_size: Number of hidden units.
            output_size: Number of output units.

        Returns:
            Accuracy on the training data after the final epoch.
        """
        # Initialize weights and biases
        self.weights_input_hidden = np.random.uniform(-1, 1, (input_size, hidden_size))
        self.bias_hidden = np.zeros((1, hidden_size))
        self.weights_hidden_output = np.random.uniform(-1, 1, (hidden_size, output_size))
        self.bias_output = np.zeros((1, output_size))

        # Convert input and target values into an array
        inputs = np.array(X_train, dtype=np.float64)
        targets = np.array(y_train, dtype=np.int64)

        # Momentum terms, stored transposed relative to the weight matrices
        hidden_momentum_optimizer = np.zeros((output_size, hidden_size))
        input_momentum_optimizer = np.zeros((hidden_size, input_size))

        # Start training
        for _ in range(self.num_epochs):
            for features, target in zip(inputs, targets):
                row = np.reshape(features, (input_size, 1))

                # Forward pass, keeping the pre-activations for the derivatives
                hidden_inputs, hidden_outputs, output_inputs, output = (
                    self._forward_with_preactivations(row)
                )

                # Backward pass
                hidden_momentum_optimizer, input_momentum_optimizer = self.backward_pass(
                    target, output, hidden_outputs, row,
                    hidden_momentum_optimizer, input_momentum_optimizer,
                    hidden_inputs=hidden_inputs, output_inputs=output_inputs,
                )

        # Return training accuracy
        return self.get_accuracy_score(inputs, targets)

    def test(self, X_test, y_test):
        """Return the accuracy of the trained network on ``X_test`` / ``y_test``."""
        # Convert input and target values into an array (same dtypes as train)
        inputs = np.array(X_test, dtype=np.float64)
        targets = np.array(y_test, dtype=np.int64)

        # Return testing accuracy
        return self.get_accuracy_score(inputs, targets)

    def _forward_with_preactivations(self, input):
        """Run one forward pass and return every intermediate value.

        Returns:
            ``(hidden_inputs, hidden_outputs, output_inputs, output)`` where the
            ``*_inputs`` values are the weighted sums before activation.
        """
        # Hidden layer calculations
        hidden_inputs = np.dot(input.T, self.weights_input_hidden) + self.bias_hidden
        hidden_outputs = self.activation(hidden_inputs)

        # Output layer calculations
        output_inputs = np.dot(hidden_outputs, self.weights_hidden_output) + self.bias_output
        output = self.activation(output_inputs)
        return hidden_inputs, hidden_outputs, output_inputs, output

    def forward_pass(self, input):
        """Propagate a column vector through the network.

        Args:
            input: Array of shape ``(input_size, 1)``.

        Returns:
            ``(hidden_outputs, output)``, the activations of the hidden layer
            and of the output layer, each with a leading dimension of 1.
        """
        _, hidden_outputs, _, output = self._forward_with_preactivations(input)
        return (hidden_outputs, output)

    def backward_pass(self, target_value, final_output, hidden_outputs, data_row,
                      hidden_momentum_optimizer, input_momentum_optimizer,
                      hidden_inputs=None, output_inputs=None):
        """Apply one gradient step for a single sample and return the momentum terms.

        Args:
            target_value: Desired output for this sample.
            final_output: Output-layer activations from the forward pass.
            hidden_outputs: Hidden-layer activations from the forward pass.
            data_row: The input sample as a column vector.
            hidden_momentum_optimizer: Previous momentum term for the
                hidden-to-output weights.
            input_momentum_optimizer: Previous momentum term for the
                input-to-hidden weights.
            hidden_inputs: Optional hidden-layer pre-activations.
            output_inputs: Optional output-layer pre-activations.

        Returns:
            ``(hidden_momentum_optimizer, input_momentum_optimizer)`` updated
            for this step.
        """
        # Callers that do not supply pre-activations get the derivative
        # evaluated on the layer outputs instead.
        if output_inputs is None:
            output_inputs = final_output
        if hidden_inputs is None:
            hidden_inputs = hidden_outputs

        # Error signal at the output layer (element-wise, one value per unit)
        output_delta = (target_value - final_output) * self.activation_derivative(output_inputs)

        # Error signal at the hidden layer. It is computed before any weight is
        # changed so that it is propagated through the weights that produced
        # this sample's output, and it is element-wise per hidden unit.
        hidden_deltas = (
            np.dot(output_delta, self.weights_hidden_output.T)
            * self.activation_derivative(hidden_inputs)
        )

        # Update weights between hidden and output layer
        hidden_output_weightchange = self.learning_rate * np.dot(output_delta.T, hidden_outputs)
        hidden_momentum_optimizer = self.momentum_constant * hidden_momentum_optimizer + hidden_output_weightchange
        self.weights_hidden_output += hidden_momentum_optimizer.T

        # Update weights between input and hidden layer
        input_hidden_weightchange = self.learning_rate * np.dot(hidden_deltas.T, data_row.T)
        input_momentum_optimizer = self.momentum_constant * input_momentum_optimizer + input_hidden_weightchange
        self.weights_input_hidden += input_momentum_optimizer.T

        # Update biases (plain gradient step; no momentum state is kept for them)
        self.bias_output += self.learning_rate * output_delta
        self.bias_hidden += self.learning_rate * hidden_deltas

        return (hidden_momentum_optimizer, input_momentum_optimizer)

    def get_accuracy_score(self, inputs, targets):
        """Return the fraction of rows whose rounded prediction equals the target.

        Only the first output unit is used as the prediction.
        """
        # Run forward passes to make our prediction
        predicted_outputs = [
            self.forward_pass(np.reshape(row, (len(row), 1)))[1][0][0]
            for row in inputs
        ]

        predicted_outputs = np.round(predicted_outputs).astype(int)
        # accuracy_score expects (y_true, y_pred)
        return accuracy_score(targets, predicted_outputs)


# Declare activation function
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))``, element-wise for arrays."""
    decay = np.exp(-x)
    return 1 / (1 + decay)

def sigmoid_derivative(x):
    """Return the derivative of the logistic sigmoid evaluated at ``x``.

    Uses the identity ``s'(x) = s(x) * (1 - s(x))`` where
    ``s(x) = 1 / (1 + exp(-x))``. Works element-wise on arrays.
    """
    # The sigmoid is computed once and reused for both factors
    s = 1 / (1 + np.exp(-x))
    return s * (1 - s)

def tanh(x):
    """Return the hyperbolic tangent of ``x``, element-wise for arrays.

    Delegates to ``np.tanh`` rather than forming the ratio of exponentials by
    hand: for large ``|x|`` that ratio evaluates ``inf / inf`` and yields NaN,
    whereas ``np.tanh`` saturates at -1 or 1.
    """
    return np.tanh(x)

def tanh_derivative(x):
    """Return the derivative of tanh evaluated at ``x``: ``1 - tanh(x) ** 2``.

    Works element-wise on arrays. ``np.tanh`` is used directly so that large
    ``|x|`` gives a derivative of 0 instead of NaN from overflowing
    exponentials.
    """
    return 1 - np.tanh(x) ** 2

def relu(x):
    """Return ``x`` with every negative entry replaced by 0 (rectified linear unit)."""
    floor = 0
    rectified = np.maximum(floor, x)
    return rectified

def relu_derivative(x):
    """Return 1 where ``x`` is strictly positive and 0 elsewhere, as integers."""
    is_positive = np.asarray(x > 0)
    return is_positive.astype(int)

# Initialize model
model = NeuralNetwork(
    activation=sigmoid,
    activation_derivative=sigmoid_derivative,
    learning_rate=0.03,
    num_epochs=100,
    momentum_constant=0.9,
)

# Fetch dataset (the CSV must be present in the current working directory)
banana_dataset = pd.read_csv("banana_quality.csv")

# Preprocess and clean the data
X_train, X_test, y_train, y_test = model.preprocess(banana_dataset)

# Train. The input layer width is taken from the feature matrix so it stays
# in step with the dataset's columns; 4 hidden units and 1 output unit.
train_accuracy = model.train(X_train, y_train, X_train.shape[1], 4, 1)

# Test
test_accuracy = model.test(X_test, y_test)

# Print results
print(f'Training accuracy: {train_accuracy}')
print(f'Testing accuracy: {test_accuracy}')

FileNotFoundError: [Errno 2] No such file or directory: 'banana_quality.csv'