<a href="https://colab.research.google.com/github/iamudyavar/banana_quality_neural_network/blob/main/banana_quality_neural_network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Identifying the quality of a banana with a neural network



## Dataset Information:

Kaggle link: https://www.kaggle.com/datasets/l3llff/banana

Dependent variables:

1. **Size** - size of fruit (continuous)
2. **Weight** - weight of fruit (continuous)
3. **Sweetness** - sweetness of fruit (continuous)
4. **Softness** - softness of fruit (continuous)
5. **HarvestTime** - amount of time passed from harvesting of the fruit (continuous)
6. **Ripeness** - ripeness of fruit (continuous)
7. **Acidity** - acidity of fruit (continuous)


Independent variable:
1. **Quality** - quality of fruit (Good, Bad)

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings('ignore')

#Declare activation function and derivatives. We will use these as inputs to model
def sigmoid(x):
  return 1/(1 + np.exp(-x))

def sigmoid_derivative(x):
  return sigmoid(x)*sigmoid(1 - x)

def tanh(x):
  return (np.exp(x) - np.exp(-x))/(np.exp(x) + np.exp(-x))

def tanh_derivative(x):
  return 1 - tanh(x)**2

def relu(x):
  return np.maximum(0, x)

def relu_derivative(x):
  return np.array(x > 0).astype('int')


class NeuralNetwork:
  #Declaring weights from input to hidden layer and hidden to output
  wih=None
  who=None
  hiddenBias=None
  outputBias=None

  #Hyperparameters
  activation=None
  activation_der=None
  learningRate=None
  epochs=0
  momentumConst=0

  def __init__(self,activation,activation_der,learningRate,epochs,momentumConst):
    self.name=activation
    self.activation=activation
    self.activation_der=activation_der
    self.learningRate=learningRate
    self.epochs=epochs
    self.momentumConst=momentumConst

  def preprocess(self,dataset):
    #Encoding the dataset
    le=LabelEncoder()
    cat=dataset.select_dtypes(include=['object']).columns
    for col in cat:
      dataset[col]=le.fit_transform(dataset[col])

    dataset = dataset.ffill()
    X = dataset.drop('Quality', axis=1)
    y = dataset['Quality']

    return train_test_split(X,y,test_size=0.3)#,random_state=42)

  def train(self,X_train,y_train, inputSize ,hiddenSize, outputSize):
    #Initializing weights with values between -1 and 1 and bias with 0
    self.wih=np.random.uniform(-1,1,(inputSize,hiddenSize))
    self.hiddenBias=np.zeros((1,hiddenSize))
    self.who=np.random.uniform(-1,1,(hiddenSize,outputSize))
    self.outputBias=np.zeros((1,outputSize))

    #Temp variables to hold X_train and Y_train
    instances=np.array(X_train,dtype=np.float64)
    target=np.array(y_train,dtype=np.float64)

    #Optimizer intialization
    hidMomOpt=np.zeros((1,hiddenSize))
    inpMomOpt=np.zeros((hiddenSize,inputSize))

    #Looping over all inputs i.e. a forward pass and backward pass over every instance for specified epochs
    for e in range((self.epochs)):
      for i in range(len(instances)):
        row=instances[i]
        row=np.reshape(row,(inputSize,1))

        #forward pass
        ho,output=self.forwardPass(row)

        #backward pass
        hidMomOpt,momOpt=self.backwardPass(target[i],output,ho,row, hidMomOpt,inpMomOpt)

    #Training accuracy
    #Array to store training predictions
    p=[]

    for i in range(len(instances)):
      r=instances[i]
      r=np.reshape(r,(len(instances[i]),1))

      o=self.forwardPass(r)[1]
      p.append(o[0][0])

    p= np.round(p).astype(int)

    #Returning accuracy using scikit's accuracy_score
    return accuracy_score(p,target)

  def test(self,X_test,y_test):

    testX=np.array(X_test,dtype=np.float64)
    testY=np.array(y_test,dtype=np.float64)

    #Testing accuracy
    #Storing testing predictions
    predictions=[]

    for i in range(len(testX)):
      row=testX[i]
      row=np.reshape(row,(len(testX[i]),1))

      output=self.forwardPass(row)[1]
      predictions.append(output[0][0])

    predictions= np.round(predictions).astype(int)

    #Returning test accuracy
    return accuracy_score(predictions,testY)

  def forwardPass(self,input):
    #netIn=input
    hi=np.dot(input.T,self.wih)+self.hiddenBias

    #xh=sig(netIn)
    ho=self.activation(hi)

    #xO=sig(net(xh))
    oinput=np.dot(ho,self.who)+self.outputBias
    output=self.activation(oinput)
    return (ho,output) #Made change from (ho,output)

  def backwardPass(self,target,output,ho,row,hidMomOpt,inpMomOpt):
    #Delta of output
    outputDel=np.dot((target-output),self.activation_der(output))

    #Change in weight
    hoWeightChange=self.learningRate*np.dot(outputDel,ho)
    hidMomOpt=self.momentumConst*hidMomOpt+hoWeightChange

    #Update
    self.who+=hidMomOpt.T

    #Output Bias update
    self.outputBias +=self.learningRate*outputDel

    #Reshape for calculation purposes
    helper=np.broadcast_to(outputDel,(4,1))

    #Delta of hidden nodes
    hoDer=self.activation_der(ho)
    hidDel=np.dot(np.dot(self.who,hoDer),helper)

    #Change in weight
    ihWeightChange=self.learningRate*np.dot(hidDel,row.T)
    inpMomOpt=self.momentumConst*inpMomOpt+ihWeightChange

    #Update
    self.wih+=inpMomOpt.T

    #Reshape hidden gradient
    hidDelTemp = hidDel.reshape(1, -1)
    #Hidden bias update

    self.hiddenBias+=self.learningRate*hidDelTemp

    return (hidMomOpt,inpMomOpt)

#Initialize model
model = NeuralNetwork(activation=sigmoid,activation_der=sigmoid_derivative,learningRate=0.03, epochs=100, momentumConst=0.9)

#Fetch dataset
banana_dataset = pd.read_csv("https://raw.githubusercontent.com/iamudyavar/banana_quality_neural_network/main/banana_quality.csv")

#Preprocess and clean the data
X_train, X_test, y_train, y_test = model.preprocess(banana_dataset)

train_accuracy = model.train(X_train, y_train, 7, 4, 1)

#Test
test_accuracy = model.test(X_test, y_test)

#Print results
print(f'Training accuracy: {train_accuracy}')
print(f'Testing accuracy: {test_accuracy}')

Training accuracy: 0.82125
Testing accuracy: 0.82625
