In [None]:
###########################################################################################################################
#                                                                                                                         #
#                                                        LOGIC                                                            #
#                                                                                                                         #
#       We will be using forward propagation and back propagation to train the model.                                     #
#       Then we will be using the testing data to test the accuracy of our training model                                 #
#       We will now use this model to predict the sex of the datasets                                                     #
#                                                                                                                         #
###########################################################################################################################


###########################################################################################################################
#                                                                                                                         #
#                                                     Functions                                                           #
#                                                                                                                         #
#       sigmoid()                     :   calculate the sigmoid                                                           #
#       forward_propagation()         :   used for forward propagation                                                    #
#       weight_randomise()            :   randomly assigns values to the weights                                          #
#       cost()                        :   find the cost                                                                   #
#       back_propagation()            :   used for backward_propagation                                                   #
#       train()                       :   for training the model                                                          #
#       predict()                     :   for predicting the sex for the 4 datasets                                       #
#       testing()                     :   for testing the model                                                           #
#                                                                                                                         #
###########################################################################################################################



# Importing all the libraries needed
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns # visualization
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
%matplotlib inline



# Load the snail measurements from Snails.csv into a DataFrame.
data = pd.read_csv("Snails.csv")


# The values of "sex" are non-numeric, which the network cannot consume,
# so they are replaced by integer codes:
#   'M' (male)   -> 1
#   'F' (female) -> 2
#   'I' (described by the original author as "both") -> 3
# new_sex is a one-column DataFrame holding the replaced values.
new_sex = data[['sex']].replace(['M','F','I'],[1,2,3])


# Select the eight named feature columns into a DataFrame called df1.
# This makes the concatenation in the next step straightforward.
df1 = data[['length','diameter','height','whole-weight','shucked-weight','viscera-weight','shell-weight','rings']]


# Combine both DataFrames column-wise (axis=1) to get the final DataFrame:
# eight feature columns followed by the encoded sex column.
df = pd.concat([df1, new_sex], axis=1)


# Split this data into training and testing data with an 8:2 ratio (test_size=0.2).
# random_state=25 is passed so the split is reproducible between runs.
# Note that training_data1 and testing_data1 are un-normalised.
training_data1, testing_data1 = train_test_split(df, test_size=0.2, random_state=25)


# Pair plot of the un-normalised training features, coloured by the sex code.
sns.pairplot( data=training_data1, vars=('length','diameter','height','whole-weight','shucked-weight','viscera-weight','shell-weight','rings'), hue='sex' )


# Min-max normalise each of the eight feature columns: (x - min) / (max - min).
# Note that the entire data set is normalised here, and not just the training part,
# so the column minima/maxima are taken over training and testing rows together.
df_norm = data[['length','diameter','height','whole-weight','shucked-weight','viscera-weight','shell-weight','rings']].apply(lambda x: (x - x.min()) / (x.max() - x.min()))


# Concatenate the normalised features with the sex column (codes 1, 2, 3).
# This rebinds df, replacing the un-normalised frame built above.
df = pd.concat([df_norm, new_sex], axis=1)


# Split the normalised data into training and testing data, using the same
# test_size and random_state as for the un-normalised split above.
training_data, testing_data = train_test_split(df, test_size=0.2, random_state=25)


# Pair plot of the normalised training features, coloured by the sex code.
sns.pairplot( data=training_data, vars=('length','diameter','height','whole-weight','shucked-weight','viscera-weight','shell-weight','rings'), hue='sex' )


# Convert the normalised training DataFrame to a NumPy array and keep the
# first 8 columns (the features); the 9th column is the sex code.
# NOTE: this rebinds `data`, which until now held the DataFrame read from Snails.csv.
data1=np.array(training_data)
data=data1[:,0:8]

# Build x as a list with one entry per training row. Indexing with data[[i]]
# (a list index) keeps each row two-dimensional, shape (1, 8), which is what
# the x.dot(w1) calls in the network functions operate on.
# x2 is not used in the code shown here.
x1 = []
x2 = []
x=[]
for i in range(len(data)):
    x1=data[[i]]
    x.append(x1)


# One-hot encode the sex codes, since there are 3 possible output classes
# and the network uses sigmoid outputs rather than softmax.
# After this, Y has one row per training sample and one column per distinct code.
# NOTE(review): newer scikit-learn releases rename `sparse` to `sparse_output` —
# confirm against the installed version.
from sklearn.preprocessing import OneHotEncoder
one_hot_encoder = OneHotEncoder(sparse=False)
Y = training_data.sex
Y = one_hot_encoder.fit_transform(np.array(Y).reshape(-1, 1))


def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + e^(-x)) of ``x``.

    ``x`` may be a number or a NumPy array; ``np.exp`` is applied
    element-wise, so an array input yields an array of the same shape.
    """
    # Multiplying by 1.0 promotes integer input to floating point.
    negated = -x * 1.0
    denominator = 1 + np.exp(negated)
    return 1 / denominator
   

def forward_propagation(x, w1, w2):
    """Run one forward pass through the two-layer sigmoid network.

    Args:
        x: input row(s); multiplied on the right by ``w1``.
        w1: weights between the input layer and the hidden layer.
        w2: weights between the hidden layer and the output layer.

    Returns:
        The sigmoid activations of the output layer.
    """
    # Hidden layer: weighted sum of the inputs, squashed by the sigmoid.
    hidden_activation = sigmoid(x.dot(w1))

    # Output layer: weighted sum of the hidden activations, squashed again.
    output_activation = sigmoid(hidden_activation.dot(w2))
    return output_activation
  
def weight_randomise(x, y):
    """Return an ``x``-by-``y`` array of randomly initialised weights.

    Each weight is drawn individually with ``np.random.randn()`` and the
    ``x * y`` draws are then arranged row by row into the requested shape.
    """
    draws = [np.random.randn() for _ in range(x * y)]
    return np.array(draws).reshape(x, y)
     
def cost(out, Y):
    """Return the halved squared-error cost of a prediction.

    The squared differences between ``out`` and ``Y`` are summed, divided
    by ``len(Y)`` and then multiplied by 0.5.
    """
    residual = out - Y
    total_squared_error = np.square(residual).sum()
    averaged = total_squared_error / len(Y)
    return averaged * 0.5
   
def back_propagation(x, y, w1, w2, alpha):
    """Perform one gradient-descent update of both weight matrices.

    Args:
        x: input row; multiplied on the right by ``w1``.
        y: target values for the output layer.
        w1: weights between the input layer and the hidden layer.
        w2: weights between the hidden layer and the output layer.
        alpha: learning rate applied to both gradients.

    Returns:
        A ``(w1, w2)`` tuple with the updated weights. New arrays are
        returned; the arrays passed in are not modified in place.
    """
    # Forward pass, repeated here because the hidden activations are needed
    # for the gradients below.
    hidden = sigmoid(x.dot(w1))
    output = sigmoid(hidden.dot(w2))

    # Error of the output layer: prediction minus target.
    output_error = output - y

    # Error of the hidden layer: the output error pushed back through w2,
    # scaled element-wise by the sigmoid derivative hidden * (1 - hidden).
    pushed_back = w2.dot(output_error.transpose()).transpose()
    sigmoid_slope = np.multiply(hidden, 1 - hidden)
    hidden_error = np.multiply(pushed_back, sigmoid_slope)

    # Gradients for w1 and w2.
    grad_w1 = x.transpose().dot(hidden_error)
    grad_w2 = hidden.transpose().dot(output_error)

    # Step against the gradients.
    updated_w1 = w1 - (alpha * grad_w1)
    updated_w2 = w2 - (alpha * grad_w2)
    return (updated_w1, updated_w2)
 

def train(x, Y, w1, w2, alpha=0.01, epoch=500):
    """Train the network one sample at a time for ``epoch`` passes.

    For every sample the cost of the current prediction is recorded and
    the weights are then updated with ``back_propagation``.

    Args:
        x: indexable collection of input rows.
        Y: indexable collection of targets, aligned with ``x``.
        w1: initial input-to-hidden weights.
        w2: initial hidden-to-output weights.
        alpha: learning rate passed on to ``back_propagation``.
        epoch: number of passes over ``x``.

    Returns:
        ``(acc, losses, w1, w2)`` where ``losses`` holds the mean cost of
        each epoch, ``acc`` holds ``(1 - mean cost) * 100`` for each epoch,
        and ``w1`` / ``w2`` are the trained weights.
    """
    acc = []
    losses = []
    sample_count = len(x)

    for epoch_index in range(epoch):
        epoch_costs = []
        for i in range(sample_count):
            prediction = forward_propagation(x[i], w1, w2)
            epoch_costs.append(cost(prediction, Y[i]))
            w1, w2 = back_propagation(x[i], Y[i], w1, w2, alpha)

        mean_cost = sum(epoch_costs) / sample_count
        # The value reported as "acc" is derived from the mean cost, not
        # from a count of correctly classified samples.
        score = (1 - mean_cost) * 100
        print("epochs:", epoch_index + 1, "acc:", score)
        acc.append(score)
        losses.append(mean_cost)

    return (acc, losses, w1, w2)
  

def predict(x, w1, w2):
    """Predict and print the sex of a single sample.

    The sample is passed through the network and the output unit with the
    largest activation decides the class.

    Args:
        x: one feature row; the callers in this file pass rows of shape
            (1, 8).
        w1: trained input-to-hidden weights.
        w2: trained hidden-to-output weights.

    Returns:
        The predicted label: "M", "F" or "I".
    """
    # The training targets are one-hot encodings of the codes 1 (M), 2 (F)
    # and 3 (I); the encoder orders its columns by sorted code, so output
    # unit 0 stands for M, unit 1 for F and unit 2 for I.
    sex_labels = ("M", "F", "I")

    Out = forward_propagation(x, w1, w2)
    print(Out)

    # Flatten first so every output unit is compared: the output has shape
    # (1, 3), so iterating over len(Out) would only ever look at unit 0.
    best_unit = int(np.argmax(np.ravel(Out)))
    label = sex_labels[best_unit]
    print("Gender is " + label)
    return label


def testing(x, Y, w1, w2):
    """Classify every test sample, then plot and return the accuracy.

    Args:
        x: sequence of feature rows, one per test sample.
        Y: true sex codes (1 = M, 2 = F, 3 = I), aligned with ``x``; a
            pandas Series or any array-like is accepted.
        w1: trained input-to-hidden weights.
        w2: trained hidden-to-output weights.

    Returns:
        The fraction of samples in ``x`` that were classified correctly.

    Raises:
        ValueError: if ``x`` contains no samples.
    """
    if len(x) == 0:
        raise ValueError("testing() needs at least one sample")

    # Output unit 0 stands for M, unit 1 for F and unit 2 for I, because the
    # training targets are one-hot encodings of the sorted codes 1, 2, 3.
    sex_labels = ("M", "F", "I")
    # Positional access that works for a Series as well as a list or array.
    true_codes = np.asarray(Y).ravel()

    # The error counter must live outside the loop so that it accumulates
    # over all samples instead of being reset for each one.
    error = 0
    running_accuracy = []
    for j, sample in enumerate(x):
        Out = forward_propagation(sample, w1, w2)
        print(Out)
        k = int(np.argmax(np.ravel(Out)))
        print("Gender is " + sex_labels[k])

        # Output units are 0-based while the label codes are 1-based.
        if k + 1 != true_codes[j]:
            error = error + 1
        running_accuracy.append(1 - error / (j + 1))

    accuracy = 1 - error / len(x)

    # Plot the accuracy after each test sample; plotting the single final
    # value would draw nothing. The last point of the curve is the overall
    # accuracy that is also returned.
    plt.plot(range(1, len(x) + 1), running_accuracy)
    plt.ylabel('Accuracy')
    plt.xlabel("Test samples")
    plt.show()

    return accuracy


# Randomly initialise the weights:
#   w1 is 8 x 5 (8 input features -> 5 hidden units)
#   w2 is 5 x 3 (5 hidden units  -> 3 output classes)
w1 = weight_randomise(8, 5)
w2 = weight_randomise(5, 3)

# Train the network starting from the random weights created above,
# with a learning rate of 0.01 for 500 epochs. The trained weights
# replace the initial w1 and w2.
acc, losses, w1, w2 = train(x, Y, w1, w2, 0.01, 500) 



# Plot the mean cost of each epoch.
plt.plot(losses)
plt.ylabel('cost')
plt.xlabel("Epochs")
plt.show()


# Plot the per-epoch "accuracy" returned by train(), which is computed
# there as (1 - mean cost) * 100.
plt.plot(acc)
plt.ylabel('Accuracy')
plt.xlabel("Epochs")
plt.show()



# Do the same for testing_data as was done for training_data:
# convert the DataFrame to a NumPy array and keep the first 8 columns (the features).
data2=np.array(testing_data)
data3=data2[:,0:8]

# Build x0 as a list with one entry per test row; data3[[i]] keeps each
# row two-dimensional, shape (1, 8).
# x4 is not used in the code shown here.
x3 = []
x4 = []
x0=[]
for i in range(len(data3)):
    x3=data3[[i]]
    x0.append(x3)

# Number of test samples. As a bare expression this has no effect when run
# as a plain script; presumably it was left over from a notebook cell.
len(x0)


# Assign the sex codes of the testing data to the variable Y0.
# These are the true labels used to find the accuracy.
Y0 = testing_data.sex


# Evaluate the trained network on the testing data.
testing(x0, Y0, w1, w2)



# The 4 new samples whose sex is to be predicted. Each row lists the eight
# feature values in the column order given to the DataFrame below.
new_testing_data=[[100,50,20,55.5,42,23,35,11],[110,74,25,153.6,47.4,15.5,11,10],[106,73,16,70.3,47.4,29.9,33,19],
              [94,81,20,132.9,33.5,34.2,38,10]]


# Put the samples in a DataFrame with the same eight feature column names
# as the training data, so they can be normalised the same way.
new_test = pd.DataFrame(new_testing_data, columns=['length','diameter','height','whole-weight','shucked-weight','viscera-weight','shell-weight','rings'])



# Min-max normalise this new dataset column by column.
# NOTE(review): the minima and maxima used here come from these four rows
# only, not from the data the network was trained on — confirm this is intended.
new_test_norm = new_test[['length','diameter','height','whole-weight','shucked-weight','viscera-weight','shell-weight','rings']].apply(lambda x: (x - x.min()) / (x.max() - x.min()))



# Same as for training_data: convert to a NumPy array and build a list of
# rows, each kept two-dimensional with shape (1, 8).
test_data=np.array(new_test_norm)
t=[]
for i in range(4):
    t1=test_data[[i]]
    t.append(t1)


# Predict the sex of each of the four given samples.
predict(t[0], w1, w2)
predict(t[1], w1, w2)
predict(t[2], w1, w2)
predict(t[3], w1, w2)
