In [None]:
import numpy as np
import pandas as pd

#Location of the IRIS Dataset (CSV file hosted on GitHub)
IRIS_CSV_URL = "https://raw.githubusercontent.com/aiforsec/RIT-DSCI-633-FDS/main/Syllabus/Datasets/iris.csv"

#To Load the Dataset into a DataFrame
iris = pd.read_csv(IRIS_CSV_URL)

#To Preview the First Rows of the Data along with its Header
iris.head()

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [None]:
#To get a Descriptive Summary of the Dataset's numeric columns
#(count, mean, std, min, quartiles and max per column)
iris.describe()

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667
std,0.828066,0.433594,1.76442,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [None]:
from sklearn.model_selection import train_test_split

#To separate the Feature Columns (every column except the last)
#from the Label Column (the last column)
feature_frame = iris.iloc[:, :-1]
label_series = iris.iloc[:, -1]
X = feature_frame.values
y = label_series.values

#To hold out 30% of the rows for Testing; the fixed random_state
#makes the split repeatable across runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [None]:
#Training matrix is laid out as (rows, columns):
#rows are the Samples, columns are the Features
samples, feature = X_train.shape

In [None]:
#Distinct Class Labels found in the Training Labels
#(np.unique returns them sorted)
classes = np.unique(y_train)

#Count of Distinct Classes
number_of_classes = classes.size

In [None]:
#Mean and Standard Deviation are stored per Class and per Feature,
#so both tables share the shape (number_of_classes, feature)
stats_shape = (number_of_classes, feature)

#Row i holds the per-feature Mean of Class i
mean = np.zeros(stats_shape, dtype=np.float32)

#Row i holds the per-feature Standard Deviation of Class i
standard_deviation = np.zeros(stats_shape, dtype=np.float32)

#Entry i holds the Prior Probability of Class i, i.e. the fraction
#of Training Samples that belong to that Class
prior_probability = np.zeros(number_of_classes, dtype=np.float32)

#To fill the three tables one Class at a time
for index, value in enumerate(classes):
  #To keep only the Training rows labelled with this Class
  class_mask = (y_train == value)
  X_Training = X_train[class_mask]

  #Column-wise (per-feature) statistics, stored at the Class Index
  mean[index] = np.mean(X_Training, axis=0)
  standard_deviation[index] = np.std(X_Training, axis=0)

  #Occurrence of this Class relative to all Training Samples
  prior_probability[index] = len(X_Training) / samples

In [None]:
#Function to Calculate Probability using Gaussian Distribution Formula
def gaussianFormula(x, mean, std, min_std=1e-9):
  """Evaluate the Gaussian (normal) probability density at x.

  Computes exp(-0.5 * ((x - mean) / std) ** 2) / (std * sqrt(2 * pi))
  element-wise; scalars and numpy arrays are both accepted and follow
  normal numpy broadcasting.

  Parameters:
    x: value(s) at which the density is evaluated.
    mean: mean(s) of the distribution.
    std: standard deviation(s) of the distribution.
    min_std: lower bound applied to std before dividing. A zero std
      (a feature that is constant within a class) would otherwise give
      0/0 and a NaN density. Values of std above this floor are used
      unchanged.

  Returns:
    The density value(s), with the broadcast shape of the inputs.
  """
  #Floor the standard deviation so the divisions below never hit zero
  std = np.maximum(std, min_std)

  exponent = -0.5 * np.square((x - mean) / std)
  probability = np.exp(exponent) / (std * np.sqrt(2 * np.pi))
  return probability

In [None]:
#Initialized variable for storing Predictions
prediction = []

#To Iterate over Test Data
for x_Testing in X_test:
  #To Store the Log-Posterior Score of every Class for this Sample
  best_Case_Probability = []

  #To Iterate over Each Class
  for index in range(len(classes)):
    #To find the Likelihood of each Feature value given the Class
    post = gaussianFormula(x_Testing, mean[index], standard_deviation[index])

    #To find the (unnormalised) Log-Posterior:
    #  log P(class) + sum over features of log P(feature | class)
    #The Log-Prior is added exactly once; adding it inside the sum
    #would count it once per feature and over-weight the Prior.
    final = np.log(prior_probability[index]) + np.sum(np.log(post))

    #To add the Score in a List
    best_Case_Probability.append(final)

  #To append Name of Class whose Score is Max among all the Classes
  prediction.append(classes[np.argmax(best_Case_Probability)])

In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

#All three metrics receive (true labels, predicted labels) in that order

#To Print Classification Report
print('Classification Report: ')
print(classification_report(y_test, prediction))

#To Print Confusion Matrix
print('Confusion Matrix: ')
print(confusion_matrix(y_test, prediction))

#To Print Accuracy Score
print('Accuracy Score:', accuracy_score(y_test, prediction))

Classification Report: 
                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        16
Iris-versicolor       1.00      0.94      0.97        18
 Iris-virginica       0.92      1.00      0.96        11

       accuracy                           0.98        45
      macro avg       0.97      0.98      0.98        45
   weighted avg       0.98      0.98      0.98        45

Confusion Matrix: 
[[16  0  0]
 [ 0 17  1]
 [ 0  0 11]]
Accuracy Score: 0.9777777777777777
