## <font color = "brown"> Importing Libraries </font>

In [1]:
import pandas as pd
import numpy as np

## <font color = "brown"> Load Data </font>

In [2]:
# Read the iris table and discard its "Id" column in a single chained expression
df = pd.read_csv("iris.csv").drop(columns="Id")

In [3]:
# Display the loaded frame; the rendered output below elides the middle rows with "..."
df

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


## <font color = "brown"> Train Test Split </font>

In [4]:
# Train/test split: draw a seeded 70% sample of the rows for training and
# keep every row that was not drawn for testing
train = df.sample(frac=0.7, random_state=1)
test = df.drop(index=train.index)

# Separate the "Species" label column from the remaining feature columns
x_train, y_train = train.drop(columns="Species"), train["Species"]
x_test, y_test = test.drop(columns="Species"), test["Species"]

## <font color = "brown"> Training </font>

In [5]:
# Per-class Gaussian parameters, estimated from the training split only
by_species = train.groupby("Species")
means = by_species.mean() # Mean of every feature within each class
var = by_species.var() # Variance of every feature within each class

# Prior probability of each class = share of training rows carrying that label.
# .size() counts rows per group. .count() counts non-null cells per column, so
# picking one column of it would under-estimate the prior whenever that column
# has missing values (and .iloc[:, 1] fails outright with a single feature).
prior = by_species.size() / len(train)

classes = np.unique(train["Species"].tolist()) # Sorted array of the distinct class labels

## <font color = "brown"> Classification </font>

In [16]:
def Normal(n, mu, var):
    """Evaluate the probability density of Normal(mu, var) at the point n.

    n   : value at which the density is evaluated
    mu  : mean of the distribution
    var : variance of the distribution (its square root is used as the
          standard deviation)
    """
    sd = np.sqrt(var)
    z = (n - mu) / sd                      # distance from the mean in standard deviations
    numerator = np.e ** (-0.5 * z ** 2)
    denominator = sd * np.sqrt(2 * np.pi)  # normalising constant

    return numerator / denominator

def Predict(X):
    """Classify every row of X with the Gaussian Naive Bayes parameters.

    For each instance, the unnormalised log-posterior of every class is
    computed as
        log prior(class) + sum over features of log Normal(x | mean, variance)
    and the class with the largest value is chosen.

    Reads the notebook-level names `classes`, `prior`, `means`, `var` and
    `x_train` (the latter only for its list of feature columns).

    X : DataFrame containing the feature columns of `x_train`
    Returns a list with one predicted class label per row of X, in row order.
    """
    Predictions = []

    for i in X.index: # Loop through each instance

        instance = X.loc[i]
        ClassLogPosteriors = []

        for cls in classes: # Loop through each class

            LogPosterior = np.log(prior[cls]) # Start from the log prior of class 'cls'

            for col in x_train.columns: # Loop through each feature

                data = instance[col]

                mean = means[col].loc[cls] # Mean of column 'col' within class 'cls'
                variance = var[col].loc[cls] # Variance of column 'col' within class 'cls'

                # Gaussian log-density in closed form. Do not take np.log of
                # the pdf: far from the class mean the pdf underflows to 0,
                # and any stand-in value for log(0) distorts the ranking of
                # the classes. The closed form needs no special case.
                LogPosterior += -0.5 * (np.log(2 * np.pi * variance) + (data - mean) ** 2 / variance)

            ClassLogPosteriors.append(LogPosterior)

        MaxIndex = int(np.argmax(ClassLogPosteriors)) # Position of the largest log-posterior (first one on ties)
        Predictions.append(classes[MaxIndex])

    return Predictions
        

In [17]:
def Accuracy(y, prediction):
    """Return the fraction of positions at which `prediction` equals `y`.

    y          : iterable of true labels
    prediction : iterable of predicted labels, same length as y
    Returns score / len(y) as a float.
    Raises ValueError if the inputs differ in length (zip() would silently
    ignore the unmatched tail while the result is still divided by len(y))
    or if they are empty (accuracy is undefined).
    """
    y = list(y)
    prediction = list(prediction)

    if len(y) != len(prediction):
        raise ValueError(
            f"y has {len(y)} labels but prediction has {len(prediction)}"
        )
    if not y:
        raise ValueError("cannot compute accuracy of an empty label set")

    # Count the positions where the true and predicted labels agree
    score = sum(1 for i, j in zip(y, prediction) if i == j)

    return score / len(y)

In [18]:
# Classify both splits with the hand-written model (train split first, then test split)
PredictTrain, PredictTest = Predict(x_train), Predict(x_test)

In [19]:
# Accuracy of the hand-written classifier: train split first, then test split
for labels, guesses in ((y_train, PredictTrain), (y_test, PredictTest)):
    print(round(Accuracy(labels, guesses), 5))

0.98095
0.91111


## <font color = "brown"> Comparison </font>

In [10]:
from sklearn.naive_bayes import GaussianNB

# Reference model: scikit-learn's Gaussian Naive Bayes fitted on the same training split
clf = GaussianNB().fit(x_train, y_train)  # fit() returns the fitted estimator itself
SkTrain, SkTest = clf.predict(x_train), clf.predict(x_test)  # Train-set, then test-set predictions

In [11]:
# Accuracy of the scikit-learn model: train split first, then test split
for labels, guesses in zip((y_train, y_test), (SkTrain, SkTest)):
    print(round(Accuracy(labels, guesses), 5))

0.98095
0.91111


In [12]:
# Agreement between the hand-written predictions and scikit-learn's,
# measured with the same Accuracy helper: train split first, then test split
for ours, theirs in ((PredictTrain, SkTrain), (PredictTest, SkTest)):
    print(round(Accuracy(ours, theirs), 5))

1.0
1.0


$$
\begin{aligned}
P(C \mid x) &= \frac{P(x \mid C) P(C)}{P(x)} \\
\\ \space
\text{Where,} &
\\
P(C \mid x)&: \text{Belief in C, given observation x} \\
P(x \mid C)&: \text{Probability of observing x if C is true} \\
P(C) &: \text{Probability of observing C} \\
P(x) &: \text{Probability of observing x}
\end{aligned}
$$

$$
\begin{aligned}
P(C_i \mid x) &= \frac{P(x \mid C_i) P(C_i)}{P(x)} \\
\\ &\propto P(x \mid C_i) P(C_i) \space\space\space\dots\text{Line 2} \\
\\ &\propto P(C_i) \prod_{k = 1}^K P(x_k \mid C_i) \space\space\space\dots\text{Line 3} \\
\end{aligned}
$$

$$
\begin{aligned}
\\ P(x \mid C_i) &= P(x_1 \cap x_2 \cap \dots \cap x_K \mid C_i) \\
\\ &= P(x_1 \mid C_i) P(x_2 \mid C_i) \space \dots \space P(x_K \mid C_i) \space\space\space\dots\text{features assumed conditionally independent given } C_i \\
\\ &= \prod_{k = 1}^K P(x_k \mid C_i)
\end{aligned}
$$

$$
\begin{aligned}
\text{Instead of this,}& \\
P(C_i \mid x) &\propto P(C_i) \prod_{k = 1}^K P(x_k \mid C_i) \\
\\ 
\text{We will use this,}& \\
\log P(C_i \mid x) &= \log P(C_i) + \sum_{k = 1}^K \log P(x_k \mid C_i) - \log P(x) \space\space\space\dots\text{the last term is the same for every class}
\end{aligned}
$$