In [1]:
# Mount Google Drive at /content/drive/ so that files stored on Drive are
# reachable through the Colab filesystem (requires the Colab runtime).
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [6]:
import os
!pip install openpyxl

Collecting openpyxl
  Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
  Downloading et_xmlfile-1.1.0-py3-none-any.whl.metadata (1.8 kB)
Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m250.9/250.9 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-1.1.0 openpyxl-3.1.5


In [20]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Location of the training spreadsheet on the mounted Google Drive.
file_name = "/content/drive/MyDrive/AI_PYTHON/naive1.xlsx"

# The first spreadsheet column ("no") is used as the row index.
df_raw = pd.read_excel(file_name, index_col=0)

# Some categorical cells carry stray whitespace (e.g. 'white ' with a trailing
# space). Left as-is, a query for 'white' would not match any trained value,
# so normalise every string cell; non-string cells are passed through untouched.
df = df_raw.apply(
    lambda col: col.map(lambda cell: cell.strip() if isinstance(cell, str) else cell)
)
print(df)

     color  legs height smelly species
no                                    
1   white      3  short    yes       m
2    green     2   tall     no       m
3    green     3  short    yes       m
4   white      3  short    yes       m
5    green     2  short     no       h
6   white      2   tall     no       h
7   white      2   tall     no       h
8   white      2  short    yes       h


In [12]:
# Target vector: the class label to be predicted.
y = df["species"]
# Feature matrix: every remaining column.
X = df.drop(columns=["species"])

print(X.shape)
print(y)

(8, 4)
no
1    m
2    m
3    m
4    m
5    h
6    h
7    h
8    h
Name: species, dtype: object


In [13]:
def accuracy_score(y_true, y_pred):
    """Return the percentage of predictions that equal the true labels.

        score = 100 * (number of positions where y_pred == y_true) / len(y_true)

    The comparison is positional, so any two equally long iterables work
    (lists, tuples, NumPy arrays, pandas Series).

    NOTE: a later cell runs ``from sklearn.metrics import accuracy_score``,
    which rebinds this name; from then on calls go to scikit-learn's version.

    Args:
        y_true: Iterable of ground-truth labels.
        y_pred: Iterable of predicted labels, same length as ``y_true``.

    Returns:
        float: Accuracy as a percentage in [0, 100], rounded to 2 decimals.

    Raises:
        ValueError: If the inputs are empty or differ in length.
    """
    true_labels = list(y_true)
    pred_labels = list(y_pred)

    if len(true_labels) != len(pred_labels):
        raise ValueError(
            "y_true and y_pred must have the same length, got {} and {}".format(
                len(true_labels), len(pred_labels)
            )
        )
    if not true_labels:
        raise ValueError("accuracy is undefined for empty input")

    # Count element-wise matches; comparing the containers directly with ==
    # would yield a single bool for plain lists instead of per-item results.
    matches = sum(1 for truth, guess in zip(true_labels, pred_labels) if truth == guess)
    return round(float(matches) / float(len(true_labels)) * 100, 2)

In [32]:
import numpy as np
from sklearn.metrics import accuracy_score

class NaiveBayes:
    """
    Naive Bayes classifier for categorical features, built from frequency counts.

    Bayes Theorem:
                                  Likelihood * Class prior probability
        Posterior Probability = -------------------------------------
                                      Predictor prior probability

                                   P(x|c) * P(c)
                          P(c|x) = ------------------
                                        P(x)

    ``predict`` ranks classes by the numerator P(x|c) * P(c) only; the evidence
    P(x) is the same for every class and is therefore not divided out. No
    smoothing is applied, so a (value, class) pair that never occurs in the
    training data has likelihood 0.
    """

    def __init__(self):
        """
        Attributes:
            likelihoods: Likelihood of each feature per class, keyed per feature
                by the string "<feature value>_<class>"
            class_priors: Prior probabilities of classes
            pred_priors: Prior probabilities of features
            features: All features of the dataset
        """
        self.features = None
        self.likelihoods = {}
        self.class_priors = {}
        self.pred_priors = {}

        self.X_train = None
        self.y_train = None
        self.train_size = 0
        self.num_feats = 0

    def fit(self, X, y):
        """
        Fit the Naive Bayes model to the training data.

        Args:
            X: pandas DataFrame of categorical features, one row per sample.
            y: Class labels aligned positionally with the rows of ``X``.
        """
        self.features = list(X.columns)
        self.X_train = X
        self.y_train = y
        self.train_size = X.shape[0]
        self.num_feats = X.shape[1]

        # Start from empty tables so that refitting on new data does not keep
        # classes or feature values learned by a previous fit() call.
        self.likelihoods = {}
        self.class_priors = {}
        self.pred_priors = {}

        outcomes = np.unique(np.asarray(self.y_train))
        for outcome in outcomes:
            self.class_priors[outcome] = 0

        for feature in self.features:
            self.likelihoods[feature] = {}
            self.pred_priors[feature] = {}

            for feat_val in np.unique(self.X_train[feature]):
                self.pred_priors[feature][feat_val] = 0

                for outcome in outcomes:
                    # Every (value, class) pair gets an entry, initialised to 0,
                    # so pairs never observed together keep likelihood 0.
                    self.likelihoods[feature][str(feat_val) + '_' + str(outcome)] = 0

        self._calc_class_prior()
        self._calc_likelihoods()
        self._calc_predictor_prior()

    def _calc_class_prior(self):
        """ P(c) - Prior Class Probability """
        labels = np.asarray(self.y_train)
        for outcome in np.unique(labels):
            outcome_count = int(np.sum(labels == outcome))
            self.class_priors[outcome] = outcome_count / self.train_size

    def _calc_likelihoods(self):
        """ P(x|c) - Likelihood """
        labels = np.asarray(self.y_train)
        outcomes = np.unique(labels)

        for feature in self.features:
            column = self.X_train[feature]
            for outcome in outcomes:
                # Select rows by position with a boolean mask. Selecting by index
                # label instead would pick up extra rows whenever the index
                # contains duplicate labels, inflating the counts.
                in_class = labels == outcome
                outcome_count = int(in_class.sum())
                feat_likelihood = column[in_class].value_counts().to_dict()

                for feat_val, count in feat_likelihood.items():
                    # Keys are built from str() so numeric and string values
                    # share one lookup scheme with predict().
                    self.likelihoods[feature][str(feat_val) + '_' + str(outcome)] = count / outcome_count

    def _calc_predictor_prior(self):
        """ P(x) - Evidence """
        for feature in self.features:
            feat_val_counts = self.X_train[feature].value_counts().to_dict()
            for feat_val, count in feat_val_counts.items():
                self.pred_priors[feature][feat_val] = count / self.train_size

    def predict(self, X, verbose=True):
        """
        Predict the class with the highest (unnormalised) posterior per sample.

        Args:
            X: 2-D array-like of samples; columns must be in the same order as
                the features seen by ``fit``.
            verbose: When True (default), print the per-class scores
                P(x|c) * P(c) for each sample.

        Returns:
            numpy.ndarray with one predicted class label per sample.

        Raises:
            KeyError: If a sample contains a feature value that was not present
                in the training data for that feature.
        """
        results = []
        X = np.array(X)

        for query in X:
            probs_outcome = {}
            for outcome, prior in self.class_priors.items():
                likelihood = 1

                for feat, feat_val in zip(self.features, query):
                    key = str(feat_val) + '_' + str(outcome)
                    try:
                        likelihood *= self.likelihoods[feat][key]
                    except KeyError:
                        raise KeyError(
                            "value {!r} of feature {!r} was not seen during fit".format(
                                feat_val, feat
                            )
                        ) from None

                probs_outcome[outcome] = likelihood * prior

            # Ties resolve to the first class in sorted order.
            result = max(probs_outcome, key=probs_outcome.get)
            if verbose:
                print(probs_outcome)
            results.append(result)

        return np.array(results)

In [53]:
# Train the classifier on the full dataset.
nb_clf = NaiveBayes()
nb_clf.fit(X, y)

# Score the model on the same rows it was trained on.
train_predictions = nb_clf.predict(X)
train_accuracy = accuracy_score(y, train_predictions)
print("Train Accuracy: {}".format(train_accuracy))

# Query 1:
# Feature order is color, legs, height, smelly; species is the label being predicted.
query = np.array([['green','2','tall','no']])
query_prediction = nb_clf.predict(query)
print("Query:- {} ---> {}".format(query, query_prediction))


{'h': 0.0, 'm': 0.10546875}
{'h': 0.046875, 'm': 0.00390625}
{'h': 0.0, 'm': 0.10546875}
{'h': 0.0, 'm': 0.10546875}
{'h': 0.046875, 'm': 0.01171875}
{'h': 0.140625, 'm': 0.00390625}
{'h': 0.140625, 'm': 0.00390625}
{'h': 0.046875, 'm': 0.03515625}
Train Accuracy: 0.875
{'h': 0.046875, 'm': 0.00390625}
Query:- [['green' '2' 'tall' 'no']] ---> ['h']
