# In [None]:
### **1. Import Libraries**
from sklearn.datasets import load_iris
import numpy as np
### **2. Load Iris Dataset**
# Fetch the bundled Iris dataset (a Bunch exposing .data, .target and .feature_names).
iris = load_iris()

# Feature matrix and class-label vector, kept as separate arrays for the models below.
X, y = iris.data, iris.target

# Tabular view of the same data: one column per feature plus a trailing 'target' column.
import pandas as pd
df = pd.DataFrame(X, columns=iris.feature_names).assign(target=y)

# Bare expression so a notebook renders the frame.
df

### **3. Build NB Classifier from Scratch**
import numpy as np

class NaiveBayesClassifier:
    """Naive Bayes classifier with multinomial, Bernoulli or Gaussian likelihoods.

    Attributes:
        class_probabilities: Prior P(c) per class, ordered like ``classes``.
        feature_probabilities: Dict keyed by class label. Holds a per-feature
            probability vector for 'multinomial' and 'bernoulli', or a
            ``(mean, std)`` tuple of per-feature vectors for 'gaussian'.
        classifier_type: Lower-cased likelihood model chosen in ``fit``.
        y: Training labels as passed to ``fit`` (converted to a numpy array).
        classes: Sorted unique training labels; ``predict`` returns values
            taken from this array.
    """

    _SUPPORTED_TYPES = ('multinomial', 'bernoulli', 'gaussian')
    _INVALID_TYPE_MESSAGE = "Invalid classifier_type. Supported types are 'multinomial', 'bernoulli', and 'gaussian'."
    # Added to every variance so a constant feature cannot cause a division by zero.
    _VARIANCE_FLOOR = 1e-9

    def __init__(self):
        self.class_probabilities = None
        self.feature_probabilities = None
        self.classifier_type = None
        self.y = None
        self.classes = None

    def fit(self, X, y, classifier_type='multinomial'):
        """
        Fits the Naive Bayes classifier to the data.

        Args:
            X: Array-like of shape (n_samples, n_features) with numeric values.
            y: Array-like of shape (n_samples,) with the class labels.
            classifier_type: Type of Naive Bayes classifier. Options: 'multinomial', 'bernoulli', 'gaussian'.

        Returns:
            None.

        Raises:
            ValueError: If the shapes are inconsistent, the data is empty,
                ``classifier_type`` is unknown, or 'multinomial' is requested
                for data containing negative values.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features).")
        if y.ndim != 1 or y.shape[0] != X.shape[0]:
            raise ValueError("y must be a 1-D array with one label per row of X.")
        if X.shape[0] == 0:
            raise ValueError("Cannot fit on an empty dataset.")

        classifier_type = classifier_type.lower()
        if classifier_type not in self._SUPPORTED_TYPES:
            raise ValueError(self._INVALID_TYPE_MESSAGE)
        if classifier_type == 'multinomial' and np.any(X < 0):
            raise ValueError("Multinomial Naive Bayes requires non-negative feature values.")

        self.y = y
        self.classifier_type = classifier_type

        # Class priors. np.unique (rather than np.bincount) keeps one entry per
        # label actually present, so labels need not be 0..K-1.
        self.classes, class_counts = np.unique(y, return_counts=True)
        self.class_probabilities = class_counts / class_counts.sum()

        self.feature_probabilities = {}
        for class_label in self.classes:
            X_class = X[y == class_label]
            if classifier_type == 'multinomial':
                # Total count of each feature within the class, Laplace-smoothed
                # so no feature gets probability zero.
                feature_counts = X_class.sum(axis=0) + 1.0
                parameters = feature_counts / feature_counts.sum()
            elif classifier_type == 'bernoulli':
                # Features are binarised as "non-zero" so that fit and predict
                # see the same representation; +1 / +2 is Laplace smoothing.
                present = (X_class != 0).sum(axis=0)
                parameters = (present + 1) / (X_class.shape[0] + 2)
            else:
                parameters = (np.mean(X_class, axis=0), np.std(X_class, axis=0))
            self.feature_probabilities[class_label] = parameters

    def predict(self, X):
        """
        Predicts the class of the input data.

        Args:
            X: Array-like of shape (n_queries, n_features).

        Returns:
            A numpy array with one predicted class label per query row.

        Raises:
            ValueError: If the classifier has not been fitted with a supported
                ``classifier_type`` or ``X`` is not 2-D.
        """
        if self.classifier_type not in self._SUPPORTED_TYPES:
            raise ValueError(self._INVALID_TYPE_MESSAGE)

        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features).")

        # Only parameters learned in fit are used here; the query rows are
        # never filtered by, or used to re-estimate from, the training labels.
        log_posteriors = np.empty((X.shape[0], len(self.classes)))
        for column, class_label in enumerate(self.classes):
            log_prior = np.log(self.class_probabilities[column])
            log_posteriors[:, column] = log_prior + self._log_likelihood(X, class_label)

        # Predict the class with the highest (log) posterior.
        return self.classes[np.argmax(log_posteriors, axis=1)]

    def _log_likelihood(self, X, class_label):
        """Return log P(x | class_label) for every row of X."""
        parameters = self.feature_probabilities[class_label]

        if self.classifier_type == 'multinomial':
            # sum_j x_j * log(theta_j)
            return X @ np.log(parameters)

        if self.classifier_type == 'bernoulli':
            # log(p_j) where the feature is present, log(1 - p_j) where absent.
            present = X != 0
            return np.where(present, np.log(parameters), np.log1p(-parameters)).sum(axis=1)

        class_mean, class_std = parameters
        variance = class_std ** 2 + self._VARIANCE_FLOOR
        return np.sum(
            -0.5 * np.log(2 * np.pi * variance) - 0.5 * (X - class_mean) ** 2 / variance,
            axis=1,
        )
import numpy as np
from sklearn.preprocessing import LabelEncoder

class NaiveBayes:
    """Naive Bayes classifier over a pandas DataFrame of features.

    'multinomial' and 'bernoulli' both build per-feature frequency tables
    (each distinct feature value is treated as a category); 'gaussian' stores
    a per-class mean and standard deviation for every feature.

    Attributes:
        features: Column names of the training frame.
        likelihoods: ``likelihoods[feature]["<value>_<class index>"]`` holds
            P(value | class) for the frequency models;
            ``likelihoods[feature][class index]`` holds ``(mean, std)`` for
            the Gaussian model.
        class_priors: P(class), keyed by the encoded class index.
        pred_priors: P(value) per feature (frequency models only).
        classifier_type: Model chosen in the last ``fit`` call.
        classes_: Sorted original labels; position i is class index i.
    """

    _SUPPORTED_TYPES = ('multinomial', 'bernoulli', 'gaussian')
    # Lower bound on a standard deviation so constant features stay finite.
    _MIN_STD = 1e-9

    def __init__(self):
        self.features = []
        self.likelihoods = {}
        self.class_priors = {}
        self.pred_priors = {}

        self.X_train = np.array([])
        self.y_train = np.array([])
        self.train_size = 0
        self.num_feats = 0

        self.classifier_type = None
        self.classes_ = np.array([])
        self._train_values = np.empty((0, 0))

    def fit(self, X, y, classifier_type='multinomial'):
        """Estimate priors and likelihoods from the training data.

        Args:
            X: pandas DataFrame with one row per sample.
            y: Labels, one per row of ``X`` (any array-like accepted by numpy).
            classifier_type: 'multinomial', 'bernoulli' or 'gaussian'.

        Raises:
            ValueError: If ``classifier_type`` is not supported.
        """
        if classifier_type not in self._SUPPORTED_TYPES:
            raise ValueError("Invalid classifier_type. Supported types are 'multinomial', 'bernoulli', and 'gaussian'.")

        self.features = list(X.columns)
        self.X_train = X
        self.y_train = y
        self.train_size = X.shape[0]
        self.num_feats = X.shape[1]
        self.classifier_type = classifier_type

        # Same conversion predict applies to its queries, so the values used
        # as table keys have identical types in both places.
        self._train_values = np.array(X)

        # Start from empty tables so a refit never inherits stale entries.
        self.likelihoods = {}
        self.class_priors = {}
        self.pred_priors = {}

        if classifier_type == 'multinomial':
            self._fit_multinomial()
        elif classifier_type == 'bernoulli':
            self._fit_bernoulli()
        else:
            self._fit_gaussian()

    @staticmethod
    def _likelihood_key(feat_val, outcome):
        """Build the "<value>_<class index>" key; str() lets numeric values work too."""
        return str(feat_val) + '_' + str(outcome)

    def _encode_labels(self):
        """Record the sorted unique labels and return each sample's class index."""
        self.classes_, y_encoded = np.unique(np.asarray(self.y_train), return_inverse=True)
        return y_encoded.reshape(-1)

    def _fit_multinomial(self):
        """Build the frequency tables: class priors, likelihoods and predictor priors."""
        y_encoded = self._encode_labels()
        outcomes = np.unique(y_encoded)

        # Pre-fill every (value, class) pair with 0 so combinations that never
        # occur in training are still present in the table.
        for col, feature in enumerate(self.features):
            self.likelihoods[feature] = {}
            self.pred_priors[feature] = {}
            for feat_val in np.unique(self._train_values[:, col]):
                self.pred_priors[feature][feat_val] = 0
                for outcome in outcomes:
                    self.likelihoods[feature][self._likelihood_key(feat_val, outcome)] = 0
        for outcome in outcomes:
            self.class_priors[outcome] = 0

        self._calc_class_prior_multinomial(y_encoded)
        self._calc_likelihoods_multinomial(y_encoded)
        self._calc_predictor_prior_multinomial()

    def _calc_class_prior_multinomial(self, y_encoded):
        """P(c): relative frequency of every class index."""
        class_counts = np.bincount(y_encoded)
        total_count = np.sum(class_counts)

        for outcome, count in enumerate(class_counts):
            self.class_priors[outcome] = count / total_count

    def _calc_likelihoods_multinomial(self, y_encoded):
        """P(x|c): relative frequency of every feature value within each class."""
        for col, feature in enumerate(self.features):
            column = self._train_values[:, col]
            for outcome in np.unique(y_encoded):
                in_class = column[y_encoded == outcome]
                values, counts = np.unique(in_class, return_counts=True)
                for feat_val, count in zip(values, counts):
                    self.likelihoods[feature][self._likelihood_key(feat_val, outcome)] = count / in_class.size

    def _calc_predictor_prior_multinomial(self):
        """P(x): relative frequency of every feature value over all samples."""
        for col, feature in enumerate(self.features):
            values, counts = np.unique(self._train_values[:, col], return_counts=True)
            for feat_val, count in zip(values, counts):
                self.pred_priors[feature][feat_val] = count / self.train_size

    # The Bernoulli variant uses exactly the same frequency tables, and the
    # class prior is computed identically for all three models; the per-model
    # method names are kept as aliases.
    _fit_bernoulli = _fit_multinomial
    _calc_class_prior_bernoulli = _calc_class_prior_multinomial
    _calc_likelihoods_bernoulli = _calc_likelihoods_multinomial
    _calc_predictor_prior_bernoulli = _calc_predictor_prior_multinomial
    _calc_class_prior_gaussian = _calc_class_prior_multinomial

    def _fit_gaussian(self):
        """Estimate class priors and per-class (mean, std) for every feature."""
        y_encoded = self._encode_labels()

        for feature in self.features:
            self.likelihoods[feature] = {}
            self.pred_priors[feature] = {}
        for outcome in np.unique(y_encoded):
            self.class_priors[outcome] = 0

        self._calc_class_prior_gaussian(y_encoded)
        self._calc_likelihoods_gaussian(y_encoded)

    def _calc_likelihoods_gaussian(self, y_encoded):
        """Store (mean, std) of every feature for every class index."""
        for col, feature in enumerate(self.features):
            column = self._train_values[:, col].astype(float)
            for outcome in np.unique(y_encoded):
                in_class = column[y_encoded == outcome]
                self.likelihoods[feature][outcome] = (np.mean(in_class), np.std(in_class))

    def predict(self, X):
        """Return the most probable original label for every row of ``X``.

        Args:
            X: DataFrame or 2-D array-like whose columns are ordered like the
                training frame.

        Returns:
            numpy array with one label (taken from the training labels) per row.

        Raises:
            ValueError: If ``fit`` has not been called.
        """
        if self.classifier_type is None:
            raise ValueError("This NaiveBayes instance is not fitted yet; call fit() first.")

        n_classes = len(self.classes_)
        results = []
        for query in np.array(X):
            scores = [self._class_score(query, outcome) for outcome in range(n_classes)]
            # argmax returns the first maximum, so ties go to the lowest class index.
            results.append(self.classes_[int(np.argmax(scores))])

        return np.array(results)

    def _class_score(self, query, outcome):
        """Score one query against one class index (higher means more probable).

        The evidence P(x) is the same for every class of a given query, so it
        is left out: it cannot change which class scores highest.
        """
        prior = self.class_priors[outcome]

        if self.classifier_type == 'gaussian':
            # Log space avoids underflow when many small densities are multiplied.
            score = np.log(prior)
            for feat, feat_val in zip(self.features, query):
                class_mean, class_std = self.likelihoods[feat][outcome]
                score += self._calc_gaussian_log_likelihood(float(feat_val), class_mean, class_std)
            return score

        score = prior
        for feat, feat_val in zip(self.features, query):
            # A value never seen in training has likelihood 0 for every class.
            score *= self.likelihoods[feat].get(self._likelihood_key(feat_val, outcome), 0)
        return score

    def _calc_gaussian_log_likelihood(self, x, mean, std):
        """Log of the normal density N(x; mean, std), with std floored at _MIN_STD."""
        std = np.maximum(std, self._MIN_STD)
        return -0.5 * np.log(2 * np.pi * std ** 2) - ((x - mean) ** 2) / (2 * std ** 2)

    def _calc_gaussian_likelihood(self, x, mean, std):
        """Normal density N(x; mean, std), with std floored at _MIN_STD."""
        return np.exp(self._calc_gaussian_log_likelihood(x, mean, std))
import numpy as np 
import pandas as pd 	
import matplotlib.pyplot as plt 
import math


def accuracy_score(y_true, y_pred):
    """Return the percentage of predictions equal to the true labels.

    score = 100 * (number of positions where y_pred == y_true) / len(y_true),
    rounded to two decimals.

    Args:
        y_true: Array-like of true labels.
        y_pred: Array-like of predicted labels, same shape as ``y_true``.

    Returns:
        Accuracy as a float in the range [0, 100].

    Raises:
        ValueError: If the inputs differ in shape or are empty.
    """
    # Converting to arrays makes the comparison element-wise and positional
    # for lists, Series and ndarrays alike.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    if y_true.shape != y_pred.shape:
        raise ValueError("y_true and y_pred must have the same shape.")
    if y_true.size == 0:
        raise ValueError("Accuracy is undefined for empty input.")

    matches = y_pred == y_true
    return round(float(np.count_nonzero(matches)) / float(matches.size) * 100, 2)

def pre_processing(df):
    """Split a frame into features (all but the last column) and target (the last column)."""
    target_column = df.columns[-1]
    return df.drop(columns=[target_column]), df[target_column]



class NaiveBayes:
    """Categorical Naive Bayes built from frequency tables.

    Bayes Theorem:

                                 Likelihood * Class prior probability
        Posterior Probability = --------------------------------------
                                    Predictor prior probability

                    P(x|c) * P(c)
        P(c|x) = -----------------
                       P(x)

    Every distinct value of a feature is treated as a category, so numeric
    columns work but are matched by exact value.
    """

    def __init__(self):
        """
        Attributes:
            likelihoods: Likelihood of each feature value per class, keyed
                "<value>_<class>" inside a dict per feature
            class_priors: Prior probabilities of classes
            pred_priors: Prior probabilities of feature values
            features: All features of dataset
        """
        self.features = []
        self.likelihoods = {}
        self.class_priors = {}
        self.pred_priors = {}

        self.X_train = np.array([])
        self.y_train = np.array([])
        self.train_size = 0
        self.num_feats = 0

    @staticmethod
    def _likelihood_key(feat_val, outcome):
        """Build the "<value>_<class>" key; str() keeps non-string values and labels usable."""
        return str(feat_val) + '_' + str(outcome)

    def fit(self, X, y):
        """Build the class-prior, likelihood and predictor-prior tables.

        Args:
            X: pandas DataFrame of features, one row per sample.
            y: Labels, one per row of ``X`` (Series or array-like).
        """
        self.features = list(X.columns)
        self.X_train = X
        self.y_train = y
        self.train_size = X.shape[0]
        self.num_feats = X.shape[1]

        # Start from empty tables so a refit never inherits stale entries.
        self.likelihoods = {}
        self.class_priors = {}
        self.pred_priors = {}

        outcomes = np.unique(self.y_train)

        # Pre-fill every (value, class) pair with 0 so combinations that never
        # occur together in training are still present in the table.
        for feature in self.features:
            self.likelihoods[feature] = {}
            self.pred_priors[feature] = {}

            for feat_val in np.unique(self.X_train[feature]):
                self.pred_priors[feature][feat_val] = 0

                for outcome in outcomes:
                    self.likelihoods[feature][self._likelihood_key(feat_val, outcome)] = 0

        for outcome in outcomes:
            self.class_priors[outcome] = 0

        self._calc_class_prior()
        self._calc_likelihoods()
        self._calc_predictor_prior()

    def _calc_class_prior(self):
        """ P(c) - Prior Class Probability """
        labels = np.asarray(self.y_train)

        for outcome in np.unique(labels):
            self.class_priors[outcome] = np.sum(labels == outcome) / self.train_size

    def _calc_likelihoods(self):
        """ P(x|c) - Likelihood """
        # A positional boolean mask pairs row i of X with label i and works
        # whether y is a Series or a plain array.
        labels = np.asarray(self.y_train)

        for feature in self.features:
            column = self.X_train[feature]

            for outcome in np.unique(labels):
                in_class = labels == outcome
                outcome_count = in_class.sum()
                feat_likelihood = column[in_class].value_counts().to_dict()

                for feat_val, count in feat_likelihood.items():
                    self.likelihoods[feature][self._likelihood_key(feat_val, outcome)] = count / outcome_count

    def _calc_predictor_prior(self):
        """ P(x) - Evidence """
        for feature in self.features:
            feat_vals = self.X_train[feature].value_counts().to_dict()

            for feat_val, count in feat_vals.items():
                self.pred_priors[feature][feat_val] = count / self.train_size

    def predict(self, X):
        """Calculates Posterior probability P(c|x) and returns the best class per row.

        Args:
            X: DataFrame or 2-D array-like whose columns are ordered like the
                training frame.

        Returns:
            numpy array with one predicted label per row. A row containing a
            feature value never seen in training scores 0 for every class and
            therefore gets the first class in sorted order.
        """
        # dtype=object keeps each column's own value type; a plain np.array()
        # on a mixed int/float frame would upcast ints and change their keys.
        queries = X.to_numpy(dtype=object) if hasattr(X, 'to_numpy') else np.array(X)
        outcomes = np.unique(self.y_train)

        results = []
        for query in queries:
            probs_outcome = {}
            for outcome in outcomes:
                prior = self.class_priors[outcome]
                likelihood = 1
                evidence = 1

                for feat, feat_val in zip(self.features, query):
                    likelihood *= self.likelihoods[feat].get(self._likelihood_key(feat_val, outcome), 0)
                    evidence *= self.pred_priors[feat].get(feat_val, 0)

                # evidence is 0 only when some value was never seen in training.
                probs_outcome[outcome] = (likelihood * prior) / evidence if evidence > 0 else 0.0

            results.append(max(probs_outcome, key=probs_outcome.get))

        return np.array(results)

			

if __name__ == "__main__":
    # Use dedicated names here: the module-level X and y hold the numpy arrays
    # taken from the Iris dataset, and rebinding them to a DataFrame / Series
    # would hand the wrong types to the array-based code that runs afterwards.
    features_df, target = pre_processing(df)
    nb_clf = NaiveBayes()
    nb_clf.fit(features_df, target)

    print("Train Accuracy: {}".format(accuracy_score(target, nb_clf.predict(features_df))))

    # Example single-row queries for a categorical (weather-style) dataset,
    # kept for reference; they do not apply to the numeric Iris frame.
    #
    # #Query 1:
    # query = np.array([['Rainy','Mild', 'Normal', 't']])
    # print("Query 1:- {} ---> {}".format(query, nb_clf.predict(query)))

    # #Query 2:
    # query = np.array([['Overcast','Cool', 'Normal', 't']])
    # print("Query 2:- {} ---> {}".format(query, nb_clf.predict(query)))

    # #Query 3:
    # query = np.array([['Sunny','Hot', 'High', 't']])
    # print("Query 3:- {} ---> {}".format(query, nb_clf.predict(query)))
model = NaiveBayesClassifier()
model.fit(X, y, classifier_type='multinomial')
y_pred = model.predict(X)
### **4. Split Data**
# 80/20 train/test split.
# The Iris rows are stored grouped by class, so slicing the first 80% would
# leave a test set made of a single class. Shuffle the row indices first,
# with a fixed seed so the split is reproducible.
split_rng = np.random.default_rng(42)
shuffled_idx = split_rng.permutation(len(X))
train_size = int(0.8 * len(X))
train_idx, test_idx = shuffled_idx[:train_size], shuffled_idx[train_size:]

# np.asarray makes the positional indexing below valid even if X / y are
# pandas objects rather than numpy arrays.
X_train, X_test = np.asarray(X)[train_idx], np.asarray(X)[test_idx]
y_train, y_test = np.asarray(y)[train_idx], np.asarray(y)[test_idx]
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
### **5. Define Metric**
def accuracy(y_true, y_pred):
    """
    Calculates the accuracy of the predicted classes.

    Args:
        y_true: Array-like of the true class labels.
        y_pred: Array-like of the predicted class labels, same shape as y_true.

    Returns:
        The fraction of positions where y_pred equals y_true, in [0, 1].

    Raises:
        ValueError: If the inputs differ in shape or are empty.
    """
    # Without the conversion, two plain lists would compare as a single
    # boolean and the result would silently be 0 or 1 / len(y_true).
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    if y_true.shape != y_pred.shape:
        raise ValueError("y_true and y_pred must have the same shape.")
    if y_true.size == 0:
        raise ValueError("Accuracy is undefined for empty input.")

    matches = y_true == y_pred
    return np.count_nonzero(matches) / matches.size
### **6. Predict using Multinomial NB**
# Fresh model, trained on the training portion only.
classifier = NaiveBayesClassifier()
classifier.fit(X_train, y_train, 'multinomial')

# Score the held-out rows and report the share predicted correctly.
predictions = classifier.predict(X_test)
print(f"Test accuracy: {accuracy(y_test, predictions):.2%}")