In [8]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

In [9]:
class NaiveBayes:
    def fit(self,X,y):
        n_samples, n_features = X.shape
        self._classes = np.unique(y)
        n_classes = len(self._classes)
        
        self._mean = np.zeros((n_classes, n_features), dtype = np.float64)
        self._var = np.zeros((n_classes, n_features), dtype = np.float64)
        self._priors = np.zeros(n_classes, dtype=np.float64)
        
        
        for c in self._classes:
            X_c=X[c==y]
            self._mean[c,:]= X_c.mean(axis = 0)
            self._var[c,:]= X_c.var(axis=0)
            self._priors[c]= X_c.shape[0] / float(n_samples)
    def predict(self,X):
        y_pred= [self._predict(x) for x in X]
        return y_pred
    
    
    def _predict(self,x):
        posteriors = []
        
        for idx, c in enumerate(self._classes):
            prior = np.log(self._priors[idx])
            class_conditional = np.sum(np.log(self._pdf(idx,x)))
            posterior = prior + class_conditional
            posteriors.append(posterior)
            
        return self._classes[np.argmax(posteriors)]
            
            
    def _pdf(self,class_idx, x):
        mean = self._mean[class_idx]
        var = self._var[class_idx]
        numerator = np.exp(- (x-mean)**2/ (2*var))
        denominator = np.sqrt(2*np.pi * var)
        return numerator / denominator
    

In [11]:
def accuracy(y_true, y_pred):
    """Return the fraction of predictions that equal the true labels.

    Parameters
    ----------
    y_true, y_pred : array-like
        True and predicted labels; must have the same shape.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    """
    # Coerce to arrays first: comparing two plain lists with ``==`` yields a
    # single bool rather than an element-wise result.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            "y_true and y_pred must have the same shape, got "
            f"{y_true.shape} and {y_pred.shape}"
        )
    matches = y_true == y_pred
    return np.sum(matches) / matches.size

# Load the Iris dataset that ships with scikit-learn.
myiris = datasets.load_iris()
X = myiris.data
y = myiris.target

# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split (and therefore the reported metrics) reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

# Train the from-scratch classifier and predict the held-out samples.
nb = NaiveBayes()
nb.fit(X_train, y_train)
predictions = nb.predict(X_test)

print("Classification accuracy : ",accuracy(y_test, predictions))

# confusion_matrix is imported with the other sklearn names in the import cell.
print("\n\nConfusion matrix: ")
print(confusion_matrix(y_test, predictions))


print('\nClassification Report is : ')
print(classification_report(y_test, predictions))

Classification accuracy :  0.9666666666666667


Confusion matrix: 
[[13  0  0]
 [ 0  6  0]
 [ 0  1 10]]

Classification Report is : 
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       0.86      1.00      0.92         6
           2       1.00      0.91      0.95        11

    accuracy                           0.97        30
   macro avg       0.95      0.97      0.96        30
weighted avg       0.97      0.97      0.97        30



In [None]:
# Making a dataset and applying the Naive Bayes classifier (NBC) algorithm

In [1]:
import numpy as np
import pandas as pd

In [2]:
# NOTE(review): absolute, machine-specific path - the notebook only runs where
# this file exists; consider a path relative to a configurable data directory.
DATA_PATH = r"C:\Users\PAROMITA\Desktop\DATASETS\Play_dataset.csv"
df = pd.read_csv(DATA_PATH)

In [3]:
# The last column is the target; every other column is a feature.
target_column = df.columns[-1]
x = df.drop(columns=[target_column])

y = df[target_column]

In [4]:
# Names of the feature columns, in column order.
features = x.columns.tolist()

In [5]:
# The whole dataset is used for training (no hold-out split).
x_train, y_train = x, y
# Number of rows and number of feature columns.
train_size, num_feats = x.shape

In [6]:
# Lookup tables filled in by the following cells:
#   pred_priors[feature][value]               -> relative frequency of the value
#   class_priors[outcome]                     -> relative frequency of the outcome
#   likelihoods[feature][value + '_' + outcome] -> frequency of value within outcome
pred_priors = dict()
class_priors = dict()
likelihoods = dict()

In [7]:
# Pre-fill every table with zeros so that each (feature value, outcome) pair
# has an entry even when that pair never occurs together in the data.
outcome_labels = np.unique(y_train)
for feature in features:
    likelihoods[feature] = {}
    pred_priors[feature] = {}

    for feat_val in np.unique(x_train[feature]):
        pred_priors[feature][feat_val] = 0

        for outcome in outcome_labels:
            # Keys are "<value>_<outcome>" strings, so values and outcomes
            # must both be strings for the concatenation to work.
            likelihoods[feature][feat_val + '_' + outcome] = 0
            class_priors[outcome] = 0

In [8]:
# Class prior: share of training rows that carry each outcome.
for outcome in np.unique(y_train):
    outcome_count = (y_train == outcome).sum()
    class_priors[outcome] = outcome_count / train_size

In [9]:

# Conditional likelihood: among the rows with a given outcome, the share that
# has each value of the feature.
for feature in features:
    for outcome in np.unique(y_train):
        outcome_mask = y_train == outcome
        outcome_count = outcome_mask.sum()
        # Select the feature values of those rows by index label.
        outcome_rows = y_train[outcome_mask].index.tolist()
        feat_likelihood = x_train.loc[outcome_rows, feature].value_counts().to_dict()

        for feat_val, count in feat_likelihood.items():
            likelihoods[feature][feat_val + '_' + outcome] = count / outcome_count

In [10]:

# Marginal frequency of every feature value over the whole training set.
for feature in features:
    feat_vals = x_train[feature].value_counts().to_dict()

    for feat_val in feat_vals:
        pred_priors[feature][feat_val] = feat_vals[feat_val] / train_size

In [11]:


# Score each query row against every outcome and keep the best-scoring one.
results = []
# One row per query; values are given in the same order as `features`.
qu = np.array([['Rainy','Cool','High','t']])

# Both are 0, so the m*p and +m terms below have no effect on the result.
m = 0
p = 0

for query in qu:
    probs_outcome = {}
    for outcome in np.unique(y_train):
        prior = class_priors[outcome]
        likelihood = 1
        evidence = 1

        # Multiply the per-feature conditional likelihoods and marginals.
        for feat, feat_val in zip(features, query):
            likelihood *= likelihoods[feat][feat_val + '_' + outcome]
            evidence *= pred_priors[feat][feat_val]

        # NOTE(review): the evidence is a product of per-feature marginals,
        # so the scores across outcomes are not normalised to sum to 1.
        probs_outcome[outcome] = (likelihood * prior + m*p) / (evidence + m)
    print(probs_outcome)
    results.append(max(probs_outcome, key=probs_outcome.get))

print(results)

{'No': 0.6272000000000002, 'Yes': 0.36296296296296293}
['No']
