In [2]:
import numpy as np

In [3]:
def fit(x_train,y_train):
    result = {}
    result["total_data"]=len(y_train)
    key_val = set(y_train)
    for key in key_val:
        result[key] = {}
        currentclassrow = (y_train==key)
        x_train_current = x_train[currentclassrow]
        y_train_current = y_train[currentclassrow]
        numberoffeatures = x_train.shape[1]
        for feature in range(1,1+numberoffeatures):
            result[key][feature]={}
            val = set(x_train[:,feature-1])
            for indval in val:
                result[key][feature][indval] = (x_train_current[:,feature-1]==indval).sum()
        result[key]["count"]=len(y_train_current)
    return result

In [4]:
def prob(dictionary,x,curr_class):
    output = np.log(1)
    #it has 2 components (prioir prob / class prob)
    classprob = np.log(dictionary[curr_class]["count"])-np.log(dictionary["total_data"])
    #now we do 2nd component
    numoffeatures = len(dictionary[curr_class].keys())-1
    for i in range(1,1+numoffeatures):
        xi = x[i-1]
        countwithxi = dictionary[curr_class][i][xi] +1
        prob = np.log(countwithxi)-np.log(dictionary[curr_class]["count"] +len(dictionary[curr_class][i].keys()))
        output +=  prob
    #print (output,classprob)
    return output+classprob

In [5]:
def predictsinglepoint(dictionary,x):
    classes = dictionary.keys()
    bestprob = -1000
    bestclass = -1
    for curr_class in classes:
        if(curr_class== "total_data"):
            continue
        p_curr_class = prob(dictionary, x, curr_class)
        if(p_curr_class>bestprob):
            bestprob = p_curr_class
            bestclass= curr_class
    return bestclass

In [6]:
def predict(dictionary , x_test):
    y_pred = []
    for x in x_test:
        x_class = predictsinglepoint(dictionary,x)
        y_pred.append(x_class)
        #print(x_class)
    return y_pred

In [7]:
def makelabel(column):
    seclim = column.mean()
    firlim = seclim / 2
    thirlim = firlim * 3 
    for i in range(len(column)):
        if(column[i]<firlim):
            column[i] = 0
        elif(column[i]>=firlim and column[i]<seclim):
            column[i]=1
        else:
            column[i]=2
    return column

In [8]:
from sklearn import datasets
db = datasets.load_iris()
x = db.data
y = db.target

In [9]:
for i in range(x.shape[-1]):
    x[:,i] = makelabel(x[:,i])

In [17]:
from sklearn import model_selection
x_train , x_test , y_train , y_test = model_selection.train_test_split(x,y)

In [18]:
print(x_train.shape)

(112, 4)


In [19]:
dictionary = fit(x_train,y_train)

In [20]:
print(dictionary[1])

{1: {1.0: 17, 2.0: 17}, 2: {1.0: 26, 2.0: 8}, 3: {0.0: 0, 1.0: 3, 2.0: 31}, 4: {0.0: 0, 1.0: 5, 2.0: 29}, 'count': 34}


In [21]:
y_pred = predict(dictionary,x_test)
print(y_pred)

[2, 2, 0, 0, 1, 0, 2, 1, 2, 1, 0, 2, 0, 2, 2, 2, 2, 1, 0, 1, 2, 2, 2, 2, 0, 2, 1, 1, 2, 0, 0, 1, 0, 2, 1, 2, 0, 2]


In [22]:
from sklearn.metrics import classification_report, confusion_matrix
print(classification_report(y_test,y_pred))
print(confusion_matrix(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        11
           1       0.89      0.50      0.64        16
           2       0.56      0.91      0.69        11

   micro avg       0.76      0.76      0.76        38
   macro avg       0.81      0.80      0.78        38
weighted avg       0.82      0.76      0.76        38

[[11  0  0]
 [ 0  8  8]
 [ 0  1 10]]


In [23]:
#for continuous valued data (Gaussian Curve)
from sklearn.naive_bayes import GaussianNB
algo = GaussianNB()
algo.fit(x_train,y_train)
y_pred2 = algo.predict(x_test)
print(classification_report(y_test,y_pred2))
print(confusion_matrix(y_test,y_pred2))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        11
           1       1.00      0.38      0.55        16
           2       0.52      1.00      0.69        11

   micro avg       0.74      0.74      0.74        38
   macro avg       0.84      0.79      0.74        38
weighted avg       0.86      0.74      0.72        38

[[11  0  0]
 [ 0  6 10]
 [ 0  0 11]]


In [29]:
#for continuous valued data (Multinomial Curve)
from sklearn.naive_bayes import MultinomialNB
algo1 = MultinomialNB()
algo1.fit(x_train,y_train)
y_pred3 = algo1.predict(x_test)
print(classification_report(y_test,y_pred3))
print(confusion_matrix(y_test,y_pred3))

              precision    recall  f1-score   support

           0       1.00      0.91      0.95        11
           1       0.00      0.00      0.00        16
           2       0.39      1.00      0.56        11

   micro avg       0.55      0.55      0.55        38
   macro avg       0.46      0.64      0.51        38
weighted avg       0.40      0.55      0.44        38

[[10  0  1]
 [ 0  0 16]
 [ 0  0 11]]


  'precision', 'predicted', average, warn_for)
