In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import math

# Load the raw table; every column is label-encoded below.
df = pd.read_csv('computer.csv')

# One LabelEncoder per column.  A single shared encoder is re-fit on every
# column by `apply`, so afterwards it only remembers the classes of the last
# column and the other value -> code mappings cannot be inspected or inverted.
encoders = {column: LabelEncoder() for column in df.columns}
dataset = df.apply(lambda column: encoders[column.name].fit_transform(column))

# Kept for backward compatibility: as before, `le` ends up fitted on the
# last column of `df`.
le = encoders[df.columns[-1]]

# Features are every column but the last; the last column is the class label.
X = dataset.iloc[:, :-1]
y = dataset.iloc[:, -1]

dataset

Unnamed: 0,age,income,student,credit_rating,buys_computer
0,1,0,0,1,0
1,1,0,0,0,0
2,0,0,0,1,1
3,2,2,0,1,1
4,2,1,1,1,1
5,2,1,1,0,0
6,0,1,1,0,1
7,1,2,0,1,0
8,1,1,1,1,1
9,2,2,1,1,1


In [2]:
def groupUnderClass(mydata):
    """Bucket the rows of ``mydata`` by the value in their last column.

    Args:
        mydata: Table supporting positional ``.iloc`` indexing (e.g. a
            pandas DataFrame) whose last column holds the class label.

    Returns:
        A dict mapping each class label to the list of rows carrying that
        label, in original order.  Each row is ``mydata.iloc[i, :]``, so the
        label column is still part of the row.
    """
    grouped = {}
    for row_idx in range(len(mydata)):
        label = mydata.iloc[row_idx, -1]
        grouped.setdefault(label, []).append(mydata.iloc[row_idx, :])
    return grouped

def mean(numbers):
    """Return the arithmetic mean of ``numbers`` (sum divided by count).

    ``numbers`` must support both ``sum`` and ``len``.  An empty sequence
    raises ZeroDivisionError.
    """
    total = sum(numbers)
    count = float(len(numbers))
    return total / count

def std_dev(numbers):
    """Return the sample standard deviation (n - 1 denominator) of ``numbers``.

    Args:
        numbers: Sized collection of numeric values.

    Returns:
        The square root of the sum of squared deviations from the mean
        divided by ``len(numbers) - 1``.  For a single observation the
        n - 1 denominator would be zero, so 0.0 is returned instead of
        raising ZeroDivisionError -- the same value a constant sequence
        already produces.

    Raises:
        ZeroDivisionError: if ``numbers`` is empty (raised by ``mean``).
    """
    count = len(numbers)
    if count == 1:
        # One sample carries no spread information; avoid dividing by zero.
        return 0.0
    avg = mean(numbers)
    squared_deviations = sum(pow(x - avg, 2) for x in numbers)
    return math.sqrt(squared_deviations / float(count - 1))

def MeanAndStdDev(mydata):
    """Summarise each column of ``mydata`` as a ``(mean, std_dev)`` pair.

    ``mydata`` is an iterable of equal-length rows; ``zip(*mydata)``
    transposes it into columns.  The summary of the last column is dropped
    (the caller in this file passes rows whose last entry is the class
    label).

    Returns:
        A list of ``(mean, standard deviation)`` tuples, one per remaining
        column, in column order.
    """
    summaries = []
    for column in zip(*mydata):
        summaries.append((mean(column), std_dev(column)))
    summaries.pop()  # discard the last column's summary
    return summaries

def MeanAndStdDevForClass(mydata):
    """Compute per-class feature summaries for ``mydata``.

    The rows are first grouped by class label with ``groupUnderClass``; each
    group is then summarised with ``MeanAndStdDev``.

    Returns:
        A dict mapping each class label to that class's list of
        ``(mean, std_dev)`` pairs.
    """
    return {
        label: MeanAndStdDev(rows)
        for label, rows in groupUnderClass(mydata).items()
    }

def calculateGaussianProbability(x, mean, stdev, min_stdev=1e-9):
    """Evaluate the normal density with the given mean and std-dev at ``x``.

    Args:
        x: Point at which the density is evaluated.
        mean: Mean of the distribution.
        stdev: Standard deviation of the distribution.
        min_stdev: Lower bound applied to ``stdev``.  A feature that is
            constant within a class has a standard deviation of 0, which
            would otherwise divide by zero; flooring it yields a very
            narrow but finite Gaussian instead.

    Returns:
        The density value as a float.  With a floored ``stdev`` this is a
        large finite number at ``x == mean`` and underflows to 0.0 away
        from it.
    """
    # Guard against a zero spread before it reaches either denominator.
    stdev = max(stdev, min_stdev)
    expo = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * expo

def calculateClassProbabilities(info, test):
    """Score one instance against every class.

    Args:
        info: Dict mapping class label to a list of ``(mean, std_dev)``
            pairs, one per feature, in feature order.
        test: The instance's feature values, in the same order.  May be a
            pandas Series (e.g. a DataFrame row) or a plain sequence.

    Returns:
        A dict mapping each class label to the product of the per-feature
        Gaussian densities for ``test``.  No class prior is applied.
    """
    # Summaries are positional, so the feature values must be read by
    # position too.  Integer keys on a label-indexed pandas Series are treated
    # as labels (the positional fallback is deprecated), hence `.iloc`.
    positional = test.iloc if hasattr(test, "iloc") else test

    probabilities = {}
    for classValue, classSummaries in info.items():
        likelihood = 1
        for i, (feature_mean, feature_std) in enumerate(classSummaries):
            likelihood *= calculateGaussianProbability(
                positional[i], feature_mean, feature_std
            )
        probabilities[classValue] = likelihood
    return probabilities

def predict(info, test):
    """Return the class label with the highest score for ``test``.

    Scores come from ``calculateClassProbabilities``.  On ties the label
    encountered first wins; if there are no classes at all, ``None`` is
    returned.
    """
    scores = calculateClassProbabilities(info, test)
    return max(scores, key=scores.get, default=None)

def getPredictions(info, test):
    """Predict a class label for every row of ``test``.

    Args:
        info: Per-class feature summaries passed through to ``predict``.
        test: Table supporting ``len`` and positional ``.iloc`` row access.

    Returns:
        A list with one predicted label per row, in row order.
    """
    return [predict(info, test.iloc[row, :]) for row in range(len(test))]

def accuracy_rate(test, predictions):
    """Return the percentage of entries in ``test`` matched by ``predictions``.

    Args:
        test: True labels, read positionally through ``.iloc``.
        predictions: Predicted labels, indexable by the same positions.

    Returns:
        A float in the range 0-100.  An empty ``test`` raises
        ZeroDivisionError.
    """
    total = len(test)
    hits = sum(1 for idx in range(total) if test.iloc[idx] == predictions[idx])
    return (hits / float(total)) * 100.0

In [3]:
# Fit the per-class (mean, std_dev) summaries on the full dataset, then predict
# the very same rows: the figure printed below is accuracy on the training
# data, not on held-out data.
info = MeanAndStdDevForClass(dataset)
predictions = getPredictions(info, X)

# accuracy_rate returns a percentage (0-100).
accuracy = accuracy_rate(y, predictions)
print("Accuracy of Naive Bayes Model is: ", accuracy)

Accuracy of Naive Bayes Model is:  85.71428571428571


In [4]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

y_true = y
y_pred = predictions
print('Confusion Matrix: \n', confusion_matrix(y_true, y_pred))

# scikit-learn puts true labels on the rows and predicted labels on the
# columns, in `labels` order.  With labels=[0, 1] and class 1 as the positive
# class, the flattened matrix therefore reads tn, fp, fn, tp.
tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
print('\nOutcome values : \n', tn, fp, fn, tp)

matrix = classification_report(y_true, y_pred, labels=[0, 1])
print('\nClassification report : \n', matrix)

Confusion Matrix: 
 [[4 1]
 [1 8]]

Outcome values : 
 4 1 1 8

Classification report : 
               precision    recall  f1-score   support

           0       0.80      0.80      0.80         5
           1       0.89      0.89      0.89         9

    accuracy                           0.86        14
   macro avg       0.84      0.84      0.84        14
weighted avg       0.86      0.86      0.86        14



In [5]:
print('For the Data Instance X = (age <=30,Income = medium,Student = yes,Credit_rating = fair)\n')
# The four values are the integer codes of (age, income, student,
# credit_rating), in that order.
# NOTE(review): the codes are hard-coded and cannot be checked against the raw
# CSV from here.  Confirm that 0 really is the code for "age <=30"; the first
# row of the encoded table shows age code 1.
X_test = pd.DataFrame([[0, 2, 1, 1]])

# Use a dedicated name: `predictions` holds the training-set predictions that
# the evaluation cell reads, and overwriting it here would break a re-run of
# that cell.
instance_predictions = getPredictions(info, X_test)

if instance_predictions[0] == 1:
    print('Prediction is: Yes the student will buy computer')
else:
    print('Prediction is: No the student will not buy computer')

For the Data Instance X = (age <=30,Income = medium,Student = yes,Credit_rating = fair)

Prediction is: Yes the student will buy computer
