# In [29]:
from sklearn.naive_bayes import GaussianNB
import pandas as pd
import math
import numpy as np
import operator
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from collections import Counter
    
# Split the dataset into training and test sets
def split_dataset(test_size, path='heart.dat', random_state=55):
    """Load the heart dataset and split it into training and test parts.

    The file is read as header-less CSV. The last column is taken as the
    class label (cast to ``np.int64``); every other column is an attribute.

    Args:
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
        path: Location of the data file. Defaults to ``'heart.dat'``.
        random_state: Seed forwarded to ``train_test_split`` so the split is
            reproducible. Defaults to ``55``.

    Returns:
        A tuple ``(data_train, class_train, data_test, class_test)``.
    """
    dataset = pd.read_csv(path, header=None).values
    attr = dataset[:, :-1]  # attributes: every column except the last
    heart_class = dataset[:, -1].astype(np.int64, copy=False)  # class labels
    data_train, data_test, class_train, class_test = train_test_split(
        attr, heart_class, test_size=test_size, random_state=random_state)
    # NOTE: the return order differs from train_test_split's own order
    # (train data, train labels, test data, test labels).
    return data_train, class_train, data_test, class_test
###############################################data########################

###############################################nbayesfunctions#####################

# Group the training rows by class label
def get_subsequences_by_classes(data_train, class_train):
    """Group training rows by their class label.

    Args:
        data_train: Sequence of rows (attribute vectors).
        class_train: Sequence of labels; ``class_train[i]`` is the label of
            ``data_train[i]``.

    Returns:
        A dict mapping every distinct label in ``class_train`` to the list
        of rows carrying that label, in their original order.
    """
    # One independent bucket per distinct label.
    grouped = {label: [] for label in set(class_train)}
    for position, row in enumerate(data_train):
        grouped[class_train[position]].append(row)
    return grouped

# Compute the arithmetic mean
def get_y(arr):
    """Return the arithmetic mean of the values in ``arr``.

    Raises:
        ZeroDivisionError: If ``arr`` is empty.
    """
    total = sum(arr)
    count = len(arr)
    return total / count

# Compute the sample standard deviation (square root of the unbiased variance)
def get_disp(arr):
    """Return the sample standard deviation of the values in ``arr``.

    Despite the name ("disp"), the result is the square root of the
    unbiased (n - 1) variance, not the variance itself.

    A single-element input has no spread and returns ``0.0`` rather than
    dividing by ``n - 1 == 0``; the density function ``f`` already replaces
    a zero spread with a small epsilon.

    Raises:
        ZeroDivisionError: If ``arr`` is empty.
    """
    n = len(arr)
    mean = sum(arr) / n
    if n < 2:
        # One observation: the n - 1 denominator would be zero.
        return 0.0
    squared_deviations = sum((value - mean) ** 2 for value in arr)
    return (squared_deviations / (n - 1)) ** 0.5


# Train the classifier
def train_classifier(data_train, class_train):
    """Fit the classifier: per-class, per-attribute mean and spread.

    Args:
        data_train: Training rows (attribute vectors).
        class_train: Labels aligned with ``data_train``.

    Returns:
        A dict mapping each class label to a list with one
        ``(get_y(column), get_disp(column))`` tuple per attribute column.
    """
    rows_by_class = get_subsequences_by_classes(data_train, class_train)
    model = {}
    for label, rows in rows_by_class.items():
        column_stats = []
        # zip(*rows) transposes the rows so each item is one attribute column.
        for column in zip(*rows):
            column_stats.append((get_y(column), get_disp(column)))
        model[label] = column_stats
    return model

# Compute the Gaussian density f(xj | y, disp)
def f(x, y, disp):
    """Evaluate the Gaussian density at ``x``.

    Args:
        x: Point at which the density is evaluated.
        y: Mean of the distribution.
        disp: Standard deviation of the distribution. A value equal to zero
            is replaced by ``0.000001`` to avoid dividing by zero.

    Returns:
        The density value as a float.
    """
    sigma = disp + 0.000001 if disp == 0.0 else disp
    normaliser = 1. / (math.sqrt(2. * math.pi) * sigma)
    exponent = -(math.pow(x - y, 2.) / (2. * math.pow(sigma, 2.)))
    return normaliser * math.exp(exponent)

# Compute, for each class, the product of the per-attribute densities (no class prior P(ci) is applied)
def p(summaries, instance_attr):
    """Score one instance against every class.

    For each class the score is the product of ``f`` over all attributes,
    using that class's ``(mean, spread)`` pair for each attribute. No class
    prior is multiplied in.

    Args:
        summaries: Dict mapping class label to a list of ``(mean, spread)``
            tuples, one per attribute.
        instance_attr: Attribute values of the instance, indexed in the same
            order as the tuples in ``summaries``.

    Returns:
        A dict mapping each class label to its score.
    """
    scores = {}
    for label, attribute_stats in summaries.items():
        product = 1.0
        for index, (mean, spread) in enumerate(attribute_stats):
            product *= f(instance_attr[index], mean, spread)
        scores[label] = product
    return scores
  
# Classify a single object
def test_one(train_results, dt):
    """Predict the class of a single instance.

    Args:
        train_results: Model as returned by ``train_classifier``.
        dt: Attribute values of the instance.

    Returns:
        The class label whose score from ``p`` is the largest (the first
        such label if several tie).
    """
    scores = p(train_results, dt)
    return max(scores, key=scores.get)
    
# Evaluate the classifier on the test set
def test_classifier(train_results, data_test, class_test):
    """Return the accuracy of the model on a labelled test set.

    Args:
        train_results: Model as returned by ``train_classifier``.
        data_test: Test rows (attribute vectors).
        class_test: True labels aligned with ``data_test``.

    Returns:
        The fraction of rows whose predicted label equals the true label.

    Raises:
        ZeroDivisionError: If ``data_test`` is empty.
    """
    predicted = [test_one(train_results, sample) for sample in data_test]
    hits = sum(guess == truth for guess, truth in zip(predicted, class_test))
    return hits / float(len(predicted))
        
        
    
###############################################nbayesfunctions#####################



# Split the data (test_size=0.3), then fit and score scikit-learn's GaussianNB
# on that split as the reference result.
data_train, class_train,data_test, class_test = split_dataset(0.3)
gnb = GaussianNB()
gnb.fit(data_train, class_train)
print('Naive Bayes library algo', 'Result: ', gnb.score(data_test, class_test))

# Train and score the hand-written classifier on the same split for comparison.
cl_training_results=train_classifier(data_train,class_train)
cl_testing_results=test_classifier(cl_training_results,data_test,class_test)
print('Naive Bayes algo', 'Result: ', cl_testing_results)








# Output:
# Naive Bayes library algo Result:  0.83950617284
# Naive Bayes algo Result:  0.83950617284
