In [88]:
import numpy as np
import pandas as pd
import utils
import tester
import dataset
import importlib
from scipy import spatial
from IPython.display import display

In [112]:
# Reload the local utils module so edits made to utils.py after the initial
# import take effect without restarting the kernel.
importlib.reload(utils)

<module 'utils' from '/mnt/c/Users/ErickLima/Desktop/UFC/Mestrado/Pattern Recognition/utils.py'>

In [107]:
class KNN:
    """k-nearest-neighbours classifier backed by a SciPy k-d tree.

    ``train`` stores the training set and builds the spatial index;
    ``predict`` classifies a single sample by majority vote among the
    ``k`` closest training points.
    """

    def __init__(self, k=1):
        # Number of neighbours that take part in the vote.
        self.k = k
        # Training samples and labels; both stay None until train() runs.
        self.X = None
        self.Y = None
        # k-d tree built over self.X by train().
        self.kdTree = None

    def train(self, X_train, Y_train):
        """Keep the training data and index the samples in a k-d tree.

        Args:
            X_train: array-like of samples, one row per sample.
            Y_train: labels, positionally aligned with ``X_train``.
        """
        self.X = X_train
        self.Y = Y_train
        self.kdTree = spatial.cKDTree(self.X, leafsize=10)

    def predict(self, x):
        """Return the predicted label for the sample ``x``.

        Args:
            x: a single sample with the same dimensionality as the
                training rows.

        Returns:
            The label of the nearest neighbour when ``k == 1``; otherwise
            the most frequent label among the ``k`` nearest neighbours
            (ties go to the smallest label). ``None`` when the model has
            not been trained, after printing ``'Not trained'``.
        """
        if self.X is None:
            print('Not trained')
            return None

        labels = np.asarray(self.Y)
        _, neighbours = self.kdTree.query(x, k=self.k)
        if self.k == 1:
            return labels[neighbours]

        # When fewer than k points exist, cKDTree pads the index array with
        # the value n (one past the last valid index); drop those entries so
        # they are not used to look up labels.
        neighbours = np.asarray(neighbours)
        neighbours = neighbours[neighbours < self.kdTree.n]

        # np.unique sorts the labels, so argmax keeps the smallest label on a
        # tie (same tie-break as bincount) while also accepting labels that
        # are not non-negative integers.
        candidates, votes = np.unique(labels[neighbours], return_counts=True)
        return candidates[votes.argmax()]
     

In [108]:
class DMC:
    """Minimum-distance-to-centroid classifier.

    ``train`` groups the training samples by label and stores the mean of
    each group; ``predict`` returns the label whose centroid is closest to
    the query according to ``utils.distance``.
    """

    def __init__(self):
        # label -> ndarray of the training samples carrying that label.
        self.clusters = {}
        # label -> mean sample of the cluster; None until train() runs.
        self.centroids = None

    def train(self, X_train, Y_train):
        """Group the samples by label and compute one centroid per label.

        The grouping is rebuilt from scratch on every call, so the same
        instance can be trained repeatedly (e.g. once per evaluation fold)
        without samples from a previous call leaking in.

        Args:
            X_train: sequence of samples.
            Y_train: sequence of hashable labels, positionally aligned with
                ``X_train``.
        """
        grouped = {}
        for sample, label in zip(X_train, Y_train):
            grouped.setdefault(label, []).append(sample)

        self.clusters = {label: np.array(members) for label, members in grouped.items()}
        self.centroids = {label: np.mean(members, axis=0) for label, members in self.clusters.items()}

    def predict(self, x):
        """Return the label of the centroid nearest to ``x``.

        Returns ``None`` (after printing ``'Not trained'``) when no
        centroids are available.
        """
        if not self.centroids:
            print('Not trained')
            return None

        return min(self.centroids, key=lambda c: utils.distance(self.centroids[c], x))

In [109]:
class CQG:
    """Quadratic Gaussian classifier with one full covariance matrix per class.

    Each class is summarised by its mean and covariance; a sample is assigned
    to the class that maximises the log class count plus the Gaussian
    log-density (up to a constant shared by all classes).
    """

    def __init__(self):
        # Per-class sample collections, ordered by sorted label.
        self.classes = None
        # Sorted class labels, aligned with self.classes; set by train().
        self.labels = None
        self.centroids = None
        self.covariances = None
        self.cov_invs = None
        self.cov_dets = None
        # log|det| of each covariance, computed without forming the determinant.
        self.cov_logdets = None
        # log of the per-class sample counts (unnormalised log prior).
        self.a_priori = None

    def __friedman_for_class(self, i, pooled, alpha):
        """Blend class ``i``'s covariance with the pooled one, weighted by sample counts."""
        total = sum(len(c) for c in self.classes)
        c_len = len(self.classes[i])
        blended = (1 - alpha) * c_len * self.covariances[i] + alpha * total * pooled
        return blended / ((1 - alpha) * c_len + alpha * total)

    def regularize_friedman(self, alpha):
        """Replace every class covariance by its blend with the pooled covariance.

        Args:
            alpha: blending weight; 0 keeps the class covariances, 1 gives
                every class the pooled covariance.

        Note: only ``self.covariances`` is updated; the cached inverses and
        determinants are (re)computed by ``train``.
        """
        total = sum(len(c) for c in self.classes)
        pooled = np.sum(
            np.array([(len(self.classes[i]) / total) * self.covariances[i] for i in range(len(self.classes))]),
            axis=0,
        )
        self.covariances = [self.__friedman_for_class(i, pooled, alpha) for i in range(len(self.classes))]

    def __log_gaussian(self, class_n, x):
        """Gaussian log-density of ``x`` under class ``class_n``, minus the constant term."""
        z = x - self.centroids[class_n]
        if self.cov_logdets is not None:
            log_det = self.cov_logdets[class_n]
        else:
            # Instances populated without train() may only carry cov_dets.
            log_det = np.log(self.cov_dets[class_n])
        return -0.5 * (np.dot(z, np.dot(self.cov_invs[class_n], z)) + log_det)

    def a_posteriori(self, x):
        """Return the per-class Gaussian log-densities of ``x`` as an array."""
        return np.array([self.__log_gaussian(i, x) for i in range(len(self.classes))])

    def train(self, X_train, Y_train):
        """Estimate the per-class parameters from the training data.

        If any class covariance is rank-deficient, all covariances are
        regularised with ``regularize_friedman(0.3)`` before inversion.
        """
        # NOTE(review): utils.to_class_dict presumably maps each label to its
        # samples, and utils.cov presumably returns a covariance matrix —
        # both live in utils.py; confirm there.
        cd = utils.to_class_dict(X_train, Y_train)
        self.labels = sorted(cd)
        self.classes = [cd[y] for y in self.labels]

        self.centroids = [np.mean(c, axis=0) for c in self.classes]
        self.covariances = [utils.cov(c) for c in self.classes]

        if any(np.linalg.matrix_rank(cov_mat) != cov_mat.shape[0] for cov_mat in self.covariances):
            self.regularize_friedman(0.3)

        self.cov_invs = [np.linalg.inv(cov) for cov in self.covariances]
        self.cov_dets = [np.linalg.det(cov) for cov in self.covariances]
        # slogdet avoids the under/overflow that log(det(cov)) hits when the
        # determinant itself leaves the float range.
        self.cov_logdets = [np.linalg.slogdet(cov)[1] for cov in self.covariances]
        self.a_priori = np.array([np.log(len(c)) for c in self.classes])

    def predict(self, x):
        """Return the label of the most probable class for ``x``.

        Returns:
            The class label when labels are known (after ``train``); the
            position of the winning class in ``self.classes`` when
            ``self.labels`` is None; ``None`` (after printing
            ``'Not trained'``) when the model has no parameters.
        """
        if not (self.centroids and self.covariances):
            print('Not trained')
            return None

        scores = self.a_priori + self.a_posteriori(x)
        best = scores.argmax()
        if self.labels is None:
            return best
        # Map the position back to the actual label so the result is
        # comparable with Y_train even when labels are not 0..n-1.
        return self.labels[best]

In [156]:
# Wrap the Parkinsons dataset and a default KNN (k=1) in the project's Tester
# harness and run the evaluation.
# NOTE(review): the meaning of the positional True and of transform='' is
# defined in dataset.py / tester.py, not visible here — confirm before changing.
model_tester = tester.Tester(dataset.Parkinsons(True), KNN())
model_tester.test(transform='')

In [157]:
# Fetch the statistics object produced by the test run and print everything
# it reports.
statistics = model_tester.statistics()
statistics.print_all()

[[ 921.   79.]
 [ 141. 2859.]]
Mean accuracy: 0.945
Specificity: 0.867231638418079
Sensibility: 0.9731109598366235
