In [None]:
import numpy as np
import pandas as pd
import random
import sys,itertools
import matplotlib.pyplot as plt
import scipy.stats

In [None]:
def get_data(filename):
    """Load a headerless comma-separated file of numeric rows.

    Every column except the last is parsed with ``float``; the last column
    is parsed with ``int``. Blank lines in the file are skipped.

    Parameters
    ----------
    filename : str or path-like
        Path of the CSV file to read.

    Returns
    -------
    list of list
        One inner list per non-empty row: the float-converted leading
        columns followed by the int-converted last column.

    Raises
    ------
    ValueError
        If a field cannot be converted by ``float`` / ``int`` (for example
        when the file has a header row).
    """
    import csv

    data = []
    # newline='' lets the csv module do its own newline handling, as the
    # csv documentation recommends for file objects passed to csv.reader.
    with open(filename, newline='') as csv_file:
        for row in csv.reader(csv_file, delimiter=','):
            if not row:
                # csv.reader yields [] for a blank line; indexing the last
                # column of such a row would raise IndexError.
                continue
            *leading, last = row
            data.append([float(value) for value in leading] + [int(last)])
    return data

In [None]:
class Fisher:
    """Discriminant-analysis pipeline over rows shaped ``[features..., label]``.

    NOTE(review): this cell is an unfinished draft. Only
    ``group_data_by_class`` is defined here; the later steps that
    ``__init__`` invokes (``calculate_means`` through
    ``plot_transformed_data``) are not defined in this cell, so constructing
    an instance raises ``AttributeError`` until they are added.

    Parameters
    ----------
    data : list of list
        Rows whose last element is the class label.
    num_dim : int
        Stored as ``self.num_dim``; presumably the number of dimensions to
        project onto -- TODO confirm once the projection step exists.
    """

    def __init__(self,data,num_dim):
        if not data:
            raise ValueError("data must contain at least one row")
        # Work on a shallow copy so shuffling does not reorder the caller's list.
        self.data = list(data)
        self.num_dim = num_dim
        # Index of the label column (the last one); also the feature count.
        self.columnlabel = len(self.data[0]) -1
        self.dim = self.columnlabel
        random.shuffle(self.data)
        # 70/30 split. The 30% slice used to be assigned to training_data as
        # well, which discarded the 70% slice and left no held-out data.
        split_at = int(len(self.data)*0.7)
        self.training_data = self.data[:split_at]
        self.test_data = self.data[split_at:]
        self.group_data_by_class()
        # NOTE(review): none of the methods below are defined in this cell.
        self.calculate_means()
        self.calculate_SB_SW()
        self.calculate_eigen_values()
        self.transform_data()
        self.test_algorithm()
        self.plot_normal_graph()
        self.plot_transformed_data()

    def group_data_by_class(self):
        """Bucket the training rows by class label into ``self.grouped_data``.

        ``self.grouped_data`` maps each label (the value at index
        ``self.columnlabel``) to the list of complete training rows carrying
        that label, in their shuffled order. The stored rows still include
        the label element.
        """
        self.grouped_data = {}
        for row in self.training_data:
            self.grouped_data.setdefault(row[self.columnlabel], []).append(row)

In [4]:
import warnings
from scipy.sparse.linalg import eigsh
from scipy.sparse import eye
from sklearn.base import BaseEstimator, ClassifierMixin, TransformerMixin
from sklearn.metrics import pairwise_kernels
from sklearn.neighbors import NearestCentroid
from sklearn.utils.multiclass import unique_labels
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from sklearn.preprocessing import OneHotEncoder
import numpy as np

In [5]:
class Kfda(BaseEstimator,ClassifierMixin,TransformerMixin):
    """Kernel Fisher discriminant analysis with a nearest-centroid classifier.

    ``fit`` solves a generalized eigenproblem built from the kernel matrix of
    the training data to obtain projection weights, projects the per-class
    mean kernel rows with them, and trains a ``NearestCentroid`` classifier
    on those projected class centroids.

    Parameters
    ----------
    n_components : int, default=2
        Number of eigenvectors (projection directions) to compute.
    kernel : str or callable, default='linear'
        Passed as ``metric`` to ``pairwise_kernels``. ``None`` is replaced
        by ``'linear'``.
    robustness_offset : float, default=1e-8
        Value added to the diagonal of the within-class matrix before the
        eigenproblem is solved.
    **kwds
        Extra keyword arguments forwarded to ``pairwise_kernels``.

    Attributes
    ----------
    classes_ : ndarray
        Unique labels seen in ``fit``.
    X_, y_ : ndarray
        Validated training data and labels.
    weights_ : ndarray of shape (n_samples, n_components)
        Eigenvectors returned by ``eigsh``.
    clf_ : NearestCentroid
        Classifier fitted on the projected class centroids.
    """

    def __init__(self, n_components = 2, kernel = 'linear', robustness_offset = 1e-8, **kwds):
        self.kernel = 'linear' if kernel is None else kernel
        self.n_components = n_components
        self.kwds = kwds
        self.robustness_offset = robustness_offset

    @staticmethod
    def _class_means(y_onehot, values):
        """Return the per-class mean of the rows of ``values`` as an ndarray.

        ``y_onehot`` is the (n_samples, n_classes) one-hot label matrix.
        The class counts are flattened to a plain 1-D array so the division
        yields an ndarray and not an ``np.matrix``.
        """
        counts = np.asarray(y_onehot.sum(axis=0)).ravel()
        return np.asarray(y_onehot.T @ values) / counts[:, np.newaxis]

    def fit(self,X,y):
        """Compute the projection weights and fit the centroid classifier.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)

        Returns
        -------
        self
        """
        X, y = check_X_y(X,y)
        self.classes_ = unique_labels(y)
        if self.n_components > self.classes_.size - 1:
            warnings.warn(
                "n_components > classes_.size - 1."
                "Only the first classes_.size - 1 components will be valid."
            )
        self.X_ = X
        self.y_ = y

        y_onehot = OneHotEncoder().fit_transform(self.y_[:,np.newaxis])
        K = pairwise_kernels(X,X,metric = self.kernel,**self.kwds)

        # (k, n) @ (n, n) -> (k, n): mean kernel row of each class.
        m_classes = self._class_means(y_onehot, K)
        # Class index of every training sample, used to look up its class mean.
        indices = (y_onehot@np.arange(self.classes_.size)).astype('i')

        # Within-class matrix: K @ (K - class mean row of each sample).
        N = K@(K-m_classes[indices])
        # Add the offset on the diagonal only (same values as adding
        # offset * identity) while keeping N a plain ndarray.
        N[np.diag_indices_from(N)] += self.robustness_offset

        # Between-class matrix from the class means centred on K.mean(1);
        # classes are not weighted by their sizes here.
        m_classes_centered = m_classes -K.mean(1)
        M = m_classes_centered.T@m_classes_centered

        # Generalized symmetric eigenproblem  M @ x = w * N @ x.
        _, self.weights_ = eigsh(M,self.n_components,N,which = 'LM')

        # Project the class means: (k, n) @ (n, n_components).
        centroids_ = m_classes@self.weights_
        self.clf_ = NearestCentroid().fit(centroids_, self.classes_)
        return self


    def transform(self,X):
        """Project ``X`` onto the fitted discriminant directions.

        Returns an array of shape (n_samples, n_components): the kernel
        between ``X`` and the training data multiplied by ``weights_``.
        """
        check_is_fitted(self)
        # The imported helper is `pairwise_kernels`; the singular spelling
        # used previously raised NameError.
        return pairwise_kernels(X,self.X_, metric = self.kernel, **self.kwds) @ self.weights_

    def predict(self,X):
        """Predict class labels for ``X`` via the nearest projected centroid."""
        check_is_fitted(self)
        X = check_array(X)

        projected_points = self.transform(X)
        predictions = self.clf_.predict(projected_points)

        return predictions

    def fit_additional(self,X,y):
        """Add centroids for further labelled data without refitting the weights.

        The new samples are projected with the existing ``weights_``, their
        per-class centroids are appended to the current ones, and ``clf_``
        is refitted on the combined set. ``classes_`` itself is left
        unchanged.

        Returns
        -------
        self
        """
        check_is_fitted(self)
        X, y = check_X_y(X, y)

        new_classes = np.unique(y)
        projections = self.transform(X)
        y_onehot = OneHotEncoder().fit_transform(y[:,np.newaxis])
        new_centroids = self._class_means(y_onehot, projections)

        # The fitted attribute is `classes_`; `class_` raised AttributeError.
        concatenated_classes = np.concatenate([self.classes_, new_classes])
        concatenated_centroids = np.concatenate([self.clf_.centroids_, new_centroids])
        self.clf_.fit(concatenated_centroids, concatenated_classes)
        return self

In [8]:
# Toy label vector: 7 samples spread over the 3 classes 0, 1 and 2.
y = np.array([0,1,2,2,0,1,0])
# One-hot encode the labels, transpose to (class, sample) and sum each row:
# the result is the number of samples in each class.
OneHotEncoder().fit_transform(y[:,np.newaxis]).toarray().T.sum(1)

array([3., 2., 2.])

In [11]:
# Dense view of the one-hot matrix: one row per sample, one column per class.
OneHotEncoder().fit_transform(y[:,None]).toarray()

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.]])

In [13]:
# Multiplying the one-hot matrix by [0, 1, ..., n_classes - 1] recovers the
# class index of every sample.
OneHotEncoder().fit_transform(y[:,None])@ np.arange(unique_labels(y).size)

array([0., 1., 2., 2., 0., 1., 0.])