### A fuzzy-KNN Algorithm

In [89]:
import operator
import warnings
from datetime import datetime

warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import sklearn
import sklearn.datasets
import sklearn.model_selection
import sklearn.neighbors
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.metrics import accuracy_score

### Load the dataset

In [90]:
# Load the Iris dataset and separate the feature matrix from the class labels.
dataset = sklearn.datasets.load_iris()
x, y = dataset.data, dataset.target

### Split the dataset into train and test

In [91]:
# Hold out part of the data for testing; the fixed random_state keeps the
# split identical from run to run.
x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(
    x,
    y,
    random_state=589,
)

### Fuzzy KNN

In [92]:
class FuzzyKNN(BaseEstimator, ClassifierMixin):
    """Fuzzy k-nearest-neighbours classifier.

    ``fit`` assigns every training sample a fuzzy membership in each class,
    derived from the labels of its own ``k`` nearest training samples.
    ``predict`` then combines the memberships of a query point's ``k``
    nearest neighbours, weighting each neighbour by inverse distance.

    Parameters
    ----------
    k : int, default=3
        Number of neighbours. Must be a positive odd integer smaller than the
        number of training samples.
    plot : bool, default=False
        Validated but not otherwise used by this class.
    m : float, default=2
        Fuzzifier; neighbours are weighted by ``1 / dist ** (2 / (m - 1))``.
        Must be greater than 1.
    """

    def __init__(self, k=3, plot=False, m=2):
        self.k = k
        self.plot = plot
        self.m = m

    def fit(self, X, y=None):
        """Store the training data and compute per-sample class memberships.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)
            Class labels. Required despite the ``None`` default.

        Returns
        -------
        self

        Raises
        ------
        TypeError, ValueError
            If the hyper-parameters or the training data are invalid
            (see ``_check_params``).
        """
        self._check_params(X, y)
        # Work on plain arrays so positional indexing behaves the same for
        # lists, ndarrays and pandas objects.
        self.X = np.asarray(X)
        self.y = np.asarray(y)

        self.xdim = self.X.shape[1]
        self.n = len(self.y)
        self.classes = sorted(set(self.y))

        self.df = pd.DataFrame(self.X)
        self.df['y'] = self.y

        self.memberships = self._compute_memberships()
        self.df['membership'] = self.memberships

        self.fitted_ = True
        return self

    def predict(self, X):
        """Predict a class and the per-class votes for every row of ``X``.

        Returns
        -------
        list of (label, votes)
            ``label`` is the class with the highest vote and ``votes`` maps
            every class to its inverse-distance-weighted membership.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        self._check_fitted('predict')
        exponent = 2 / (self.m - 1)
        y_pred = []

        for x in np.asarray(X):
            neighbors = self._find_k_nearest_neighbors(self.df, x)
            dists = neighbors['distances'].to_numpy(dtype=float)

            exact = dists == 0
            if exact.any():
                # An inverse-distance weight is infinite at distance 0. In
                # the limit only the coincident neighbours count, so share
                # the weight equally among them instead of producing NaN.
                weights = exact.astype(float)
            else:
                weights = 1.0 / dists ** exponent
            # The normaliser is the same for every class; compute it once.
            weights = weights / weights.sum()

            neighbor_memberships = neighbors['membership'].tolist()
            votes = {
                c: np.sum([w * mem[c] for w, mem in zip(weights, neighbor_memberships)])
                for c in self.classes
            }

            pred = max(votes.items(), key=operator.itemgetter(1))[0]
            y_pred.append((pred, votes))

        return y_pred

    def score(self, X, y):
        """Return the accuracy of ``predict(X)`` against the true labels ``y``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        self._check_fitted('score')
        # predict() yields (label, votes) pairs; only the labels are scored.
        y_pred = [label for label, _ in self.predict(X)]
        return accuracy_score(y_pred=y_pred, y_true=y)

    def _check_fitted(self, method_name):
        """Raise if ``fit`` has not been called yet."""
        # fitted_ only exists after fit(), so look it up defensively.
        if not getattr(self, 'fitted_', False):
            raise RuntimeError(method_name + '() called before fit()')

    def _find_k_nearest_neighbors(self, df, x):
        """Return the ``k`` rows of ``df`` closest to ``x`` (Euclidean).

        ``df`` is not modified. The result carries an extra ``distances``
        column and is ordered from nearest to farthest; ties keep the
        original row order.
        """
        features = df.iloc[:, 0:self.xdim].to_numpy(dtype=float)
        ranked = df.assign(distances=np.linalg.norm(features - x, axis=1))
        ranked = ranked.sort_values(by='distances', ascending=True, kind='stable')
        return ranked.iloc[0:self.k]

    def _get_counts(self, neighbors):
        """Map each class label present in ``neighbors`` to its row count."""
        return neighbors['y'].value_counts().to_dict()

    def _compute_memberships(self):
        """Compute one ``{class: membership}`` dict per training sample.

        For a sample labelled ``y`` whose ``k`` nearest training samples
        contain ``n_c`` members of class ``c``, the membership in ``c`` is
        ``0.49 * n_c / k``, plus ``0.51`` when ``c == y``.
        """
        memberships = []
        for x, label in zip(self.X, self.y):
            neighbors = self._find_k_nearest_neighbors(self.df, x)
            counts = self._get_counts(neighbors)

            membership = {}
            for c in self.classes:
                # Classes absent from the neighbourhood simply count as 0.
                uci = 0.49 * (counts.get(c, 0) / self.k)
                if c == label:
                    uci += 0.51
                membership[c] = uci

            memberships.append(membership)
        return memberships

    def _check_params(self, X, y):
        """Validate hyper-parameters against the training data.

        Raises
        ------
        TypeError
            If ``k`` is not an integer, ``plot`` is not a bool or ``m`` is
            not a real number.
        ValueError
            If ``y`` is missing, ``X`` and ``y`` differ in length, ``k`` is
            not a positive odd integer below the sample count, or ``m <= 1``.
        """
        if isinstance(self.k, bool) or not isinstance(self.k, (int, np.integer)):
            raise TypeError('"k" should have type int')
        if y is None:
            raise ValueError('"y" is required to fit the classifier')
        if len(X) != len(y):
            raise ValueError('"X" and "y" should have the same length')
        if self.k < 1:
            raise ValueError('"k" should be a positive integer')
        if self.k >= len(y):
            raise ValueError('"k" should be less than no of feature sets')
        if self.k % 2 == 0:
            raise ValueError('"k" should be odd')

        if type(self.plot) != bool:
            raise TypeError('"plot" should have type bool')

        if isinstance(self.m, bool) or not isinstance(
            self.m, (int, float, np.integer, np.floating)
        ):
            raise TypeError('"m" should be a real number')
        if self.m <= 1:
            raise ValueError('"m" should be greater than 1')

### Comparing Fuzzy KNN with baseline KNN Classifier

In [93]:
# Column-oriented results table: each key maps to a list that receives one
# entry per classifier.
analysis = {
    column: []
    for column in ('model', 'Training time', 'Prediction time')
}

In [94]:
def add_classifier(d):
    """Append one classifier's results to the module-level ``analysis`` table.

    Parameters
    ----------
    d : dict
        Must provide a value for every key of ``analysis``; any additional
        keys are ignored.

    Raises
    ------
    KeyError
        If ``d`` lacks a key of ``analysis``. Nothing is appended in that
        case, so the columns of ``analysis`` keep equal lengths.
    """
    # Validate before mutating: failing midway would leave some columns one
    # entry longer than others.
    missing = [key for key in analysis if key not in d]
    if missing:
        raise KeyError(f'missing result fields: {missing}')

    for key, column in analysis.items():
        column.append(d[key])

In [95]:
# Baseline: scikit-learn's KNeighborsClassifier with its default settings.
baseline = sklearn.neighbors.KNeighborsClassifier()
baseline_classifier = dict.fromkeys(analysis)
baseline_classifier['model'] = 'baseline KNN'

# Time a single fit on the training split.
start_training_time = datetime.now()
baseline.fit(x_train, y_train)
end_training_time = datetime.now()
baseline_classifier['Training time'] = (
    end_training_time - start_training_time
).total_seconds()

# The "Prediction time" entry is the wall-clock time of the whole 5-fold
# cross_val_score call on the test split.
start_prediction_time = datetime.now()
base_score = sklearn.model_selection.cross_val_score(
    estimator=baseline, X=x_test, y=y_test, cv=5
)
end_prediction_time = datetime.now()
baseline_classifier['Prediction time'] = (
    end_prediction_time - start_prediction_time
).total_seconds()

print("Cross Validation Score" , base_score)
add_classifier(baseline_classifier)

Cross Validation Score [0.75  0.75  0.875 1.    1.   ]


In [96]:
# Fuzzy KNN with its default settings, timed the same way as the baseline.
fuzzyknn = FuzzyKNN()
fuzzy_knn_classifier = {x : None for x in analysis.keys()}
fuzzy_knn_classifier['model'] = 'fuzzy KNN'

# Time a single fit on the training split.
start_training_time = datetime.now()
fuzzyknn.fit(x_train, y_train)
end_training_time = datetime.now()
fuzzy_knn_classifier['Training time'] = (end_training_time - start_training_time).total_seconds()

# The "Prediction time" entry is the wall-clock time of the whole 5-fold
# cross_val_score call on the test split.
start_prediction_time = datetime.now()
fuzzy_score = sklearn.model_selection.cross_val_score(cv=5, estimator=fuzzyknn, X=x_test, y=y_test)
end_prediction_time = datetime.now()
fuzzy_knn_classifier['Prediction time'] = (end_prediction_time - start_prediction_time).total_seconds()

# Report this model's own scores (previously the baseline's scores were
# printed here by mistake).
print("Cross Validation Score" , fuzzy_score)
add_classifier(fuzzy_knn_classifier)

Cross Validation Score [0.75  0.75  0.875 1.    1.   ]


In [97]:
# Show the collected results as a table, one row per registered classifier.
pd.DataFrame(data=analysis)

Unnamed: 0,model,Training time,Prediction time
0,baseline KNN,0.001999,0.012009
1,fuzzy KNN,0.419087,0.890228
