In [271]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
from sklearn.neighbors import NearestNeighbors
from numpy import linalg as LA
from tqdm import tqdm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [272]:
data = pd.read_csv("diabetes.csv")
x = data.iloc[:, [0, 1, 2, 3, 4, 5, 6, 7]].values
y = data.iloc[:, [8]].values.ravel()
labels = np.unique(y)

In [273]:
class RBML:
    def __init__(self, x, y, k=3, alpha=0.5, beta=2.):
        self.x = x
        self.len_ = len(x)
        self.y = y
        self.k = k
        self.alpha = alpha
        self.beta = beta
        self.labels = np.unique(self.y)
        self.neigh = NearestNeighbors(n_neighbors=4)
        self.neigh.fit(self.x)
        
    def find_target_neighbors(self, i):
        neighbors = self.neigh.kneighbors(self.x[i].reshape(1, -1), self.len_, return_distance=False)[:,1:][0]
        target_neighbors = []
        label = y[i]
        count_ = 0
        l = 0
        while count_ != self.k and l < self.len_-1:
            if self.y[neighbors[l]] == label:
                target_neighbors.append(neighbors[l])
                count_+=1
            l+=1
        return target_neighbors    
    
    def find_imposters(self, i):
        neighbors = self.neigh.kneighbors(self.x[i].reshape(1, -1), self.len_, return_distance=False)[:,1:][0]
        imposters = []
        label = y[i]
        k = 0
        while k < self.len_-1:
            if self.y[neighbors[k]] != label:
                imposters.append(neighbors[k])
            k+=1
        return imposters
    
    def delta_t(self, i, j):
        neighbors = self.find_target_neighbors(i)
        return 1 if j in neighbors else 0
    
    def Tv(self, i):
        sum_ = 0
        for j in range(self.len_):
            sum_ += (LA.norm(x[i] - x[j])**2)*self.delta_t(i, j)
        return sum_
    
    def TX(self):
        sum_ = 0
        print("TX calculation started")
        for i in tqdm(range(self.len_)):
            sum_ += self.Tv(i)
        return sum_
    
    def m(self, i):
        neighbors = self.find_target_neighbors(i)
        # neighbors[-1] most distant target neighbor
        mi = self.beta*(LA.norm(x[i] - self.x[neighbors[-1]])**2)*self.delta_t(i, neighbors[-1])
        return mi
    
    def delta_i(self, i, j):
        #print("la",LA.norm(x[i] - x[j])**2)
        #print("mi",self.m(i))
        t1 = (LA.norm(x[i] - x[j])**2 < self.m(i))
        t2 = (y[i] != y[j])
        return 1 if t1 and t2 else 0
    
    def Hv(self, i):
        sum_ = 0
        for j in range(self.len_):
            sum_ += (self.m(i) - LA.norm(x[i] - x[j])**2)*self.delta_i(i,j)
        return sum_
    
    def HX(self):
        sum_ = 0
        print("HX calculation started")
        for i in tqdm(range(self.len_)):
            sum_ += self.Hv(i)
        return sum_
    
    def C(self):
        return (1-self.alpha)*self.TX() + self.alpha*self.HX()
    
    def xiN(self, i):
        sum_ = 0
        sum_t = 0
        for j in range(len(self.x)):
            sum_ += self.x[j]*self.delta_t(i,j)
            sum_t += self.delta_t(i,j)
        
        return sum_/sum_t if sum_t != 0 else 0
    
    def xjI(self, i):
        sum_ = 0
        sum_t = 0
        for j in range(self.len_):
            delta_value = self.delta_i(i,j)
            sum_ += self.x[j]*delta_value
            sum_t += delta_value
        #  if sum_t != 0 else 0
        return sum_/sum_t if sum_t != 0 else 0
    
    # Updated hinge loss
    def Hv_(self, i):
        return max(0, (self.m(i) - LA.norm(x[i] - self.xjI(i))**2))
    
    def xiH(self, i):
        return self.x[i] + self.m(i)*((self.x[i]-self.xjI(i))/(LA.norm(x[i] - self.xjI(i))**2))  
    
    def xi_star(self, i):
        return (1-self.alpha)*self.xiN(i) + self.alpha*self.xiH(i)

In [274]:
def convergence_method(x, y, k):
    classifier = KNeighborsClassifier(n_neighbors=k)
    classifier.fit(x, y)
    y_pred = classifier.predict(x)
    acc = accuracy_score(y, y_pred)
    return acc

# Iris Dataset

In [None]:
data = pd.read_csv("IRIS.csv")
x = data.iloc[:, [1, 2, 3, 4]].values
y = data.iloc[:, [5]].values.ravel()

In [None]:
rbml_ = RBML(x, y)
flag = True
old_acc = convergence_method(rbml_.x, rbml_.y, rbml_.k)

while flag:
    print(f"Accuracy before iteration = {old_acc}")
    x_stars = []
    for i in tqdm(range(rbml_.len_)):
        x_stars.append(rbml_.xi_star(i))
    x_stars = np.array(x_stars)
    
    rbml_.x = x_stars
    new_acc = convergence_method(rbml_.x, rbml_.y, rbml_.k)
    print(f"Accuracy after iteration = {new_acc}")
    if new_acc <= old_acc or new_acc == 1:
        flag = False
    else:
        old_acc = new_acc

# Wine Dataset

In [284]:
from sklearn.datasets import load_wine

In [285]:
wine_dataset = load_wine()
x = wine_dataset['data']
y = wine_dataset['target'].ravel()

In [286]:
rbml_ = RBML(x, y)
flag = True
old_acc = convergence_method(rbml_.x, rbml_.y, rbml_.k)

while flag:
    print(f"Accuracy before iteration = {old_acc}")
    x_stars = []
    for i in tqdm(range(rbml_.len_)):
        x_stars.append(rbml_.xi_star(i))
    x_stars = np.array(x_stars)
    
    rbml_.x = x_stars
    new_acc = convergence_method(rbml_.x, rbml_.y, rbml_.k)
    print(f"Accuracy after iteration = {new_acc}")
    if new_acc <= old_acc or new_acc == 1:
        flag = False
    else:
        old_acc = new_acc

Accuracy before iteration = 0.8707865168539326


100%|█████████████████████████████████████████| 178/178 [01:17<00:00,  2.30it/s]


Accuracy after iteration = 0.9719101123595506
Accuracy before iteration = 0.9719101123595506


100%|█████████████████████████████████████████| 178/178 [01:18<00:00,  2.26it/s]


Accuracy after iteration = 0.9831460674157303
Accuracy before iteration = 0.9831460674157303


100%|█████████████████████████████████████████| 178/178 [01:20<00:00,  2.20it/s]


Accuracy after iteration = 0.9943820224719101
Accuracy before iteration = 0.9943820224719101


100%|█████████████████████████████████████████| 178/178 [01:21<00:00,  2.19it/s]

Accuracy after iteration = 0.9943820224719101





In [287]:
x

array([[1.423e+01, 1.710e+00, 2.430e+00, ..., 1.040e+00, 3.920e+00,
        1.065e+03],
       [1.320e+01, 1.780e+00, 2.140e+00, ..., 1.050e+00, 3.400e+00,
        1.050e+03],
       [1.316e+01, 2.360e+00, 2.670e+00, ..., 1.030e+00, 3.170e+00,
        1.185e+03],
       ...,
       [1.327e+01, 4.280e+00, 2.260e+00, ..., 5.900e-01, 1.560e+00,
        8.350e+02],
       [1.317e+01, 2.590e+00, 2.370e+00, ..., 6.000e-01, 1.620e+00,
        8.400e+02],
       [1.413e+01, 4.100e+00, 2.740e+00, ..., 6.100e-01, 1.600e+00,
        5.600e+02]])

In [288]:
rbml_.x

array([[ 1.40277686e+01,  2.36218398e+00,  2.33855353e+00, ...,
         1.00806591e+00,  3.31799604e+00,  1.06533889e+03],
       [ 1.39623265e+01,  1.84175137e+00,  2.23325907e+00, ...,
         1.04102498e+00,  3.26060586e+00,  1.04926551e+03],
       [ 1.35073377e+01,  1.87059434e+00,  2.44276385e+00, ...,
         1.05455344e+00,  3.05733962e+00,  1.17474327e+03],
       ...,
       [ 1.41133724e+01,  1.71910773e+01,  7.66896323e-01, ...,
        -2.23444897e+00, -6.53332962e+00,  7.87660164e+02],
       [ 1.42269794e+01,  1.65436210e+01,  9.41199842e-01, ...,
        -2.35334217e+00, -6.66921507e+00,  7.87965419e+02],
       [ 3.07666063e+01,  1.73370992e+01,  5.35286850e+00, ...,
        -4.48617944e+00, -1.38388637e+01,  5.72533920e+02]])