In [21]:
import numpy as np 
import pandas as pd
from collections import Counter

def minkowskiDistance(A, B, p=2):
    # This function will calculate the Minkowski distance
    # The default value for p is 2
    
    return pow(pow(abs(B - A),p).sum(axis=1),1/p)

def partition( feature_matrix, target_vector, t, shuffle = True):
   
    if shuffle:
        shuffler = np.random.permutation(len(feature_matrix))
        feature_matrix = feature_matrix[shuffler]
        target_vector = target_vector[shuffler]

    train_split_limit = int(1-t * len(feature_matrix))
    test_split_limit = int(1-t * len(feature_matrix))

    x_train = feature_matrix[:train_split_limit]
    x_test = feature_matrix[test_split_limit:]
       
    y_train = target_vector[:train_split_limit]
    y_test = target_vector[test_split_limit:]

    return x_train, x_test, y_train, y_test

df = pd.read_csv('winequality-white.csv',sep=';')

X = df[['alcohol','density']]

#X = df.drop(columns=['quality'])
#X = np.array(df.drop(columns=['quality']))

Y = df['quality']

Y = np.asarray([0 if val <= 5 else 1 for val in Y])

X = np.array(X)
Y = np.array(Y)

x_train, x_test, y_train, y_test = partition(X,Y,0.2,shuffle=True)

In [208]:
x_test

array([[ 9.2    ,  1.0012 ],
       [ 9.1    ,  1.00014],
       [11.7    ,  0.99038],
       ...,
       [ 9.     ,  0.99773],
       [10.     ,  0.997  ],
       [11.8    ,  0.9905 ]])

In [313]:

n_neighbors = 5

#points_labels=[]

labels = []

pred = []

for x in x_test:
    #euc_dist = minkowskiDistance(x,x_test)
    euc_dist = np.sqrt(np.sum((x_train-x)**2, axis =1))
    sorted_distances = euc_dist.argsort()[:n_neighbors]
    
    nearest =[]
    for index in sorted_distances:
        nearest.append(y_train[index])
        
    labels.append(Counter(nearest).most_common(1)[0][0])
    

pred = np.array(labels)

In [320]:
n_neighbors = 5

labels=[]

nearest_neighbors = []

#inv_distances = []

w = []

pred1 = []


for x in x_test:
    #euc_dist = minkowskiDistance(x,x_train) #this must be euclidian distance
    
    euc_dist = minkowskiDistance(x_train,x)
    
    short_distances = euc_dist.argsort()[:n_neighbors]

    inv_dist = 1/(short_distances+0.00001)

    w1= inv_dist/(inv_dist.sum())
    
    nearest =[]
    for index in short_distances:
        nearest.append(y_train[index])
    
    label = np.matmul(w1, nearest) / np.sum(w1) 
    
    #labels.append(label)
    
    labels.append(0 if label < 0.5 else 1)
    
pred1 = np.asarray(labels)

    
    



In [321]:
pred1

array([0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1,
       1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1,
       0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1,
       1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0,
       0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1,
       1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0,
       0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1,

In [16]:
def CalculateAccuracy(y_test,predictions):

    N = y_test.shape[0]

    accuracy = ( y_test == predictions).sum() / N
    
    return accuracy

In [311]:
print(CalculateAccuracy(y_test,pred))
print(CalculateAccuracy(y_test,pred1))


0.7229038854805726
0.7034764826175869


In [24]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

clf = KNeighborsClassifier(n_neighbors = 5, weights ="distance")
clf.fit(x_train,y_train)
y_pred = clf.predict(x_test)

In [296]:
#print(CalculateAccuracy(y_test,pred1))
#print(CalculateAccuracy(y_test,pred2))
print(CalculateAccuracy(y_test,pred1))

0.7034764826175869
