# 3. KNN

Paper reference:
*  https://ieeexplore.ieee.org/abstract/document/6503286/
* K. M. Thilina, K. W. Choi, N. Saquib and E. Hossain, "Pattern classification techniques for cooperative spectrum sensing in cognitive radio networks: SVM and W-KNN approaches," 2012 IEEE Global Communications Conference (GLOBECOM), 2012, pp. 1260-1265, doi: 10.1109/GLOCOM.2012.6503286.

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Modeling and training packages
import torch
from torch import nn
import torch.optim as optim
import torch.nn.functional as F

# Data Preparation packages
from sklearn.metrics import classification_report
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
from scipy import signal

from IPython.display import clear_output

In [3]:
%run functions/functions.ipynb

Loaded: 
 - partition(X, y, percent_train = 0.8), 
 - Xy_dataloader(X_numpy, y_numpy, batch_size=32)


<hr>

## 1. Load data

In [4]:
%run #0-LoadingData.ipynb

Pandas_ML_Project_SNR_Minus10
Pandas_ML_Project_SNR_Minus15
Pandas_ML_Project_SNR_Minus20
Pandas_ML_Project_SNR_Minus25
Pandas_ML_Project_SNR_Minus5
Data loaded
Example: X, y = data['Pandas_ML_Project_SNR_Minus5']['x'], data['Pandas_ML_Project_SNR_Minus5']['y']


In [5]:
X, y = data['Pandas_ML_Project_SNR_Minus5']['x'], data['Pandas_ML_Project_SNR_Minus5']['y']

<hr>

## 2. Model

In [6]:
class KNN():
    
    def __init__(self, k = 2):
        self.k = k
        
#     def find_best_k(self, X_train, y_train, div, k_range):
#         batch_size = int(X_train.shape[0] / div)
#         y_hat_arr = np.zeros((len(k_range), div))
#         y_hat_probs = np.zeros((len(k_range), div))
    
#         for k_idx, kneighbor in enumerate(k_range):
#             self.k = kneighbor
#             for idx, i in enumerate(range(0, X_train.shape[0], batch_size)):
#                 X_test2 = X_train[i:i+batch_size]
#                 y_test2 = y_train[i:i+batch_size]
#                 X_train2 = np.concatenate((X_train[:i], X_train[i+batch_size:]))
#                 y_train2= np.concatenate((y_train[:i], y_train[i+batch_size:]))
#                 yhat, yhat_prob = self.predict(X_train2, X_test2, y_train2)
#                 acc_correct = yhat == y_test2
#                 accuracy = np.sum(acc_correct) / len(y_test2)
#                 y_hat_arr[k_idx, idx] = accuracy
#                 y_hat_probs[k_idx, idx] = yhat_prob.mean()
#         return y_hat_arr, y_hat_probs
    
    def find_distance(self, X_train, X_test):
        #create newaxis simply so that broadcast to all values
        dist = X_test[:, np.newaxis, :] - X_train[np.newaxis, :, :]
        sq_dist = dist ** 2

        #sum across feature dimension, thus axis = 2
        summed_dist = sq_dist.sum(axis=2)
        sq_dist = np.sqrt(summed_dist)
        return sq_dist
    

    def find_neighbors(self, X_train, X_test, k):
        dist = self.find_distance(X_train, X_test)
        #return the first k neighbors
        neighbors_ix = np.argsort(dist)[:, 0:k]
        return neighbors_ix
    
    
    def get_most_common(self, y, k):
        y = y[0:k]
        count = np.bincount(y)
        largest_first = count.argmax()
        largest_second = count.argsort()[-2:][0]
        if count[largest_first] == count[largest_second]:
            y = y[0: k + 1]
            return np.bincount(y).argmax(), count[largest_first] /count.sum()
        
        return np.bincount(y).argmax(), count[largest_first] / count.sum()
            
    
    def predict(self, X_train, X_test, y_train):
        neighbors_ix = self.find_neighbors(X_train, X_test, self.k)
        self.pred = np.zeros(X_test.shape[0])
        self.probs = np.zeros(X_test.shape[0])
        for ix, y in enumerate(y_train[neighbors_ix]):
            self.pred[ix], self.probs[ix] = self.get_most_common(y, self.k)
        return self.pred, self.probs

In [7]:
#train test split
y = y.astype(int)
idx, X_train, y_train, X_test, y_test = partition(X, y, percent_train=0.8)

(8000, 11) (8000,) (2000, 11) (2000,)


## 3. Predict for SNR-5

In [8]:
knn = KNN(k = 2)
yhat, yhat_probs = knn.predict(X_train, X_test, y_train)

print("==================== SNR Minus5 ===================\n Accuracy: ", np.sum(yhat == y_test)/len(y_test))
print("---- Classification report ----")
print("Report: ", classification_report(y_test, yhat))
print("---- Probability score ----")
print("Probability: ", yhat_probs.mean())

 Accuracy:  1.0
---- Classification report ----
Report:                precision    recall  f1-score   support

           0       1.00      1.00      1.00      1005
           1       1.00      1.00      1.00       995

    accuracy                           1.00      2000
   macro avg       1.00      1.00      1.00      2000
weighted avg       1.00      1.00      1.00      2000

---- Probability score ----
Probability:  1.0


## 4. Predict for everything SNR

### SNR-10

In [9]:
X, y = data['Pandas_ML_Project_SNR_Minus10']['x'], data['Pandas_ML_Project_SNR_Minus10']['y']
y = y.astype(int)
idx, X_train, y_train, X_test, y_test = partition(X, y, percent_train=0.8)
knn = KNN(k = 2)
yhat, yhat_probs = knn.predict(X_train, X_test, y_train)

print("==================== SNR Minus10 ===================\n Accuracy: ", np.sum(yhat == y_test)/len(y_test))
print("---- Classification report ----")
print("Report: ", classification_report(y_test, yhat))
print("---- Probability score ----")
print("Probability: ", yhat_probs.mean())

(8000, 11) (8000,) (2000, 11) (2000,)
 Accuracy:  0.9875
---- Classification report ----
Report:                precision    recall  f1-score   support

           0       0.98      1.00      0.99       998
           1       1.00      0.98      0.99      1002

    accuracy                           0.99      2000
   macro avg       0.99      0.99      0.99      2000
weighted avg       0.99      0.99      0.99      2000

---- Probability score ----
Probability:  0.9935


### SNR-15

In [10]:
X, y = data['Pandas_ML_Project_SNR_Minus15']['x'], data['Pandas_ML_Project_SNR_Minus15']['y']
y = y.astype(int)
idx, X_train, y_train, X_test, y_test = partition(X, y, percent_train=0.8)
knn = KNN(k = 2)
yhat, yhat_probs = knn.predict(X_train, X_test, y_train)

print("==================== SNR Minus15 ===================\n Accuracy: ", np.sum(yhat == y_test)/len(y_test))
print("---- Classification report ----")
print("Report: ", classification_report(y_test, yhat))
print("---- Probability score ----")
print("Probability: ", yhat_probs.mean())

(8000, 11) (8000,) (2000, 11) (2000,)
 Accuracy:  0.725
---- Classification report ----
Report:                precision    recall  f1-score   support

           0       0.68      0.86      0.76      1021
           1       0.80      0.58      0.67       979

    accuracy                           0.73      2000
   macro avg       0.74      0.72      0.72      2000
weighted avg       0.74      0.72      0.72      2000

---- Probability score ----
Probability:  0.85375


### SNR-20

In [13]:
X, y = data['Pandas_ML_Project_SNR_Minus20']['x'], data['Pandas_ML_Project_SNR_Minus20']['y']
y = y.astype(int)
idx, X_train, y_train, X_test, y_test = partition(X, y, percent_train=0.8)
knn = KNN(k = 2)
yhat, yhat_probs = knn.predict(X_train, X_test, y_train)

print("==================== SNR Minus20 ===================\n Accuracy: ", np.sum(yhat == y_test)/len(y_test))
print("---- Classification report ----")
print("Report: ", classification_report(y_test, yhat))
print("---- Probability score ----")
print("Probability: ", yhat_probs.mean())

(8000, 11) (8000,) (2000, 11) (2000,)
 Accuracy:  0.529
---- Classification report ----
Report:                precision    recall  f1-score   support

           0       0.52      0.77      0.62      1009
           1       0.55      0.28      0.37       991

    accuracy                           0.53      2000
   macro avg       0.54      0.53      0.50      2000
weighted avg       0.54      0.53      0.50      2000

---- Probability score ----
Probability:  0.7555


### SNR-25

In [17]:
X, y = data['Pandas_ML_Project_SNR_Minus25']['x'], data['Pandas_ML_Project_SNR_Minus25']['y']
y = y.astype(int)
idx, X_train, y_train, X_test, y_test = partition(X, y, percent_train=0.8)
knn = KNN(k = 2)
yhat, yhat_probs = knn.predict(X_train, X_test, y_train)

print("==================== SNR Minus25 ===================\n Accuracy: ", np.sum(yhat == y_test)/len(y_test))
print("---- Classification report ----")
print("Report: ", classification_report(y_test, yhat))
print("---- Probability score ----")
print("Probability: ", yhat_probs.mean())

(8000, 11) (8000,) (2000, 11) (2000,)
 Accuracy:  0.507
---- Classification report ----
Report:                precision    recall  f1-score   support

           0       0.50      0.75      0.60       994
           1       0.52      0.26      0.35      1006

    accuracy                           0.51      2000
   macro avg       0.51      0.51      0.48      2000
weighted avg       0.51      0.51      0.48      2000

---- Probability score ----
Probability:  0.75325
