<a href="https://colab.research.google.com/github/YTTTTTABBY/NPS/blob/main/kNN%20Classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
### Core code adapted from a coding assignment in IT1244. Credit to Dr Prabhu for finding/creating the initial code.
### Datasets used are adapted from https://archive.ics.uci.edu/dataset/856/higher+education+students+performance+evaluation

# file manipulation
import csv

# numerical data processing
import numpy as np

# Print NumPy floats with 4 digits of precision and without scientific notation,
# so the arrays printed below are easy to compare by eye.
np.set_printoptions(precision=4, suppress=True)

In [None]:
# loading data
# Header names of the CSV columns used as features. load_data looks each name up
# in the header row, so the columns of the feature matrix follow this order.
X_COLUMN_NAMES = [
    "AGE", # 1: 18-21, 2: 22-25, 3: above 26
    "CCA", # 1: Yes, 2: No
    "CAMPUS", # 1: Yes, 2: No
    "YEAR", # 1:1, 2:2, 3:3, 4:4
    "GPA", # 1: <3.00, 2: 3.00-3.49, 3: 3.50-3.99, 4: 4.00-4.49, 5: above 4.49
]
# Header name of the CSV column that load_data returns as y.
Y_COLUMN_NAME = "ID" # ID of the student/tourist

# function to load files
def load_data(filename):
    """Load the feature matrix and ID vector from a CSV file with a header row.

    The columns named in ``X_COLUMN_NAMES`` are located by name in the header
    and returned, in that order, as the feature matrix. The column named by
    ``Y_COLUMN_NAME`` is returned as the second array. Completely blank rows
    are skipped.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A tuple ``(X, y)`` of float NumPy arrays. ``X`` has one row per data
        row and one column per entry of ``X_COLUMN_NAMES``; ``y`` holds the
        ``Y_COLUMN_NAME`` value of each data row.

    Raises:
        ValueError: If the file has no header row, if a required column name
            is missing from the header, or if a cell cannot be converted to
            float.
    """
    Xlist = []
    ylist = []
    # newline="" is what the csv module asks for, so that line breaks inside
    # quoted fields are handled by the reader rather than by open().
    with open(filename, "r", newline="") as f:
        reader = csv.reader(f, delimiter=",")
        titles = next(reader, None)
        if titles is None:
            raise ValueError(f"{filename!r} is empty: expected a header row")
        lookup = [titles.index(name) for name in X_COLUMN_NAMES]
        lookupy = titles.index(Y_COLUMN_NAME)
        for row in reader:
            if not row:
                # csv.reader yields [] for a blank line; there is nothing to load.
                continue
            Xlist.append([row[col] for col in lookup])
            ylist.append(row[lookupy])

    X = np.asarray(Xlist, dtype=float)
    y = np.asarray(ylist, dtype=float)
    return X, y

# loading student data
# Xs holds the student feature rows and ys the matching student IDs.
# NOTE(review): the absolute /content/ path presumably targets a Colab runtime;
# the CSV must already be uploaded there — confirm before running elsewhere.
filename = "/content/Student Data.csv"
Xs, ys = load_data(filename)

# loading tourist data
# Xt holds the tourist feature rows and yt the matching tourist IDs.
filename = "/content/Tourist Data.csv"
Xt, yt = load_data(filename)

In [None]:
#normalising data
def normalise(dataset):
    """Min-max scale every column of ``dataset`` into the range [0, 1].

    Args:
        dataset: Array-like of numbers with one sample per row (axis 0).

    Returns:
        A float array of the same shape in which every value has been mapped
        through ``(x - min) / (max - min)``, using the minimum and maximum of
        its own column. A column whose values are all identical maps to 0
        instead of producing NaN from a 0/0 division.
    """
    data = np.asarray(dataset, dtype=float)
    mini = np.amin(data, axis=0)
    span = np.amax(data, axis=0) - mini
    # A constant column has zero range; dividing by 1 leaves its (x - min) = 0
    # values untouched and avoids NaN and the divide-by-zero warning.
    span = np.where(span == 0, 1.0, span)
    # Broadcasting applies the per-column offset and scale to every row at once.
    return (data - mini) / span

# Scale the student and tourist feature matrices column by column.
# NOTE(review): each call uses that dataset's own column min/max, so the same raw
# value can end up with different scaled values in Xs_norm and Xt_norm if the two
# datasets do not span the same range — confirm this is intended.
Xs_norm = normalise(Xs)
Xt_norm = normalise(Xt)

In [None]:
# computing euclidean distance between points
def euclideanDist(x1, x2):
    """Return the Euclidean (straight-line) distance between ``x1`` and ``x2``.

    Both arguments are expected to be NumPy arrays of matching (or
    broadcastable) shape; the squared element-wise differences are summed over
    all elements before taking the square root.
    """
    delta = x1 - x2
    squared_total = np.sum(delta * delta)
    return np.sqrt(squared_total)

# finding the nearest neighbours and their IDs
def kNearestNeighbors(X, y, Xtest, K):
    """Find the K samples of ``X`` closest to ``Xtest`` in Euclidean distance.

    Args:
        X: 2-D array-like with one sample per row.
        y: 1-D array-like with the ID of each row of ``X``.
        Xtest: 1-D array-like with one value per column of ``X``.
        K: Number of neighbours to return.

    Returns:
        A tuple ``(Xng, yng)`` holding the feature rows and the IDs of the K
        nearest samples, ordered from nearest to Kth nearest. Samples at
        exactly the same distance keep their relative order from ``X``. If
        ``X`` has fewer than K rows, all of them are returned.
    """
    X = np.asarray(X)
    y = np.asarray(y)

    # Distance from Xtest to every row of X, computed in one broadcasted
    # expression instead of a Python-level loop over the rows.
    dists = np.sqrt(np.sum(np.square(X - Xtest), axis=1))

    # The features are coarse categorical codes, so many samples tie on
    # distance. A stable sort breaks ties by position in X, which keeps the
    # selection reproducible regardless of the sort implementation in use.
    index = np.argsort(dists, kind="stable")[:K]
    return X[index], y[index]

In [None]:
# driver code to test the function
K = 3
# One index drives both the query and the printed tourist row, so the manual
# check below always compares the neighbours with the tourist actually matched.
tourist_index = 12
Xng, yng = kNearestNeighbors(Xs_norm, ys, Xt_norm[tourist_index], K)

print(Xng) # X values of chosen students
print(Xt_norm[tourist_index]) # X values of the queried tourist, to manually validate closeness of matches
print(yng) #ID of chosen students

[[0.   1.   1.   0.   0.75]
 [0.   1.   1.   0.   0.75]
 [0.   1.   1.   0.   0.75]]
[0.5    0.     1.     0.3333 0.25  ]
[62. 86. 35.]


In [None]:
# testing the algorithm for matching multiple tourists
K=3
# Slicing (rather than range(5)) keeps this working when fewer than 5 tourists
# are loaded.
for tourist_features, tourist_id in zip(Xt_norm[:5], yt[:5]):
    Xng, yng = kNearestNeighbors(Xs_norm, ys, tourist_features, K)
    print("For Tourist ", tourist_id, ":")
    # Print however many neighbours came back, so changing K needs no edits here.
    for rank, student_id in enumerate(yng, start=1):
        print(f"Choice {rank}: Student #", student_id)

For Tourist  1.0 :
Choice 1: Student # 90.0
Choice 2: Student # 59.0
Choice 3: Student # 64.0
For Tourist  2.0 :
Choice 1: Student # 27.0
Choice 2: Student # 5.0
Choice 3: Student # 18.0
For Tourist  3.0 :
Choice 1: Student # 51.0
Choice 2: Student # 16.0
Choice 3: Student # 27.0
For Tourist  4.0 :
Choice 1: Student # 96.0
Choice 2: Student # 38.0
Choice 3: Student # 31.0
For Tourist  5.0 :
Choice 1: Student # 21.0
Choice 2: Student # 96.0
Choice 3: Student # 28.0


In [None]:
# matching every tourist in the dataset to 3 potential students
K=3
# Walk the tourist feature rows and their IDs in lockstep.
for tourist_features, tourist_id in zip(Xt_norm, yt):
    Xng, yng = kNearestNeighbors(Xs_norm, ys, tourist_features, K)
    print("For Tourist ", tourist_id, ":")
    # Print however many neighbours came back, so changing K needs no edits here.
    for rank, student_id in enumerate(yng, start=1):
        print(f"Choice {rank}: Student #", student_id)

For Tourist  1.0 :
Choice 1: Student # 90.0
Choice 2: Student # 59.0
Choice 3: Student # 64.0
For Tourist  2.0 :
Choice 1: Student # 27.0
Choice 2: Student # 5.0
Choice 3: Student # 18.0
For Tourist  3.0 :
Choice 1: Student # 51.0
Choice 2: Student # 16.0
Choice 3: Student # 27.0
For Tourist  4.0 :
Choice 1: Student # 96.0
Choice 2: Student # 38.0
Choice 3: Student # 31.0
For Tourist  5.0 :
Choice 1: Student # 21.0
Choice 2: Student # 96.0
Choice 3: Student # 28.0
For Tourist  6.0 :
Choice 1: Student # 77.0
Choice 2: Student # 5.0
Choice 3: Student # 27.0
For Tourist  7.0 :
Choice 1: Student # 100.0
Choice 2: Student # 43.0
Choice 3: Student # 111.0
For Tourist  8.0 :
Choice 1: Student # 122.0
Choice 2: Student # 121.0
Choice 3: Student # 136.0
For Tourist  9.0 :
Choice 1: Student # 121.0
Choice 2: Student # 122.0
Choice 3: Student # 136.0
For Tourist  10.0 :
Choice 1: Student # 23.0
Choice 2: Student # 68.0
Choice 3: Student # 2.0
For Tourist  11.0 :
Choice 1: Student # 20.0
Choice 2: