In [21]:
import numpy as np

import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

import scipy.stats as st
import statsmodels.api as sm

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.neighbors import KNeighborsClassifier

from sklearn.metrics import accuracy_score, precision_score, recall_score, cohen_kappa_score
from sklearn.metrics import roc_auc_score, roc_curve, classification_report, f1_score

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold

In [2]:
df = pd.read_csv("../Admission_Predict.csv")
df.head()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337,118,4,4.5,4.5,9.65,1,1
1,2,324,107,4,4.0,4.5,8.87,1,1
2,3,316,104,3,3.0,3.5,8.0,1,0
3,4,322,110,3,3.5,2.5,8.67,1,1
4,5,314,103,2,2.0,3.0,8.21,0,0


In [4]:
df = df.drop(columns="Serial No.")
inp = df.drop(columns="Chance of Admit")
out = df["Chance of Admit"]

In [7]:
sc = StandardScaler()
inp_sc = pd.DataFrame(sc.fit_transform(inp), columns=inp.columns)
inp_sc.head(2)

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research
0,1.762107,1.746971,0.798829,1.093864,1.167321,1.764818,0.909112
1,0.627656,-0.067635,0.798829,0.596653,1.167321,0.455151,0.909112


In [9]:

xtrain,xtest,ytrain,ytest = train_test_split(inp_sc, out, test_size=0.2, random_state=48, stratify=out)

In [10]:
knn = KNeighborsClassifier()
knn.fit(xtrain, ytrain)

In [16]:
ypredict = knn.predict(xtest)
ypredict

array([1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1,
       1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0,
       0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0,
       1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1], dtype=int64)

In [17]:
print(classification_report(ytest, ypredict))

              precision    recall  f1-score   support

           0       0.88      0.86      0.87        44
           1       0.84      0.86      0.85        36

    accuracy                           0.86        80
   macro avg       0.86      0.86      0.86        80
weighted avg       0.86      0.86      0.86        80



In [18]:
cohen_kappa_score(ytest, ypredict)

0.7229219143576826

In [20]:
KNeighborsClassifier()

In [23]:
params = {"n_neighbors": [1,2,3,4,5], "p": [1,2,3], "weights": ["uniform", "distance"] }
kf = KFold(n_splits=5)
gs = GridSearchCV(estimator=knn, param_grid=params, cv=kf, scoring="f1")
gs.fit(xtrain, ytrain)

In [24]:
gs.best_params_

{'n_neighbors': 3, 'p': 3, 'weights': 'uniform'}

In [37]:
test = xtest.loc[82]
test

GRE Score            0.278595
TOEFL Score          0.427257
University Rating    1.674257
SOP                  1.591075
LOR                  1.167321
CGPA                 1.042822
Research             0.909112
Name: 82, dtype: float64

In [35]:
knn.kneighbors(test.values.reshape(1, -1))



(array([[0.61767557, 0.70647406, 0.77228162, 0.7826315 , 0.84669372]]),
 array([[ 85, 255,  49, 194,   9]], dtype=int64))

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research
82,0.278595,0.427257,1.674257,1.591075,1.167321,1.042822,0.909112


In [47]:
knn.kneighbors(test.to_frame().T)

(array([[0.61767557, 0.70647406, 0.77228162, 0.7826315 , 0.84669372]]),
 array([[ 85, 255,  49, 194,   9]], dtype=int64))