In [1]:
import pandas as pd

In [2]:
# Load the student placement data from a CSV in the working directory.
df = pd.read_csv(filepath_or_buffer='50_Students_Placement.csv')

In [3]:
# Preview the first five rows of the freshly loaded data.
df.head(n=5)

Unnamed: 0,Student ID,CGPA,IQ,Placement
0,1,3.8,125,Yes
1,2,3.5,118,Yes
2,3,3.2,109,Yes
3,4,2.9,102,No
4,5,3.6,120,Yes


In [4]:
# Drop the identifier column so that only CGPA, IQ and Placement remain.
# errors='ignore' keeps this cell re-runnable: `df` is reassigned here, so a
# second run would otherwise raise KeyError because the column is already gone.
df = df.drop(columns='Student ID', errors='ignore')

In [5]:
# Preview the first five rows after dropping 'Student ID'.
df.head(n=5)

Unnamed: 0,CGPA,IQ,Placement
0,3.8,125,Yes
1,3.5,118,Yes
2,3.2,109,Yes
3,2.9,102,No
4,3.6,120,Yes


## Encoding Placement using LabelEncoder

In [7]:
from sklearn.preprocessing import LabelEncoder
# Encoder used below to turn the Placement labels into integer codes.
encoder = LabelEncoder()

In [8]:
# Replace the 'Yes'/'No' Placement labels with integer class codes.
placement_codes = encoder.fit_transform(df['Placement'])
df['Placement'] = placement_codes

In [9]:
# Preview the first five rows after encoding Placement.
df.head(n=5)

Unnamed: 0,CGPA,IQ,Placement
0,3.8,125,1
1,3.5,118,1
2,3.2,109,1
3,2.9,102,0
4,3.6,120,1


## Applying Standardization on CGPA and IQ

In [11]:
from sklearn.preprocessing import StandardScaler
# Scaler instance that is fitted on the CGPA and IQ columns in the next cell.
sc = StandardScaler()

In [12]:
# Standardize the two numeric feature columns in place.
# NOTE(review): the scaler is fitted on every row, before the train/test split
# performed later in this notebook, so test-set statistics leak into the
# scaling. Consider splitting first and fitting the scaler on the training rows.
feature_cols = ['CGPA', 'IQ']
df[feature_cols] = sc.fit_transform(df[feature_cols])

In [13]:
# Preview the first five rows after standardizing CGPA and IQ.
df.head(n=5)

Unnamed: 0,CGPA,IQ,Placement
0,1.578114,1.349261,1
1,0.62361,0.476357,1
2,-0.330895,-0.64595,1
3,-1.285399,-1.518854,0
4,0.941778,0.725758,1


In [14]:
# Summary statistics for every column. CGPA and IQ have mean ~0 after
# standardization; their std prints as ~1.0102 rather than exactly 1 because
# describe() reports the sample standard deviation (ddof=1) while
# StandardScaler divides by the population standard deviation (ddof=0).
df.describe()

Unnamed: 0,CGPA,IQ,Placement
count,50.0,50.0,50.0
mean,5.817569e-16,-8.593126e-16,0.58
std,1.010153,1.010153,0.498569
min,-1.921736,-1.892956,0.0
25%,-0.649063,-0.7706503,0.0
50%,-0.01272673,0.1646049,1.0
75%,0.8622357,0.6945828,1.0
max,1.896282,1.972765,1.0


In [15]:
# Count missing values per column (isna is the same check as isnull).
df.isna().sum()

CGPA         0
IQ           0
Placement    0
dtype: int64

In [16]:
# Show the first two rows of the full frame (features plus target).
df.head(n=2)

Unnamed: 0,CGPA,IQ,Placement
0,1.578114,1.349261,1
1,0.62361,0.476357,1


In [17]:
# First two rows of every column except the last one, i.e. the feature
# columns that are passed to train_test_split below.
df.iloc[:2, :-1]

Unnamed: 0,CGPA,IQ
0,1.578114,1.349261
1,0.62361,0.476357


In [18]:
from sklearn.model_selection import train_test_split

# All columns but the last are features; the last column (Placement) is the target.
features = df.iloc[:, :-1]
target = df.iloc[:, -1]

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=1,
)

In [19]:
from sklearn.neighbors import KNeighborsClassifier

# fit() returns the estimator itself, so construction and training can be chained.
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Predicted class labels for the held-out rows.
y_pred_knn = knn.predict(X_test)
print(y_pred_knn)

[0 1 1 1 0 0 1 1 0 1]


In [20]:
from sklearn.linear_model import LogisticRegression

# `regressor` actually holds a classifier; the name is kept because the
# pickling cell at the end of the notebook refers to it.
regressor = LogisticRegression().fit(X_train, y_train)

# Predicted class labels for the held-out rows.
y_pred_logistic = regressor.predict(X_test)
print(y_pred_logistic)

[0 1 1 1 0 0 1 1 0 1]


In [21]:
from sklearn.metrics import accuracy_score

# Score both models against the same held-out labels. The test split holds
# only 10 rows, so each accuracy moves in steps of 0.1.
knn_accuracy, logistic_accuracy = (
    accuracy_score(y_test, predictions)
    for predictions in (y_pred_knn, y_pred_logistic)
)

In [22]:
# Display the KNN accuracy on the held-out test rows.
knn_accuracy

0.9

In [23]:
# Display the logistic-regression accuracy on the held-out test rows.
logistic_accuracy

0.9

In [46]:
import pickle

# Persist both fitted classifiers. The context managers close each file handle
# as soon as its dump finishes, so the bytes are flushed to disk and no handle
# is left open in the kernel (the original bare open() calls were never closed).
# NOTE(review): the fitted StandardScaler `sc` is not saved here; the models
# were trained on standardized CGPA/IQ, so anything loading them must apply
# the same scaling to new inputs.
with open('knn_model', 'wb') as knn_file:
    pickle.dump(knn, knn_file)
with open('logistic_model', 'wb') as logistic_file:
    pickle.dump(regressor, logistic_file)