In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")

In [2]:
df = sns.load_dataset('iris')
df.head(3)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa


In [3]:
from sklearn.model_selection import train_test_split

In [4]:
x = df.drop('species',axis=1)
y = df[['species']]

In [5]:
x_train,x_test,y_train,y_test = train_test_split(x,y,train_size=0.8,random_state=100)

In [6]:
from sklearn.neighbors import KNeighborsClassifier

In [7]:
knn = KNeighborsClassifier() ## k, p and weights

In [8]:
knn_model = knn.fit(x_train,y_train)

In [9]:
y_pred = knn_model.predict(x_test)

In [10]:
from sklearn.metrics import accuracy_score

In [11]:
accuracy_score(y_test,y_pred)

1.0

In [12]:
from sklearn.model_selection import GridSearchCV

In [13]:
grid_search = GridSearchCV(estimator=knn,param_grid={'n_neighbors' :[i for i in range(2,31)],
                                                     'p':[1,2]},scoring='accuracy',cv=5)

In [14]:
grid_search.fit(x_train,y_train)

In [15]:
grid_search.best_params_

{'n_neighbors': 13, 'p': 1}

In [16]:
### Model
knn = KNeighborsClassifier(n_neighbors=15,p=2)

In [17]:
knn.fit(x_train,y_train)

In [18]:
y_pred = knn.predict(x_test)

In [19]:
accuracy_score(y_test,y_pred)

0.9666666666666667

In [20]:
from sklearn.metrics import classification_report,confusion_matrix,recall_score,precision_score,f1_score

In [21]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        11
  versicolor       1.00      0.83      0.91         6
   virginica       0.93      1.00      0.96        13

    accuracy                           0.97        30
   macro avg       0.98      0.94      0.96        30
weighted avg       0.97      0.97      0.97        30



In [22]:
confusion_matrix(y_test,y_pred)

array([[11,  0,  0],
       [ 0,  5,  1],
       [ 0,  0, 13]], dtype=int64)

## Naive Bayes

In [23]:
from sklearn.naive_bayes import GaussianNB

In [24]:
nb = GaussianNB()

In [25]:
nb_model = nb.fit(x_train,y_train)

In [26]:
nb_pred = nb_model.predict(x_test)

In [27]:
print(classification_report(y_test,nb_pred))

              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        11
  versicolor       1.00      0.83      0.91         6
   virginica       0.93      1.00      0.96        13

    accuracy                           0.97        30
   macro avg       0.98      0.94      0.96        30
weighted avg       0.97      0.97      0.97        30



## Admission Dataset

In [28]:
df = pd.read_csv('Admission.csv')
df.head()

Unnamed: 0,SlNo,Gender,Percent_SSC,Board_SSC,Percent_HSC,Board_HSC,Stream_HSC,Percent_Degree,Course_Degree,Experience_Yrs,Entrance_Test,Percentile_ET,Percent_MBA,Specialization_MBA,Marks_Communication,Marks_Projectwork,Marks_BOCA,Placement,Salary
0,1,M,62.0,Others,88.0,Others,Commerce,52.0,Science,0,MAT,55.0,58.8,Marketing & HR,50,65,74,Placed,270000
1,2,M,76.33,ICSE,75.33,Others,Science,75.48,Computer Applications,1,MAT,86.5,66.28,Marketing & Finance,69,70,75,Placed,200000
2,3,M,72.0,Others,78.0,Others,Commerce,66.63,Engineering,0,,0.0,52.91,Marketing & Finance,50,61,59,Placed,240000
3,4,M,60.0,CBSE,63.0,CBSE,Arts,58.0,Management,0,MAT,75.0,57.8,Marketing & Finance,54,66,62,Placed,250000
4,5,M,61.0,CBSE,55.0,ISC,Science,54.0,Engineering,1,MAT,66.0,59.43,Marketing & HR,52,65,67,Placed,180000


In [29]:
df.drop('Salary',axis=1,inplace=True)

In [30]:
df.isna().sum()

SlNo                    0
Gender                  0
Percent_SSC             0
Board_SSC               0
Percent_HSC             0
Board_HSC               0
Stream_HSC              0
Percent_Degree          0
Course_Degree           0
Experience_Yrs          0
Entrance_Test          67
Percentile_ET           0
Percent_MBA             0
Specialization_MBA      0
Marks_Communication     0
Marks_Projectwork       0
Marks_BOCA              0
Placement               0
dtype: int64

In [31]:
df['Entrance_Test'].unique()

array(['MAT', nan, 'K-MAT', 'CAT', 'PGCET', 'GCET', 'G-MAT', 'XAT',
       'G-SAT'], dtype=object)

In [34]:
df['Entrance_Test'] = df['Entrance_Test'].fillna('MGMT')

In [38]:
df['Entrance_Test'].value_counts(normalize=True)

MAT      0.677749
MGMT     0.171355
K-MAT    0.061381
CAT      0.056266
PGCET    0.020460
GCET     0.005115
G-MAT    0.002558
XAT      0.002558
G-SAT    0.002558
Name: Entrance_Test, dtype: float64

In [36]:
df.isna().sum()

SlNo                   0
Gender                 0
Percent_SSC            0
Board_SSC              0
Percent_HSC            0
Board_HSC              0
Stream_HSC             0
Percent_Degree         0
Course_Degree          0
Experience_Yrs         0
Entrance_Test          0
Percentile_ET          0
Percent_MBA            0
Specialization_MBA     0
Marks_Communication    0
Marks_Projectwork      0
Marks_BOCA             0
Placement              0
dtype: int64

In [37]:
df.drop('SlNo',axis=1,inplace=True)

In [39]:
cat = df.select_dtypes(exclude=np.number)
num = df.select_dtypes(include=np.number)

In [51]:
cat = pd.get_dummies(cat,prefix=None,drop_first=True)

In [52]:
cat.head()

Unnamed: 0,Gender_M,Board_SSC_ICSE,Board_SSC_Others,Board_HSC_ISC,Board_HSC_Others,Stream_HSC_Commerce,Stream_HSC_Science,Course_Degree_Commerce,Course_Degree_Computer Applications,Course_Degree_Engineering,...,Entrance_Test_G-SAT,Entrance_Test_GCET,Entrance_Test_K-MAT,Entrance_Test_MAT,Entrance_Test_MGMT,Entrance_Test_PGCET,Entrance_Test_XAT,Specialization_MBA_Marketing & HR,Specialization_MBA_Marketing & IB,Placement_Placed
0,1,0,1,0,1,1,0,0,0,0,...,0,0,0,1,0,0,0,1,0,1
1,1,1,0,0,1,0,1,0,1,0,...,0,0,0,1,0,0,0,0,0,1
2,1,0,1,0,1,1,0,0,0,1,...,0,0,0,0,1,0,0,0,0,1
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,1
4,1,0,0,1,0,0,1,0,0,1,...,0,0,0,1,0,0,0,1,0,1


In [54]:
x = pd.concat([num,cat.drop('Placement_Placed',axis=1)],axis=1)
y = cat[['Placement_Placed']]

In [55]:
x.head()

Unnamed: 0,Percent_SSC,Percent_HSC,Percent_Degree,Experience_Yrs,Percentile_ET,Percent_MBA,Marks_Communication,Marks_Projectwork,Marks_BOCA,Gender_M,...,Entrance_Test_G-MAT,Entrance_Test_G-SAT,Entrance_Test_GCET,Entrance_Test_K-MAT,Entrance_Test_MAT,Entrance_Test_MGMT,Entrance_Test_PGCET,Entrance_Test_XAT,Specialization_MBA_Marketing & HR,Specialization_MBA_Marketing & IB
0,62.0,88.0,52.0,0,55.0,58.8,50,65,74,1,...,0,0,0,0,1,0,0,0,1,0
1,76.33,75.33,75.48,1,86.5,66.28,69,70,75,1,...,0,0,0,0,1,0,0,0,0,0
2,72.0,78.0,66.63,0,0.0,52.91,50,61,59,1,...,0,0,0,0,0,1,0,0,0,0
3,60.0,63.0,58.0,0,75.0,57.8,54,66,62,1,...,0,0,0,0,1,0,0,0,0,0
4,61.0,55.0,54.0,1,66.0,59.43,52,65,67,1,...,0,0,0,0,1,0,0,0,1,0


In [56]:
y.head()

Unnamed: 0,Placement_Placed
0,1
1,1
2,1
3,1
4,1


## Train-Test Split

In [57]:
x_train,x_test,y_train,y_test = train_test_split(x,y,train_size=0.7,random_state=100)

## KNN

In [58]:
knn = KNeighborsClassifier()

In [59]:
knn_model = knn.fit(x_train,y_train)

In [60]:
knn_pred = knn_model.predict(x_test)

In [62]:
print(classification_report(y_test,knn_pred))

              precision    recall  f1-score   support

           0       0.40      0.23      0.29        26
           1       0.81      0.90      0.85        92

    accuracy                           0.75       118
   macro avg       0.60      0.57      0.57       118
weighted avg       0.72      0.75      0.73       118



## Naive Bayes

In [63]:
nb = GaussianNB()

In [64]:
nb_model = nb.fit(x_train,y_train)

In [66]:
nb_pred = nb_model.predict(x_test)

In [67]:
print(classification_report(y_test,nb_pred))

              precision    recall  f1-score   support

           0       0.22      1.00      0.36        26
           1       1.00      0.01      0.02        92

    accuracy                           0.23       118
   macro avg       0.61      0.51      0.19       118
weighted avg       0.83      0.23      0.10       118

