In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import MinMaxScaler, PowerTransformer, StandardScaler

In [2]:
# Load the ads dataset and discard the 'User ID' column in a single expression,
# so the cell yields the same frame every time it is run.
df = pd.read_csv('Social_Network_Ads.csv').drop(columns='User ID')
df.head()

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,Male,19,19000,0
1,Male,35,20000,0
2,Female,26,43000,0
3,Female,27,57000,0
4,Male,19,76000,0


In [3]:
# The label is the 'Purchased' column; every other column is a feature.
y = df['Purchased']
X = df.drop(columns=['Purchased'])

In [4]:
from sklearn.model_selection import train_test_split,cross_val_score

# Hold out 30% of the rows for testing. A fixed random_state makes the split
# (and every score computed from it) reproducible across kernel restarts.
X_train,X_test,y_train,y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [5]:
## encoding categorical variable

# One-hot encode the column at position 0 (dropping the first category) and pass
# the remaining columns through unchanged.
# The `sparse=False` encoder flag was renamed to `sparse_output` in scikit-learn 1.2
# and removed in 1.4, so it is no longer passed. Instead `sparse_threshold=0` tells
# the ColumnTransformer to always return a dense array, which works on old and new
# scikit-learn releases alike.
tnf1 = ColumnTransformer(transformers=[
    ('tnf1', OneHotEncoder(drop='first',dtype=np.int64),[0])
],remainder='passthrough',sparse_threshold=0)


In [6]:
## scaling variable

# Standardize the columns at positions 0, 1 and 2 of the encoded matrix;
# any further column would be passed through untouched.
tnf2 = ColumnTransformer(
    transformers=[('tnf2', StandardScaler(), [0, 1, 2])],
    remainder='passthrough',
)

In [7]:
from sklearn.neighbors import KNeighborsClassifier

# Classifier used as the final estimator: k-nearest neighbours with k = 8.
tnf3 = KNeighborsClassifier(
    n_neighbors=8,
)

In [8]:
from sklearn.pipeline import Pipeline

# Chain the three stages in execution order. The step names ('tnf1', 'tnf2', 'tnf3')
# are also the prefixes used to address each stage's parameters, e.g. 'tnf3__n_neighbors'.
pipe = Pipeline(steps=[('tnf1', tnf1), ('tnf2', tnf2), ('tnf3', tnf3)])

In [9]:
# Fit every pipeline step in sequence on the training split only.
pipe.fit(X_train,y_train)

In [11]:
from sklearn.metrics import accuracy_score

# Score the fitted pipeline on the held-out test split.
y_pred = pipe.predict(X_test)
accuracy_score(y_true=y_test, y_pred=y_pred)

0.9

In [28]:
from sklearn.model_selection import RandomizedSearchCV,KFold

In [29]:
# Search space for the KNN step. Keys use the '<step name>__<parameter>' form so the
# search can route each value to the 'tnf3' step of the pipeline.
param_grid = {
    'tnf3__n_neighbors': [4,5,6,7,8,9,10,11,12],
    'tnf3__weights': ['uniform', 'distance'],
    'tnf3__algorithm' : ['auto', 'ball_tree', 'kd_tree', 'brute'],
    # Each value appears exactly once and in ascending order: a repeated value
    # (35 was previously listed twice) creates duplicate candidates and makes that
    # value more likely to be sampled than the others.
    'tnf3__leaf_size': [10,15,20,25,30,35,40,45,50,60,70],
    'tnf3__p':[1,2],
    'tnf3__metric':['minkowski']
}

In [30]:
# Randomized search over `param_grid`: 100 sampled candidates, each scored by
# 6-fold cross-validated accuracy, using all available cores.
# random_state is fixed so the same candidates are drawn on every run; without it
# the reported best score and parameters change between executions.
hyper = RandomizedSearchCV(
    estimator=pipe,
    param_distributions=param_grid,
    n_iter=100,
    scoring='accuracy',
    cv=6,
    n_jobs=-1,
    random_state=42,
)

In [31]:
# Run the hyper-parameter search using the training split only, so the test split
# stays unseen during tuning.
hyper.fit(X_train,y_train)

In [32]:
# Mean cross-validated score of the best candidate found by the search.
hyper.best_score_

0.9070305272895468

In [33]:
# Parameter combination that achieved the best cross-validated score.
hyper.best_params_

{'tnf3__weights': 'uniform',
 'tnf3__p': 2,
 'tnf3__n_neighbors': 7,
 'tnf3__metric': 'minkowski',
 'tnf3__leaf_size': 25,
 'tnf3__algorithm': 'kd_tree'}

In [193]:
import pickle

# Persist the fitted pipeline to disk.
# NOTE(review): this saves `pipe`, not the tuned model produced by the search
# (`hyper.best_estimator_`) — confirm which one is meant to be shipped.
# The context manager guarantees the file is flushed and closed even if pickling raises.
with open('pipe.pkl','wb') as pathurl:
    pickle.dump(pipe, pathurl)