In [61]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier


In [62]:
df = pd.read_csv('Social_Network_Ads.csv')

In [16]:
df.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [17]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   User ID          400 non-null    int64 
 1   Gender           400 non-null    object
 2   Age              400 non-null    int64 
 3   EstimatedSalary  400 non-null    int64 
 4   Purchased        400 non-null    int64 
dtypes: int64(4), object(1)
memory usage: 15.8+ KB


In [18]:
 df['Gender'].replace({"Male":1,"Female":0},inplace=True)

In [19]:
df.head()


Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,1,19,19000,0
1,15810944,1,35,20000,0
2,15668575,0,26,43000,0
3,15603246,0,27,57000,0
4,15804002,1,19,76000,0


In [36]:
X = df.iloc[:,1:-1].values
Y = df.iloc[:,-1].values



scaler = StandardScaler()

x = scaler.fit_transform(X)
x_train, x_test, y_train, y_test = train_test_split(x, Y, test_size=0.2, random_state=42)




In [55]:
def model(model_type):
    model_type.fit(x_train,y_train)
    y_pred = model_type.predict(x_test)
    print(f"Accuracy of {model_type}:{accuracy_score(y_test,y_pred)}")


In [56]:
#support vector machine 

svm = SVC(kernel='rbf', C=1.0)
model(svm)


Accuracy of SVC():0.925


In [57]:
#Naive Bayes

nb_classifirs = GaussianNB()
model(nb_classifirs)

Accuracy of GaussianNB():0.9375


In [58]:
#decision tree

clf = DecisionTreeClassifier()
model(clf)


Accuracy of DecisionTreeClassifier():0.8375


In [59]:
#Logestic Regression

lr  =  LogisticRegression()

model(lr)

Accuracy of LogisticRegression():0.8875


In [63]:
#knearest neibors
knn_c= KNeighborsClassifier(n_neighbors=3)

model(knn_c)

Accuracy of KNeighborsClassifier(n_neighbors=3):0.9125


In [71]:
from sklearn.model_selection import GridSearchCV

params_dc = { 
              "criterion":['gini','entropy'],
              "max_depth":[1,2,3,4,5,6,7]
            }

params_svm = {

                "kernel":['linear','poly','rbf','sigmoid'],
                "C":[1,10,100,50,5]
        }

params_neigh = {
                "n_neighbors":[3,5,7,9],

}


            

In [72]:
def GridCv(model,dict):
    grid = GridSearchCV(model,param_grid=dict,cv=10,n_jobs =-1)
    grid.fit(x_train,y_train)
    print(f"Best Estimator:{grid.best_estimator_}")
    print(f"Best accuracy:{grid.best_score_}")
    print(f"Best parameter:{grid.best_params_}")

In [73]:
#svm
GridCv(svm,params_svm)

Best Estimator:SVC(C=5)
Best accuracy:0.909375
Best parameter:{'C': 5, 'kernel': 'rbf'}


In [74]:
#decision tree
GridCv(clf,params_dc)

Best Estimator:DecisionTreeClassifier(max_depth=2)
Best accuracy:0.9125
Best parameter:{'criterion': 'gini', 'max_depth': 2}


In [75]:
#k nearest 
GridCv(knn_c,params_neigh)

Best Estimator:KNeighborsClassifier(n_neighbors=7)
Best accuracy:0.915625
Best parameter:{'n_neighbors': 7}
