In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV

In [3]:
# Load the advertising dataset and drop the 'Ad Topic Line' and 'Timestamp'
# columns, which this notebook does not use as model inputs.
df = pd.read_csv("./datasets/advertising.csv").drop(
    columns=['Ad Topic Line', 'Timestamp']
)

In [4]:
# One-hot encode the 'Country' and 'City' columns: each distinct value becomes
# its own indicator column named '<column>_<value>'.
df = pd.get_dummies(data=df, columns=['Country', 'City'])


In [5]:
# Preview the first five rows of the encoded frame.
df.head(n=5)

Unnamed: 0,Daily Time Spent on Site,Age,Area Income,Daily Internet Usage,Male,Clicked on Ad,Country_Afghanistan,Country_Albania,Country_Algeria,Country_American Samoa,...,City_Wintersfort,City_Wongland,City_Wrightburgh,City_Wrightview,City_Yangside,City_Youngburgh,City_Youngfort,City_Yuton,City_Zacharystad,City_Zacharyton
0,68.95,35,61833.9,256.09,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
1,80.23,31,68441.85,193.77,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,69.47,26,59785.94,236.5,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,74.15,29,54806.18,245.89,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,68.37,35,73889.99,225.58,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [7]:
# Separate the label from the inputs: 'Clicked on Ad' is the value to predict,
# every remaining column is used as a feature.
y = df['Clicked on Ad']
X = df.drop(columns='Clicked on Ad')

In [10]:
# Hold out 30% of the rows for evaluation; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=10,
)

In [13]:
# Baseline: an SVC left at scikit-learn's default hyperparameters.
# NOTE(review): the features are passed in unscaled (e.g. 'Area Income' is in
# the tens of thousands next to 0/1 indicator columns). SVMs are sensitive to
# feature scale -- consider standardising the inputs; confirm before changing.
model = SVC().fit(X_train, y_train)  # fit() returns the fitted estimator

model_predict = model.predict(X_test)

# Confusion matrix: rows are the true classes, columns the predicted classes
print(confusion_matrix(y_true=y_test, y_pred=model_predict))

# Per-class precision / recall / F1 on the held-out rows
print(classification_report(y_true=y_test, y_pred=model_predict))

[[124  22]
 [ 68  86]]
              precision    recall  f1-score   support

           0       0.65      0.85      0.73       146
           1       0.80      0.56      0.66       154

    accuracy                           0.70       300
   macro avg       0.72      0.70      0.70       300
weighted avg       0.72      0.70      0.69       300



In [16]:
# Candidate values for the SVC regularisation parameter (C) and kernel
# coefficient (gamma) that the grid search will try.
hyperparameters = {
    'C': [10, 25, 50],
    'gamma': [1e-3, 1e-4, 1e-5],
}

In [17]:
# Cross-validate every C/gamma combination to improve on the default SVC.
# The fit below is the slow step (30+ seconds).
grid = GridSearchCV(estimator=SVC(), param_grid=hyperparameters)
grid.fit(X_train, y_train)

# Best-scoring parameter combination found by the search
grid.best_params_

{'C': 50, 'gamma': 1e-05}

In [20]:
# Evaluate the tuned model on the held-out rows; predicting through the
# search object uses the estimator refit with the best parameters found.
grid_prediction = grid.predict(X_test)

# Same two reports as for the baseline model, for direct comparison
print(confusion_matrix(y_true=y_test, y_pred=grid_prediction))
print(classification_report(y_true=y_test, y_pred=grid_prediction))

[[129  17]
 [ 15 139]]
              precision    recall  f1-score   support

           0       0.90      0.88      0.89       146
           1       0.89      0.90      0.90       154

    accuracy                           0.89       300
   macro avg       0.89      0.89      0.89       300
weighted avg       0.89      0.89      0.89       300

