# Cross Validation

#### Load the packages, import the data, pre-process the data, and run the model
Example using L2-regularized logistic regression (the default penalty of scikit-learn's `LogisticRegression`).

In [4]:
# Everything this cell needs, gathered in one place.
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Read the sample file (path is relative to the notebook's working directory).
data = pd.read_csv("./Data Files/Log_Reg_Sample_File.csv")

# Predictor columns (Gender is encoded below) and the target column.
feature_columns = [
    'Daily Time Spent on Site',
    'Age',
    'Area Income',
    'Daily Internet Usage',
    'Gender',
]
X = data[feature_columns]
y = data["Clicked on Ad"]

# Dummy-encode Gender (drop_first=True omits the first level), append the
# encoded column(s) to the feature frame, then remove the raw Gender column.
X_dummies = pd.get_dummies(X[["Gender"]], drop_first=True)
X = pd.concat([X, X_dummies], axis=1).drop(columns=["Gender"])

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1111,
)

# Fit a logistic regression with default settings on the training portion,
# then predict labels for the held-out rows.
log_model = LogisticRegression()
log_model.fit(X_train, y_train)
y_pred = log_model.predict(X_test)

#### Evaluate the Model using Classification Report

In [5]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Compute the hold-out metrics first so the print calls below stay short.
# Rows of the confusion matrix are the true classes, columns the predictions.
base_conf_matrix = pd.DataFrame(confusion_matrix(y_test, y_pred))
base_accuracy = round(accuracy_score(y_test, y_pred), 3)
base_report = classification_report(y_test, y_pred)

print("Base Logistic Regression Model (L2)", "\n")
# Accuracy is printed on the same line as the last row of the matrix.
print(base_conf_matrix, "      Accuracy:", base_accuracy, "\n")
print(base_report)

Base Logistic Regression Model (L2) 

     0    1
0  141    9
1   25  125       Accuracy: 0.887 

             precision    recall  f1-score   support

         No       0.85      0.94      0.89       150
        Yes       0.93      0.83      0.88       150

avg / total       0.89      0.89      0.89       300



#### Evaluate the Model using K-Fold Cross Validation

In [22]:
from sklearn.model_selection import cross_val_score

# Score the model with 10-fold cross-validation on the training data only;
# n_jobs=-1 lets scikit-learn use every available CPU core.
cv_results = cross_val_score(
    estimator=log_model,
    X=X_train,
    y=y_train,
    cv=10,
    n_jobs=-1,
)

# Summarise the per-fold scores as a single average.
avg_cv_accuracy = round(cv_results.mean(), 3)

print("Base Logistic Regression Model (L2) Cross Validation Results", "\n")
print("Avg. Accuracy:", avg_cv_accuracy)

Base Logistic Regression Model (L2) Cross Validation Results 

Avg. Accuracy: 0.9
