
# Logistic Regression Project 


## Importing Libraries


In [51]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
%matplotlib inline

## Get the Data
**We are going to read an advertising.csv file and set it to a data frame called ad_data.**

In [52]:
# Load the advertising dataset (expected in the notebook's working directory).
ad_data = pd.read_csv(filepath_or_buffer='advertising.csv')

FileNotFoundError: [Errno 2] No such file or directory: 'advertising.csv'

**Check the head of ad_data**

In [None]:
# Preview the first five rows to check that the file loaded as expected.
ad_data.head(n=5)

In [None]:
# Summary statistics; with include/exclude left at None pandas reports
# only the numeric columns of a mixed-type frame.
ad_data.describe(include=None, exclude=None)

## Exploratory Data Analysis

To get a better feel for the information provided, we are going to look at some descriptive plots.

In [None]:
# Distribution of user age, grouped into 10 bins.
sns.histplot(
    x="Age",
    bins=10,
    data=ad_data,
)

**Income vs Age**

In [None]:
# Scatter of Age against Area Income with a fitted regression line.
sns.jointplot(
    x="Age",
    y="Area Income",
    kind="reg",
    data=ad_data,
)

**Daily Time spent on site vs. Age.**

In [None]:
# Hexbin density of Age against Daily Time Spent on Site.
sns.jointplot(
    x="Age",
    y="Daily Time Spent on Site",
    kind="hex",
    data=ad_data,
)

**'Daily Time Spent on Site' vs. 'Daily Internet Usage'**

In [None]:
# Daily Time Spent on Site against Daily Internet Usage, with a regression fit.
sns.jointplot(
    x="Daily Time Spent on Site",
    y="Daily Internet Usage",
    kind="reg",
    data=ad_data,
)

**Pairplot with the hue defined by the 'Clicked on Ad' column.**

In [None]:
# Pairwise relationships between the variables, coloured by the target
# column 'Clicked on Ad', with a regression fit in each panel.
sns.pairplot(
    hue="Clicked on Ad",
    kind="reg",
    data=ad_data,
)

# Logistic Regression

Now it's time to do a train test split, and train our model

In [None]:
# Visual check for missing values: every cell of the boolean null-mask is
# drawn, so any missing entry stands out in a contrasting colour.
sns.heatmap(
    ad_data.isnull(),
    cmap='viridis',
    cbar=False,
    yticklabels=False,
)

In [None]:
# Remove the columns that are not used as model inputs (presumably free-text /
# timestamp fields -- confirm against the dataset).
# Reassigning instead of using inplace=True, together with errors='ignore',
# makes this cell safe to re-run: a second execution no longer raises KeyError
# because the columns are already gone.
ad_data = ad_data.drop(
    columns=['Ad Topic Line', 'City', 'Country', 'Timestamp'],
    errors='ignore',
)


In [None]:
# Confirm which columns remain after the drop.
ad_data.head(n=5)

**Split the data into a training set and a testing set using train_test_split**

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Features are every column except the target 'Clicked on Ad'; 30% of the rows
# are held out for testing, with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    ad_data.drop(columns='Clicked on Ad'),
    ad_data['Clicked on Ad'],
    random_state=101,
    test_size=0.30,
)

Now we need to train and fit a logistic regression model on the training set

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Fit a logistic regression with scikit-learn's default settings on the
# training split; fit() returns the estimator itself, so it can be chained.
logmodel = LogisticRegression().fit(X_train, y_train)
logmodel


In [None]:
# Predicted class labels for the held-out test rows.
predictions = logmodel.predict(X=X_test)

## Predictions and Evaluations
Predict values for the testing data.

In [None]:
from sklearn.metrics import classification_report

To see how well our predictive model performs on the test data, we need to compute the corresponding measures of precision, accuracy and f1-score, together with a confusion matrix.

In [None]:
# Per-class precision, recall and f1-score plus overall accuracy on the test set.
print(classification_report(y_true=y_test, y_pred=predictions))

In [None]:
# Cross-tabulate actual vs. predicted labels and draw the counts as an
# annotated heatmap (fmt='d' renders the cell values as integers).
# NOTE: the variable name would shadow sklearn.metrics.confusion_matrix if that
# function were imported in this notebook.
confusion_matrix = pd.crosstab(y_test, predictions, rownames=['Actual'], colnames=['Predicted'])
# Title spelling corrected; the trailing ';' hides the [Text(...)] repr.
sns.heatmap(confusion_matrix, annot=True, fmt='d').set(title='Confusion Matrix');

In [None]:
# save the model to disk
filename = 'advertising_model.sav'
# Context managers guarantee the file is flushed and closed, so the handle is
# not leaked and the read below sees the complete pickle.
with open(filename, 'wb') as model_file:
    pickle.dump(logmodel, model_file)

# load the model from disk
# NOTE: pickle.load can execute arbitrary code -- only load files you created
# yourself or otherwise trust.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Sanity check: the reloaded model is scored on the same test split
# (for scikit-learn classifiers, score() is the mean accuracy).
result = loaded_model.score(X_test, y_test)
print(result)