In [None]:
# Simple Linear Regression 

In this example we will model sales based on the advertising budget (the 'Advert' column of the dataset).

In this notebook, we'll build a linear regression model to predict 'Sales' using 'Advert' as the predictor variable.


In [None]:
import pandas as pd
import numpy as np
import seaborn as sns

# To visualise in the notebook
%matplotlib inline

In [None]:
# Load the advertising dataset from the CSV file in the working directory.
adver_df = pd.read_csv(filepath_or_buffer="AdvertSales.csv")

Now, let's check the structure of the advertising dataset.

In [None]:
# Preview the top of the dataset (first 5 rows).
adver_df.head(n=5)

In [None]:
# Preview the bottom of the dataset (last 5 rows).
adver_df.tail(n=5)

In [None]:
# Print a concise summary of the DataFrame: column names, non-null counts
# and dtypes. Useful for spotting missing values or unexpected types.
adver_df.info()

In [None]:
# Size of the DataFrame as a (rows, columns) tuple
adver_df.shape

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the
# numeric columns of the dataframe.
adver_df.describe()

In [None]:
# Visualising Data Using Seaborn

In [None]:
# Pairwise relationship overview: seaborn's pairplot draws a grid with a
# scatter plot for every pair of numeric columns and each column's
# distribution on the diagonal. This shows how the variables relate to one
# another and, in particular, how each relates to the dependent variable.
#
# Pass diag_kind='kde' to draw density curves on the diagonal instead of
# histograms. The figure can also be saved to a .png file if a larger view
# is needed.
#
# The trailing semicolon suppresses the PairGrid object's text repr so that
# only the figure is shown.
sns.pairplot(adver_df);



In [None]:
# Performing Simple Linear Regression

In [None]:
# Feature matrix: select 'Advert' with a list of labels so that X stays a
# one-column DataFrame (2-D) rather than a Series.
X = adver_df.loc[:, ['Advert']]

# Preview the first 5 rows
X.head(n=5)

In [None]:
# Response variable: select 'Sales' with a list of labels, which keeps y as a
# one-column DataFrame (2-D) rather than a Series.
y = adver_df.loc[:, ['Sales']]

# Preview the first 5 rows
y.head(n=5)

In [None]:
## Splitting Data into Training and Testing Sets

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing. The fixed random_state makes the
# shuffle, and therefore the split, reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=10,
)

In [None]:
# Sanity-check the split sizes: training features/target first, then test.
for split_part in (X_train, y_train, X_test, y_test):
    print(split_part.shape)

In [None]:
## Performing Linear Regression

In [None]:
# Ordinary least squares regression from scikit-learn
from sklearn.linear_model import LinearRegression

# Create the estimator and fit it on the training split in one step;
# fit() returns the estimator itself, so lr_model is the fitted model.
lr_model = LinearRegression().fit(X_train, y_train)

# Echo the fitted estimator as the cell output
lr_model

In [None]:
## Coefficients Calculation

In [None]:
# Show the fitted parameters: the intercept first, then the coefficient(s).
for fitted_param in (lr_model.intercept_, lr_model.coef_):
    print(fitted_param)

In [None]:
## Predictions

In [None]:
# Predict the response for the held-out test features
y_pred = lr_model.predict(X=X_test)

In [None]:
# Dimensions of the prediction array
print(np.shape(y_pred))


In [None]:
import matplotlib.pyplot as plt

# Actual vs. predicted values on the test split: the closer the points lie
# to a straight diagonal, the better the predictions match the observations.
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred)
ax.set_xlabel('Y Test')
ax.set_ylabel('Predicted Y')

In [None]:
# Model score (R^2) on each split, printed under its label: training first,
# then test. A large gap between the two would suggest over- or under-fitting.
for split_label, features, target in (
    ("Train", X_train, y_train),
    ("Test", X_test, y_test),
):
    print(split_label)
    print(lr_model.score(features, target))

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# Mean squared error between the observed and predicted test responses
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

In [None]:
# Not strictly needed, since the model score above already reports R^2 for
# the test split; computed explicitly here for demonstration purposes only.
r_squared = r2_score(y_true=y_test, y_pred=y_pred)


In [None]:
# Report both test-set metrics, one "label value" pair per line.
for metric_label, metric_value in (
    ('Mean_Squared_Error :', mse),
    ('r_square_value :', r_squared),
):
    print(metric_label, metric_value)