### Import Libraries and show the data set ###

In [51]:
import numpy as np
import pandas as pd
from pathlib import Path
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression

In [52]:
# Read the sales data from 'sales.csv' (path is relative to the working directory)

df_sales = pd.read_csv(Path('sales.csv'))

In [53]:
# Preview the first five rows of the sales data
df_sales.head(n=5)

Unnamed: 0,ads,sales
0,21,8350
1,180,22820
2,50,12950
3,195,21105
4,96,15008


### Prep the variables and set up the model ###

In [54]:
# Define the feature matrix X and the target y.
# scikit-learn estimators expect X to be 2-D (n_samples, n_features), so the
# single "ads" column is reshaped into a column vector; y stays a 1-D Series.

X = df_sales["ads"].values.reshape(-1, 1)
y = df_sales["sales"]

In [55]:
# Create an ordinary least squares linear regression estimator
model = LinearRegression()

# Train the estimator on the ads (X) and sales (y) data
model.fit(X=X, y=y)

In [56]:
# Score the fitted model on the same data it was trained on
# (for a regressor, `score` is the coefficient of determination, R^2)
model.score(X=X, y=y)

0.9219961974942597

In [57]:
# Check the coefficient: the fitted slope for the single "ads" feature
# (coef_ is an array with one entry per column of X)

print(model.coef_)

[81.34898394]


In [58]:
# Check the intercept: the fitted value of y when X is 0

print(model.intercept_)

7764.796945240409


In [59]:
# Best Fit formula: y = a + bx
# where a is the fitted intercept and b is the fitted slope
# (the first, and here only, entry of coef_)

print(f"Best Fit formula: y = {model.intercept_} + {model.coef_[0]}X")

Best Fit formula: y = 7764.796945240409 + 81.34898393753781X


### Plot The Best Fit Line ###

In [60]:
# Run the fitted model over the same X used for training to get predicted sales

y_predict = model.predict(X=X)

In [61]:
# Build a new DataFrame holding the original columns plus the model's predictions.
# `assign` returns a new frame, so `df_sales` itself is left unchanged.

df_sales_predicted = df_sales.assign(sales_predicted=y_predict)

# Preview the first rows: actual sales next to predicted sales
df_sales_predicted.head()

Unnamed: 0,ads,sales,sales_predicted
0,21,8350,9473.125608
1,180,22820,22407.614054
2,50,12950,11832.246142
3,195,21105,23627.848813
4,96,15008,15574.299403


### Linear Regression Model Analysis ###

In [63]:
# Import relevant metrics from scikit-learn: mean squared error and r2 (score comes from the model; rmse and std are computed with NumPy)

from sklearn.metrics import mean_squared_error, r2_score

In [65]:
# Evaluate the fitted model against the data it was trained on.
# `score` and `r2` are the same quantity (R^2) obtained two different ways.
mse = mean_squared_error(y_true=y, y_pred=y_predict)
rmse = np.sqrt(mse)
r2 = r2_score(y_true=y, y_pred=y_predict)
score = model.score(X, y)
std = np.std(y)  # spread of the actual sales values

# Print relevant metrics.
print(f"The score is {score}.")
print(f"The r2 is {r2}.")
print(f"The mean squared error is {mse}.")
print(f"The root mean squared error is {rmse}.")
print(f"The standard deviation is {std}.")

The score is 0.9219961974942597.
The r2 is 0.9219961974942597.
The mean squared error is 1922652.7853956893.
The root mean squared error is 1386.5975571144243.
The standard deviation is 4964.6946616416735.
