# Simple Linear Regression


## Import packages and read data
 

In [None]:
# import relevant packages
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Read the advertising-budget CSV into a pandas DataFrame.
# The path is relative, so it is resolved against the current working directory.
df = pd.read_csv(filepath_or_buffer="../datasets/SLR_advertising_budget.csv")


## Explore the data

It is generally good practice to explore the data after it has been loaded into a dataframe, especially if you are working with a new dataset or are not familiar with the data.

Exploring the data can help you to identify any issues or anomalies in the dataset, such as missing values, outliers, or incorrect data types. It can also help you to understand the structure of the data and the relationships between the different variables in the dataset.

*Run the cells below to get a better understanding of the dataset.*

In [None]:
# Preview the first 5 rows to check the column names and sample values
df.head(5)

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
df.describe()

In [None]:
# Print the column names, non-null counts and dtypes to spot missing values or wrong types
df.info()

In [None]:
# Dimensions of the dataframe as a (rows, columns) tuple
df.shape

In [None]:
# Pairwise correlation between the columns (Pearson by default); values near
# +1 or -1 indicate a strong linear relationship.
# NOTE(review): on pandas >= 2.0 this raises if the dataframe contains
# non-numeric columns - confirm all columns are numeric, or pass numeric_only=True.
df.corr()


We can get a feeling for the data by plotting it as a scatter graph to see if we can identify a pattern or trend.

![correlation800.png](attachment:3b66b130-f545-43c8-a7c6-1309ad10e29d.png)

If we can identify a **linear** relationship (positive or negative) between our variables, we can use this for our **linear regression.**
<br>
<br>
*Run the cells below to see the pattern in our dataset.*

In [None]:
# Extract the predictor variable (Advertising) and the response variable (Sales)
advertising_spend = df["Advertising"]
sales = df["Sales"]

# Create a scatter plot of the data points
plt.scatter(advertising_spend, sales)

# Label the axes so the plot can be read on its own
plt.xlabel('Advertising')
plt.ylabel('Sales')
plt.title('Scatter Plot showing positive correlation')

# Render the figure explicitly; otherwise the notebook also echoes the
# Text object returned by the last plt call
plt.show()

# Line of best fit



In [None]:
# Fit a straight line (degree-1 polynomial) to the data by least squares.
# polyfit returns the coefficients highest power first: slope, then intercept.
m, c = np.polyfit(advertising_spend, sales, deg=1)

# Sales predicted by the fitted line for every observed advertising value
fitted_sales = m * advertising_spend + c

# Draw the observations, then overlay the fitted line in red
plt.scatter(advertising_spend, sales)
plt.plot(advertising_spend, fitted_sales, color='red')

# Title and axis labels
plt.title('Scatter Plot with Line of Best Fit')
plt.xlabel('Advertising')
plt.ylabel('Sales')

# Render the figure
plt.show()

# Report the fitted equation, with slope and intercept to 4 decimal places
print(f"The equation of the line of best fit is y = {m:.4f}x + {c:.4f}")

# Predict a value with the line of best fit (LBF) equation


In [None]:
# Ask the user for an advertising budget; input() always returns a string
x = input("Please enter the advertising budget: ")

try:
    # Convert the typed text to a number, then apply the fitted line y = m*x + c.
    # (The string must be converted directly; it cannot be indexed with 'new'.)
    y = m*float(x) + c
except ValueError:
    # float() rejects empty or non-numeric text
    print(f"'{x}' is not a valid number. Please enter a numeric advertising budget.")
else:
    print(f"The estimated number of sales: {y:.2f}")

In [None]:
from ipywidgets import IntSlider, Output
from IPython.display import display, clear_output

# Output area that holds the prediction text. It is created before the
# callback is defined and registered, so the callback can never run against
# a name that does not exist yet.
output = Output()

# Slider for the advertising budget, from 0 up to the largest budget in the data.
# IntSlider stores integers, so the initial value is an int literal and the
# maximum is converted to int explicitly rather than relying on implicit casting.
x = IntSlider(value=0, min=0, max=int(advertising_spend.max()), step=1)


def estimation(x):
    """Print the estimated number of sales for the slider's new value.

    Parameters
    ----------
    x : dict-like
        Change notification passed in by ``observe``; ``x['new']`` is read as
        the slider's updated value. Inside this function the parameter
        shadows the module-level slider that is also named ``x``.
    """
    with output:
        # wait=True delays clearing until the new text is ready, which avoids
        # flicker while the slider is being dragged
        clear_output(wait=True)
        y = m*float(x['new']) + c
        print(f"The estimated number of sales: {y:.2f}")


# Re-run the estimate every time the slider's value changes
x.observe(estimation, names='value')

# Show the slider with the output area underneath it
display(x)
display(output)