In [14]:
import numpy as np
import pandas as pd
import plotly.io as pio
import plotly.express as px
import plotly.graph_objects as go
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Make Plotly's "plotly_dark" template the default for the figures created
# after this assignment, so individual plotting cells do not need to set it.
pio.templates.default = "plotly_dark"

In [2]:
# Load the dataset, keeping only the columns used in this analysis.
DATASET_PATH = '../data/marketing_sales_data.csv'

# Original CSV header -> snake_case name used throughout the notebook.
column_names = {'Radio': 'radio', 'Social Media': 'social_media', 'Sales': 'sales', 'TV': 'tv'}

data = (
    pd.read_csv(DATASET_PATH, usecols=list(column_names))
    .rename(columns=column_names)
    # Rows are ordered by the predictor so fitted curves can later be drawn
    # as continuous lines over `radio`.
    .sort_values(by='radio')
)
data.head()

Unnamed: 0,tv,radio,social_media,sales
342,Low,0.109106,0.981484,54.634711
76,Medium,0.132418,0.091141,155.79684
208,Low,0.216101,0.753243,41.396515
346,Low,0.38804,1.104823,52.038374
493,Low,0.426664,1.40327,49.217335


Primeiro, vamos ajustar um modelo de regressão linear de `sales` em função de `radio` e calcular o RSS (reportado aqui como MSE = RSS/n, junto com R², MAE e MAPE).

In [3]:
# Scatter plot of the raw relationship between the predictor (radio) and the
# response (sales), shown before any model is fitted.
fig = px.scatter(data_frame=data, x="radio", y="sales")
fig

In [4]:
# Variables: X -> predictor column(s), Y -> response column.
X = ['radio']
Y = 'sales'

# Fit ordinary least squares on the 2-D predictor matrix and 1-D response.
model = LinearRegression().fit(data[X].to_numpy(), data[Y].to_numpy())

# Keep the fitted intercept and the (single) slope coefficient.
b0, b1 = model.intercept_, model.coef_[0]

# In-sample predictions, computed on the same predictor matrix used for fitting.
Y_PRED = model.predict(data[X].to_numpy())

print(b0, b1)

43.87894296441644 8.29982032896824


In [5]:
# Overlay the fitted regression line on the observed points.
linear_fit_trace = go.Scatter(
    x=data['radio'],
    y=Y_PRED,
    mode="lines",
    name="Regressão Linear",
)
fig = px.scatter(data, x='radio', y='sales')
fig.add_trace(linear_fit_trace)

In [17]:
# In-sample error metrics of the linear model: each scorer is called with the
# observed sales first and the linear predictions second.
MSE, R2, MAE, MAPE = (
    metric(data['sales'], Y_PRED)
    for metric in (
        mean_squared_error,
        r2_score,
        mean_absolute_error,
        mean_absolute_percentage_error,
    )
)

MSE, R2, MAE, MAPE

(2126.7500900523182, 0.736226226099673, 37.00112700809608, 0.2781558955985846)

: 

In [10]:
# Residual of each observation: observed sales minus the linear prediction.
RESIDUALS = data['sales'].sub(Y_PRED)

In [12]:
# Distribution of the linear-model residuals.
px.histogram(data_frame=RESIDUALS)

In [13]:
# Residuals-vs-fitted plot for the linear model.
# The first positional parameter of px.scatter is `data_frame` and the second
# is `x`, so the values must be passed by keyword; otherwise Y_PRED is taken
# as the data frame and the residuals end up on the x axis with no y values.
# Axis titles are set explicitly because RESIDUALS is derived from the `sales`
# column and would otherwise be labelled with that column's name.
px.scatter(x=Y_PRED, y=RESIDUALS).update_layout(
    xaxis_title="Vendas previstas",
    yaxis_title="Resíduo",
)

Agora, vamos ajustar um modelo de regressão polinomial de grau 3 e calcular o RSS (reportado aqui como MSE = RSS/n, junto com o R²) para comparar com o modelo linear.

In [7]:
# Expand the predictor into polynomial terms up to degree 3. The bias column
# is omitted (include_bias=False); LinearRegression fits its own intercept.
poly = PolynomialFeatures(degree=3, include_bias=False)
poly_features = poly.fit_transform(data[X].values)

# Ordinary least squares on the expanded features.
poly_reg_model = LinearRegression()
poly_reg_model.fit(poly_features, data[Y].values)

# In-sample predictions. The expanded matrix computed above is reused instead
# of transforming the same predictor values a second time.
Y_PRED_POLY = poly_reg_model.predict(poly_features)

In [8]:
# Compare both fits against the observed points: one line trace per model,
# drawn over the same (sorted) radio values.
fig = px.scatter(data, x='radio', y='sales')
fig.add_traces([
    go.Scatter(x=data.radio, y=predictions, mode="lines", name=trace_name)
    for trace_name, predictions in (
        ("Regressão Linear", Y_PRED),
        ("Regressão Polinomial", Y_PRED_POLY),
    )
])

In [9]:
# In-sample MSE and R² of the polynomial model (observed sales first,
# polynomial predictions second). Note: this rebinds MSE and R2.
MSE, R2 = (
    metric(data['sales'], Y_PRED_POLY)
    for metric in (mean_squared_error, r2_score)
)

MSE, R2

(2073.9805772453137, 0.7427710541004293)