In [1]:
import numpy as np
import pandas as pd
import plotly.io as pio
import plotly.express as px
import plotly.graph_objects as go
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

pio.templates.default = "plotly_dark"

In [2]:
# Leitura do dataset
DATASET_PATH = '../data/salary_sales.csv'
data = pd.read_csv(DATASET_PATH).dropna()
data = data[~((data['EstimatedSalary']<=60000)&(data['Purchased']==1))]
data = data[~((data['EstimatedSalary']>=80000)&(data['Purchased']==0))]
data = data.sort_values(by='EstimatedSalary').reset_index(drop=True)
data.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15610140,Female,31,15000,0
1,15792008,Male,30,15000,0
2,15809823,Male,26,15000,0
3,15679651,Female,26,15000,0
4,15685576,Male,26,16000,0


In [3]:
px.scatter(data, 'EstimatedSalary' , 'Purchased')

In [4]:
# Variáveis: X -> preditora e Y -> resposta
X = ['EstimatedSalary']
Y = 'Purchased'

# Ajustando o modelo
model = LogisticRegression(penalty='l1', solver='liblinear')
model.fit(data[X].values, data[Y].values)

# Salvando os coeficientes
b0 = model.intercept_
b1 = model.coef_[0]

Y_PRED = model.predict_proba(data[X].__array__())
Y_PRED = np.delete(Y_PRED, 0, 1).reshape(1, -1)[0]

print(b0, b1)

[-13.85528893] [0.00017103]



Liblinear failed to converge, increase the number of iterations.



In [5]:
# Ajustando o modelo
linear_model = LinearRegression()
linear_model.fit(data[X].values, data[Y].values)

# Salvando os coeficientes
b0 = linear_model.intercept_
b1 = linear_model.coef_[0]

Y_PRED_LINEAR = linear_model.predict(data[X].values.reshape(-1,1))

print(b0, b1)

-0.4491944652360248 1.0813376800128682e-05


In [6]:
fig = px.scatter(data, 'EstimatedSalary' , 'Purchased')
fig.add_trace(go.Scatter(x=data.EstimatedSalary, y=Y_PRED, mode="lines", name="Regressão Logística"))
fig.add_trace(go.Scatter(x=data.EstimatedSalary, y=Y_PRED_LINEAR, mode="lines", name="Regressão Linear"))

In [10]:
RESIDUALS = data.Purchased - Y_PRED_LINEAR

In [14]:
px.scatter(Y_PRED_LINEAR, RESIDUALS)

: 

In [None]:
px.histogram(RESIDUALS)