In [140]:
import numpy as np
import pandas as pd
from sklearn.linear_model import Lasso
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

In [141]:
# Build a reproducible synthetic dataset: four uniformly distributed
# features plus a target that is a linear combination of them with
# Gaussian noise (mean 0, standard deviation 5).
np.random.seed(42)
n_samples = 1000

# The features are drawn in dictionary order; that order determines which
# random numbers each column receives, so it must not be rearranged.
df = pd.DataFrame({
    name: np.random.uniform(low, high, n_samples)
    for name, (low, high) in {
        'Revenue': (50, 200),
        'Operating_Costs': (20, 100),
        'Marketing_Spend': (5, 50),
        'Market_Share': (5, 30),
    }.items()
})

# The noise is drawn after all features, then added as the last term.
noise = np.random.normal(0, 5, n_samples)
df['Net_Income'] = (
    0.5 * df['Revenue']
    - 0.3 * df['Operating_Costs']
    + 0.8 * df['Marketing_Spend']
    + 2 * df['Market_Share']
    + noise
)

df.head()

Unnamed: 0,Revenue,Operating_Costs,Marketing_Spend,Market_Share,Net_Income
0,106.181018,34.810634,16.776756,21.817575,106.347077
1,192.607146,63.352076,16.114046,24.917035,141.589179
2,159.799091,89.835667,45.781456,11.261697,109.064888
3,139.798773,78.577991,16.229579,20.621852,102.832878
4,73.402796,84.524892,17.237738,19.29365,61.425968


In [142]:
# Target is Net_Income; every other column of df is a predictor.
y = df['Net_Income']
X = df.drop(columns=['Net_Income'])

In [143]:
# Hold out 20% of the rows for the final evaluation; the fixed
# random_state makes the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [144]:
# Two-step estimator: standardise the features, then fit a Lasso regression.
# The step names ('scale', 'lasso') are the prefixes used to address
# hyper-parameters, e.g. 'lasso__alpha'.
pipeline = Pipeline(steps=[
    ('scale', StandardScaler()),
    ('lasso', Lasso(max_iter=10_000)),
])

In [145]:
# Candidate regularisation strengths for the pipeline's 'lasso' step.
param_grid = dict(lasso__alpha=[0.001, 0.01, 0.1, 1, 10])

In [146]:
# Exhaustive search over param_grid with 5-fold cross-validation.
# The scorer is negated MSE, so higher (closer to zero) is better.
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
)

In [147]:
# Run the search on the training split only; the test split stays unseen.
grid_search.fit(X=X_train, y=y_train)

In [148]:
# Report the winning hyper-parameters and their mean cross-validated score.
for label, value in (
    ("Melhores parâmetros:", grid_search.best_params_),
    ("Melhor Score (neg_mean_squared_error):", grid_search.best_score_),
):
    print(label, value)

Melhores parâmetros: {'lasso__alpha': 0.001}
Melhor Score (neg_mean_squared_error): -26.181840502922178


In [149]:
# Score the fitted search object on the held-out test split.
y_pred = grid_search.predict(X_test)

# Compute both metrics against the same predictions.
mse, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_squared_error, r2_score)
)

print("Mean Squared Error no teste:", mse)
print("R2 no teste:", r2)

Mean Squared Error no teste: 26.288665174113472
R2 no teste: 0.9660752039005878
