In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV, train_test_split

In [2]:
# Load seaborn's "diamonds" example dataset; every later cell works on `df`.
df = sns.load_dataset(name="diamonds")

In [3]:
# Discard the columns this analysis does not use as features.
# `df` is rebound to the returned frame instead of passing `inplace=True`:
# in-place mutation gives no performance benefit and prevents method chaining.
df = df.drop(columns=['depth', 'table', 'x', 'y', 'z'])

In [4]:
# One-hot encode the remaining non-numeric columns. `drop_first=True` omits the
# first level of each encoded column so the indicators are not perfectly collinear.
df = pd.get_dummies(data=df, drop_first=True)

In [5]:
# Replace carat and price with log(1 + x). `np.log1p` evaluates that expression
# directly and is more accurate for small x than the hand-written `np.log(1 + x)`.
# NOTE: this cell overwrites the columns, so re-running it on the same `df`
# applies the transform a second time — run it once per freshly loaded frame.
df['carat'] = np.log1p(df['carat'])
df['price'] = np.log1p(df['price'])

In [6]:
# Target is the (already transformed) price column; every other column is a feature.
y = df.loc[:, "price"]
X = df.drop("price", axis="columns")

In [7]:
# Hold out 33% of the rows for the final evaluation; the fixed seed keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [8]:
# Hyperparameter grid handed to the grid search in the next cell.
# Single-element lists pin a setting while still passing it through the grid.
param_grid = dict(
    # Loss functions to compare.
    loss=["squared_error", "epsilon_insensitive"],
    # Elastic-net only; the L1/L2 mix is controlled by `l1_ratio` below.
    penalty=["elasticnet"],
    # Regularisation strength: 15 log-spaced values from 1e-3 to 1e3.
    alpha=np.logspace(-3, 3, 15),
    # 0.0, 0.1, ..., 1.0
    l1_ratio=np.linspace(0, 1, 11),
    # 10 log-spaced iteration caps from 1 to 1000, truncated to integers.
    max_iter=np.logspace(0, 3, 10).astype(int),
    # Fixed seed so every candidate is fitted reproducibly.
    random_state=[42],
    # Constant step size, with the step itself searched via `eta0`.
    learning_rate=["constant"],
    # 1e-4, 1e-3, 1e-2, 1e-1
    eta0=np.logspace(-4, -1, 4),
)

In [9]:
# Base estimator; every hyperparameter (including the seed) comes from param_grid.
estimator = SGDRegressor()

# Exhaustive 5-fold cross-validated search over param_grid on all available cores.
# No `scoring` argument is given, so candidates are ranked with the estimator's
# own `score` method rather than with the MSE reported below.
sgd_cv = GridSearchCV(estimator=estimator, param_grid=param_grid, cv=5, n_jobs=-1)
sgd_cv.fit(X_train, y_train)

# Predict on the held-out split through the fitted search object.
y_pred = sgd_cv.predict(X_test)

# `price` was replaced by log(1 + price) earlier in the notebook, so this error
# is measured on that transformed scale, not in the original price units.
test_mse = mean_squared_error(y_test, y_pred)
print(test_mse)




0.043498537765514184
