In [1]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

In [2]:
# Build a small synthetic regression problem: 100 samples, a single feature,
# Gaussian noise (standard deviation 0.2) on the target, seeded so the data
# is the same on every run.
X, y = make_regression(
    n_samples=100,
    n_features=1,
    noise=0.2,
    random_state=42,
)

In [3]:
# Check the dimensions of the feature matrix, reported as (n_samples, n_features).
X.shape

(100, 1)

In [4]:
# Show the raw feature values.
# NOTE(review): this echoes the entire array; a slice such as X[:5] would keep
# the rendered output short.
X

array([[ 0.93128012],
       [ 0.08704707],
       [-1.05771093],
       [ 0.31424733],
       [-0.47917424],
       [ 0.64768854],
       [-0.46341769],
       [ 0.54256004],
       [ 0.61167629],
       [ 1.0035329 ],
       [ 0.8219025 ],
       [ 1.53803657],
       [ 0.73846658],
       [-0.21967189],
       [-0.8084936 ],
       [ 0.09176078],
       [-1.95967012],
       [ 0.51326743],
       [ 1.03099952],
       [-2.6197451 ],
       [ 0.49671415],
       [ 0.09707755],
       [-0.46572975],
       [ 0.91540212],
       [ 1.56464366],
       [ 1.46564877],
       [-0.60063869],
       [-0.03582604],
       [-0.60170661],
       [-1.19620662],
       [ 0.35711257],
       [ 0.37569802],
       [ 0.26105527],
       [-0.5297602 ],
       [-0.90802408],
       [ 0.19686124],
       [-0.29900735],
       [ 0.36163603],
       [ 0.82254491],
       [-0.29169375],
       [ 0.36139561],
       [-0.676922  ],
       [ 1.52302986],
       [-0.51827022],
       [-0.23415337],
       [-0

In [5]:
# Check the dimensions of the target vector: 1-D, one value per sample.
y.shape

(100,)

In [6]:
# Show the raw target values.
# NOTE(review): this echoes the entire array; a slice such as y[:5] would keep
# the rendered output short.
y

array([  39.11080232,    3.3594662 ,  -43.96538547,   13.0652237 ,
        -19.95600228,   27.12581951,  -19.55807077,   22.31235228,
         25.49086799,   41.69766791,   34.33628725,   64.45301651,
         30.9316748 ,   -8.92746991,  -34.02869364,    4.00593049,
        -81.87189508,   21.39689305,   42.7470462 , -109.52457141,
         20.654292  ,    4.02729725,  -19.6385896 ,   38.0172885 ,
         65.49805491,   61.24575648,  -24.79027875,   -1.41052104,
        -25.29202422,  -50.17435167,   15.01386367,   15.95885182,
         10.79414124,  -22.1043778 ,  -38.01686587,    7.97262989,
        -12.63525196,   14.94484971,   34.67525068,  -11.89715292,
         15.23904569,  -28.32036731,   63.53972984,  -21.61441609,
         -9.73083174,  -16.15791152,  -12.60058704,  -27.154094  ,
         13.91786905,    8.72002514,  -13.52156857,    0.35546315,
        -10.17759719,   62.23580801,  -15.56592555,   14.55177762,
         13.95082829,   -7.58012968,   -9.77342929,  -83.01123

In [7]:
# Hold out 20% of the samples as a test set; the fixed seed keeps the
# partition identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [8]:
# Base Linear Regression estimator with default settings; it is handed to the
# grid search, which tries the different hyperparameter combinations.
model = LinearRegression()

In [9]:
# Candidate settings to search over: intercept fitting on/off combined with
# the positive-coefficient constraint on/off (2 x 2 = 4 combinations).
param_grid = {
    'fit_intercept': [True, False],
    'positive': [True, False],
}

In [10]:
# Exhaustive search over param_grid using 5-fold cross-validation on the
# training split; verbose=3 logs the score and timing of every individual fit.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=5,
    verbose=3,
)
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 4 candidates, totalling 20 fits
[CV 1/5] END .fit_intercept=True, positive=True;, score=1.000 total time=   0.0s
[CV 2/5] END .fit_intercept=True, positive=True;, score=1.000 total time=   0.0s
[CV 3/5] END .fit_intercept=True, positive=True;, score=1.000 total time=   0.0s
[CV 4/5] END .fit_intercept=True, positive=True;, score=1.000 total time=   0.0s
[CV 5/5] END .fit_intercept=True, positive=True;, score=1.000 total time=   0.0s
[CV 1/5] END fit_intercept=True, positive=False;, score=1.000 total time=   0.0s
[CV 2/5] END fit_intercept=True, positive=False;, score=1.000 total time=   0.0s
[CV 3/5] END fit_intercept=True, positive=False;, score=1.000 total time=   0.0s
[CV 4/5] END fit_intercept=True, positive=False;, score=1.000 total time=   0.0s
[CV 5/5] END fit_intercept=True, positive=False;, score=1.000 total time=   0.0s
[CV 1/5] END fit_intercept=False, positive=True;, score=1.000 total time=   0.0s
[CV 2/5] END fit_intercept=False, positive=True;,

In [11]:
# Retrieve the estimator selected by the grid search (exposed as best_estimator_).
best_model = grid_search.best_estimator_

In [12]:
# Predict targets for the held-out test features using the best model.
y_pred = best_model.predict(X_test)

In [13]:
# Score the held-out predictions with mean squared error (lower is better).
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

In [14]:
# Report the winning hyperparameter combination and the test-set error,
# one "label value" line each.
for label, value in (
    ("Best Hyperparameters:", grid_search.best_params_),
    ("Mean Squared Error:", mse),
):
    print(label, value)

Best Hyperparameters: {'fit_intercept': False, 'positive': True}
Mean Squared Error: 0.04210298183421305
