In [9]:
# Load libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score, KFold
from sklearn.metrics import mean_squared_error 
from sklearn.linear_model import LinearRegression, RidgeCV
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer 
from sklearn.pipeline import Pipeline

In [2]:
# Load data
datos = pd.read_csv("yield.csv")

In [3]:
# Define roles for diamantes1.csv
y = datos.Yield
X = datos.drop(datos.columns[[57, 58]], axis=1)

In [4]:
# Define the preprocessing pipeline
categorical_features = X.select_dtypes(include=['object','category']).columns
numeric_features = X.select_dtypes(exclude=['object','category']).columns

preprocessor = ColumnTransformer(
        transformers=[
            ('cat',OneHotEncoder(handle_unknown='ignore',sparse_output=False),categorical_features),
            ('num',StandardScaler(),numeric_features)
        ]
)

In [5]:
# Combine preprocessing model and the linear regression model into a single pipeline
modelo_lr = Pipeline(steps=[
    ('preprocessor',preprocessor),
    ('linreg',LinearRegression())
])

In [6]:
# Setup 10-fold cross-validation
random_seed = 1
kf = KFold(n_splits=10,shuffle=True,random_state=random_seed)
scores = cross_val_score(modelo_lr,X,y,cv=kf,scoring='neg_mean_squared_error')
rmse_lr = np.sqrt(np.mean(-1*scores))
rmse_lr

1.4378914851279516

In [13]:
# RidgeCV
alphas = np.logspace(-6, 6, 30)

modelo_ridge= Pipeline(steps=[
    ('preprocessor',preprocessor),
    ('ridge',RidgeCV(alphas=alphas, store_cv_values=True))
])


# Cross-validation for RidgeCV
scores_ridge = cross_val_score(modelo_ridge, X, y, cv=kf, scoring='neg_mean_squared_error')
rmse_ridge = np.sqrt(np.mean(-scores_ridge))



In [14]:
rmse_ridge

1.9945218293760152

In [15]:
# Fit RidgeCV to find the best alpha
modelo_ridge.fit(X, y)
best_alpha = modelo_ridge.alpha_

AttributeError: 'Pipeline' object has no attribute 'alpha_'

In [None]:
print(f'RMSE for Ridge Regression: {rmse_ridge}')
print(f'Best alpha for Ridge Regression: {best_alpha}')