#Linear Regression, Ridge Regression and Lasso Regression_Xiao Fan

# In [5]:
import pandas as pd
from sklearn.model_selection import cross_val_score, KFold
from sklearn.linear_model import Lasso, LinearRegression, Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error

# Load the modelling dataset from the Kaggle input directory
train_data = pd.read_csv('/kaggle/input/dm-dataset-2/model_dataset.csv')

# The target is the 'score' column; the features are every remaining column
# except the 'id' column
y = train_data['score']
X = train_data.drop(columns=['id', 'score'])

# 10-fold cross-validation, shuffled with a fixed seed for reproducible splits
kf = KFold(shuffle=True, n_splits=10, random_state=42)

# Define a function that evaluates a model and returns its cross-validated MSE
def evaluate_model(pipeline, X, y, kf):
    """Return the mean cross-validated mean squared error of ``pipeline``.

    Args:
        pipeline: Estimator passed to ``cross_val_score``.
        X: Feature matrix.
        y: Target values.
        kf: Cross-validation splitter, forwarded as ``cv``.

    Returns:
        The per-fold MSE averaged over all folds, as a positive number.
    """
    neg_mse_per_fold = cross_val_score(
        pipeline, X, y, scoring='neg_mean_squared_error', cv=kf
    )
    # The scorer reports the negated MSE, so flip the sign of the average.
    return -neg_mse_per_fold.mean()

# Lasso regression (alpha=0.5) on standardized features, scored by
# cross-validated MSE
lasso_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('lasso', Lasso(random_state=42, alpha=0.5)),
])
mean_mse_lasso = evaluate_model(pipeline=lasso_pipeline, X=X, y=y, kf=kf)

# Ordinary least-squares regression on standardized features, scored by
# cross-validated MSE
linear_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('linear', LinearRegression()),
])
mean_mse_linear = evaluate_model(pipeline=linear_pipeline, X=X, y=y, kf=kf)

# Ridge regression (alpha=1.0) on standardized features, scored by
# cross-validated MSE
ridge_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('ridge', Ridge(random_state=42, alpha=1.0)),
])
mean_mse_ridge = evaluate_model(pipeline=ridge_pipeline, X=X, y=y, kf=kf)

# Print the MSE for each model (currently disabled; uncomment the lines below to show the values)
#print(f"Lasso Regression Mean Squared Error (Cross-Validated): {mean_mse_lasso}")
#print(f"Linear Regression Mean Squared Error (Cross-Validated): {mean_mse_linear}")
#print(f"Ridge Regression Mean Squared Error (Cross-Validated): {mean_mse_ridge}")


# Function to perform cross-validation and calculate MAE
def evaluate_model_mae(pipeline, X, y, kf):
    """Return the mean cross-validated mean absolute error of ``pipeline``.

    The folds are evaluated through ``cross_val_score`` (the same helper
    ``evaluate_model`` uses) rather than a hand-written split/fit/predict
    loop. ``cross_val_score`` fits a clone of the estimator on every fold, so
    the ``pipeline`` object passed in is not fitted or otherwise modified by
    this call, and ``X`` / ``y`` do not need to support ``.iloc``.

    Args:
        pipeline: Estimator passed to ``cross_val_score``.
        X: Feature matrix.
        y: Target values.
        kf: Cross-validation splitter, forwarded as ``cv``.

    Returns:
        The per-fold MAE averaged over all folds, as a positive ``float``.

    Raises:
        Any exception raised while fitting or scoring a fold is propagated
        (``error_score='raise'``) instead of being turned into a NaN score.
    """
    neg_mae_per_fold = cross_val_score(
        pipeline,
        X,
        y,
        cv=kf,
        scoring='neg_mean_absolute_error',
        # Fail loudly on a broken fold, as an explicit fit/predict loop would.
        error_score='raise',
    )
    # The scorer reports the negated MAE, so flip the sign of the average.
    return float(-neg_mae_per_fold.mean())

# Cross-validated MAE for the Lasso, linear and Ridge pipelines, in that order
mae_lasso, mae_linear, mae_ridge = (
    evaluate_model_mae(model, X, y, kf)
    for model in (lasso_pipeline, linear_pipeline, ridge_pipeline)
)

# Show the three MAE values as the notebook cell's output
mae_lasso, mae_linear, mae_ridge


# Cell output (mae_lasso, mae_linear, mae_ridge):
# (0.7175300957316056, 0.5615904220409902, 0.5607865463065497)