# In [2]:
import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Column configuration: the regression target and the predictor columns
target_var = 'Weekly_Sales'
features = [
    'Store', 'Dept', 'IsHoliday', 'Size',
    'Temperature', 'Fuel_Price', 'CPI', 'Unemployment',
]

# Read the full training table from disk
data = pd.read_csv('train.csv')

# Hold out 20% of the rows for validation; the fixed seed keeps the split reproducible
train_data, val_data = train_test_split(data, test_size=0.2, random_state=42)

# Separate predictors (X) from the target (y) for each split
X_train, y_train = train_data[features], train_data[target_var]
X_val, y_val = val_data[features], val_data[target_var]

# Tune the Ridge regularisation strength (alpha) on the hold-out validation split
alpha_list = [0.01, 0.1, 1, 10, 100]
rmse_list = []  # validation RMSE per candidate, in the same order as alpha_list

for alpha in alpha_list:
    ridge_model = Ridge(alpha=alpha, random_state=42)
    ridge_model.fit(X_train, y_train)
    y_pred = ridge_model.predict(X_val)
    # Take the square root of the MSE explicitly: the `squared=False` keyword
    # was deprecated in scikit-learn 1.4 and removed in 1.6, so passing it
    # raises a TypeError on current releases.
    rmse = mean_squared_error(y_val, y_pred) ** 0.5
    rmse_list.append(rmse)

# Choose the alpha with the lowest validation RMSE; on a tie the earliest
# candidate in alpha_list wins.
best_alpha, best_rmse = min(zip(alpha_list, rmse_list), key=lambda pair: pair[1])

# Refit on the training split with the selected alpha so that `ridge_model`
# holds the chosen model rather than the last candidate tried in the loop.
ridge_model = Ridge(alpha=best_alpha, random_state=42)
ridge_model.fit(X_train, y_train)

# Score the test file with the fitted model, using the same feature columns
# that were used for training
test_data = pd.read_csv('test.csv')
X_test = test_data.loc[:, features]
y_pred = ridge_model.predict(X_test)



# KeyboardInterrupt: