In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

def perform_ridge_regression(file_paths, target_columns, alpha=1.0, test_size=0.2, random_state=42):
    # List to store individual DataFrames
    dfs = []

    # Iterate over file paths and target variable column names
    for file_path, target_column in zip(file_paths, target_columns):
        # Read CSV file
        df = pd.read_csv(file_path)
        
        # Perform any necessary operations specific to each structure
        
        # Append the processed DataFrame to the list
        dfs.append(df)

    # Concatenate all DataFrames into one
    df = pd.concat(dfs, axis=0, ignore_index=True)

    # Handle missing values if any
    df = df.dropna()

    # Split the data into features (X) and target variable (y)
    X = df.drop(target_columns, axis=1)
    y = df[target_columns]

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    if len(X_train) == 0:
        raise ValueError("The resulting train set is empty. Adjust test_size or provide more data.")

    # Standardize the features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Initialize and train the Ridge regression model
    ridge_model = Ridge(alpha=alpha)
    ridge_model.fit(X_train_scaled, y_train)

    # Make predictions on the test set
    y_pred = ridge_model.predict(X_test_scaled)

    # Evaluate the model
    mse = mean_squared_error(y_test, y_pred)
    print(f'Mean Squared Error: {mse}')
    
    # Plot predicted vs actual values
    plt.scatter(y_test, y_pred)
    plt.xlabel("Actual Values")
    plt.ylabel("Predicted Values")
    plt.title("Ridge Regression: Actual vs Predicted")
    plt.show()


# Example usage:
file_paths = ['Coffee.csv', 'GDP.csv', 'Zillow.csv']
target_columns = ['PI', 'GDPC1', 'ZHVI']
perform_ridge_regression(file_paths, target_columns, alpha=1.0, test_size=0.2, random_state=42)


ValueError: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.