<a href="https://colab.research.google.com/github/alvinfranklyndavis/Project2023_v3/blob/main/Project2023_v4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Upgrade pip and install required packages
!pip install -U --upgrade-strategy eager pip
!pip install -U --upgrade-strategy eager pandas gdown numpy matplotlib scikit-learn xgboost shap

# Import necessary libraries
import pandas as pd
import gdown
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestRegressor, VotingRegressor
from xgboost import XGBRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import shap

# Define the URL of the CSV file
csv_url = 'https://drive.google.com/uc?id=1o87z4evvCLwBtqX8ocZl3I2nIDYS8mtH'

# Define the local file path to save the CSV
csv_path = 'Training_Testing_Hybrid_Mod.csv'

# Download the CSV file from the Google Drive link
gdown.download(csv_url, csv_path, quiet=False)

# Load the dataset into a Pandas DataFrame
df = pd.read_csv(csv_path)

# Display the first few rows and data types
print(df.head())
print(df.dtypes)


[0m

Downloading...
From: https://drive.google.com/uc?id=1o87z4evvCLwBtqX8ocZl3I2nIDYS8mtH
To: /content/Training_Testing_Hybrid_Mod.csv
100%|██████████| 71.9k/71.9k [00:00<00:00, 3.79MB/s]

       Date  Day of the Week  Morning  Prev_Week  Rep_Prev_Week  \
0  8/1/2018                3       19          7              0   
1  8/2/2018                4       31         11              0   
2  8/3/2018                5       15         19              0   
3  8/4/2018                6       31         35              0   
4  8/6/2018                1       31         18              0   

   Rep_Prev_Entry  Afternoon  Prev_Week.1  Rep_Prev_Week.1  Rep_Prev_Entry.1  \
0               0         14           13                0                 0   
1               0          3           21                0                 0   
2               0          9           19                0                 0   
3               0         21           20                0                 0   
4               0         31           30                0                 1   

   Evening  Prev_Week.2  Rep_Prev_Week.2  Rep_Prev_Entry.2  Night  \
0       33           28                0       




In [2]:
# Convert 'Date' column to datetime type
df['Date'] = pd.to_datetime(df['Date'])

# Extract relevant date features
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month
df['Day'] = df['Date'].dt.day

# Drop the original 'Date' column
df = df.drop(columns=['Date'])

# Create Target Variable columns
df['Prediction1'] = np.nan
df['Prediction2'] = np.nan
df['Prediction3'] = np.nan
df['Prediction4'] = np.nan

# Display the updated DataFrame with target variables
print(df.head())
print(df.dtypes)

   Day of the Week  Morning  Prev_Week  Rep_Prev_Week  Rep_Prev_Entry  \
0                3       19          7              0               0   
1                4       31         11              0               0   
2                5       15         19              0               0   
3                6       31         35              0               0   
4                1       31         18              0               0   

   Afternoon  Prev_Week.1  Rep_Prev_Week.1  Rep_Prev_Entry.1  Evening  ...  \
0         14           13                0                 0       33  ...   
1          3           21                0                 0       35  ...   
2          9           19                0                 0       23  ...   
3         21           20                0                 0       29  ...   
4         31           30                0                 1       15  ...   

   Prev_Week.3  Rep_Prev_Week.3  Rep_Prev_Entry.3  Year  Month  Day  \
0            7       

In [3]:
# Check for missing values
missing_values = df.isnull().sum()

# Display the count of missing values for each column
print("Missing Values:\n", missing_values)

# Identify missing values in target variables
missing_values_targets = df[['Prediction1', 'Prediction2', 'Prediction3', 'Prediction4']].isnull().sum()
print("Missing Values in Target Variables:\n", missing_values_targets)

# Iterate through each row with missing values in target variables
for index, row in df[df[['Prediction1', 'Prediction2', 'Prediction3', 'Prediction4']].isnull().any(axis=1)].iterrows():
    # Fill missing values with the corresponding historical entry
    df.at[index, 'Prediction1'] = df.at[index, 'Morning']
    df.at[index, 'Prediction2'] = df.at[index, 'Afternoon']
    df.at[index, 'Prediction3'] = df.at[index, 'Evening']
    df.at[index, 'Prediction4'] = df.at[index, 'Night']

# Display the updated DataFrame
print(df.head())


Missing Values:
 Day of the Week        0
Morning                0
Prev_Week              0
Rep_Prev_Week          0
Rep_Prev_Entry         0
Afternoon              0
Prev_Week.1            0
Rep_Prev_Week.1        0
Rep_Prev_Entry.1       0
Evening                0
Prev_Week.2            0
Rep_Prev_Week.2        0
Rep_Prev_Entry.2       0
Night                  0
Prev_Week.3            0
Rep_Prev_Week.3        0
Rep_Prev_Entry.3       0
Year                   0
Month                  0
Day                    0
Prediction1         1409
Prediction2         1409
Prediction3         1409
Prediction4         1409
dtype: int64
Missing Values in Target Variables:
 Prediction1    1409
Prediction2    1409
Prediction3    1409
Prediction4    1409
dtype: int64
   Day of the Week  Morning  Prev_Week  Rep_Prev_Week  Rep_Prev_Entry  \
0                3       19          7              0               0   
1                4       31         11              0               0   
2                5   

In [4]:
# Split the data into features (X) and target variables (y)
X = df[['Day of the Week', 'Morning', 'Prev_Week', 'Rep_Prev_Week', 'Rep_Prev_Entry', 'Afternoon', 'Prev_Week.1', 'Rep_Prev_Week.1', 'Rep_Prev_Entry.1', 'Evening', 'Prev_Week.2', 'Rep_Prev_Week.2', 'Rep_Prev_Entry.2', 'Night', 'Prev_Week.3', 'Rep_Prev_Week.3', 'Rep_Prev_Entry.3', 'Year', 'Month', 'Day']]
y = df[['Prediction1', 'Prediction2', 'Prediction3', 'Prediction4']]

# Use an 80/20 split for training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Confirm the shapes of X_train, X_test, y_train, and y_test
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)


X_train shape: (1127, 20)
X_test shape: (282, 20)
y_train shape: (1127, 4)
y_test shape: (282, 4)


In [5]:
# Check for missing values
missing_values = df.isnull().sum()

# Display the count of missing values for each column
print("Missing Values:\n", missing_values)

# Display the count of missing values in the training sets
print("Missing Values in X_train:\n", X_train.isnull().sum())
print("\nMissing Values in y_train:\n", y_train.isnull().sum())


Missing Values:
 Day of the Week     0
Morning             0
Prev_Week           0
Rep_Prev_Week       0
Rep_Prev_Entry      0
Afternoon           0
Prev_Week.1         0
Rep_Prev_Week.1     0
Rep_Prev_Entry.1    0
Evening             0
Prev_Week.2         0
Rep_Prev_Week.2     0
Rep_Prev_Entry.2    0
Night               0
Prev_Week.3         0
Rep_Prev_Week.3     0
Rep_Prev_Entry.3    0
Year                0
Month               0
Day                 0
Prediction1         0
Prediction2         0
Prediction3         0
Prediction4         0
dtype: int64
Missing Values in X_train:
 Day of the Week     0
Morning             0
Prev_Week           0
Rep_Prev_Week       0
Rep_Prev_Entry      0
Afternoon           0
Prev_Week.1         0
Rep_Prev_Week.1     0
Rep_Prev_Entry.1    0
Evening             0
Prev_Week.2         0
Rep_Prev_Week.2     0
Rep_Prev_Entry.2    0
Night               0
Prev_Week.3         0
Rep_Prev_Week.3     0
Rep_Prev_Entry.3    0
Year                0
Month             

In [6]:
print("y_train shape:", y_train.shape)


y_train shape: (1127, 4)


In [7]:
try:
    # Import necessary libraries
    from sklearn.ensemble import RandomForestRegressor, VotingRegressor
    from xgboost import XGBRegressor
    from sklearn.model_selection import cross_val_score
    from sklearn.metrics import mean_squared_error, r2_score
    import numpy as np
except Exception as e:
    print(f"Error during library import: {e}")


In [8]:
try:
    # Initialize individual models
    rf_model = RandomForestRegressor(random_state=42, n_estimators=100)
    xgb_model = XGBRegressor(objective='reg:squarederror', random_state=42)

    # Fit each individual model for Prediction1
    rf_model.fit(X_train, y_train['Prediction1'])
    xgb_model.fit(X_train, y_train['Prediction1'])

    # Fit each individual model for Prediction2
    rf_model.fit(X_train, y_train['Prediction2'])
    xgb_model.fit(X_train, y_train['Prediction2'])

    # Fit each individual model for Prediction3
    rf_model.fit(X_train, y_train['Prediction3'])
    xgb_model.fit(X_train, y_train['Prediction3'])

    # Fit each individual model for Prediction4
    rf_model.fit(X_train, y_train['Prediction4'])
    xgb_model.fit(X_train, y_train['Prediction4'])
except Exception as e:
    print(f"Error during model initialization and fitting: {e}")


In [9]:
try:
    # Create a VotingRegressor with the specified models
    ensemble_model = VotingRegressor(estimators=[
        ('rf', rf_model),
        ('xgb', xgb_model)
    ])
except Exception as e:
    print(f"Error during ensemble model creation: {e}")


In [10]:
try:
    # Use the individual models to create inputs for the ensemble model
    rf_predictions = rf_model.predict(X_train)
    xgb_predictions = xgb_model.predict(X_train)
    ensemble_X_train = np.column_stack((rf_predictions, xgb_predictions))
except Exception as e:
    print(f"Error during prediction and stacking: {e}")


In [11]:
try:
    # Ensure y_train_ensemble has the same number of elements as X_train
    # Flatten y_train to ensure consistency
    y_train_ensemble = y_train[['Prediction1', 'Prediction2', 'Prediction3', 'Prediction4']].values.ravel()

    # Check if the sizes match
    if ensemble_X_train.shape[0] != len(y_train_ensemble):
        # Handle the size mismatch (e.g., by truncating or padding)
        min_size = min(ensemble_X_train.shape[0], len(y_train_ensemble))
        ensemble_X_train = ensemble_X_train[:min_size, :]
        y_train_ensemble = y_train_ensemble[:min_size]
except Exception as e:
    print(f"Error during data preparation: {e}")


In [12]:
try:
    # Fit the ensemble model
    ensemble_model.fit(ensemble_X_train, y_train_ensemble)

    print("Shape of ensemble_X_train:", ensemble_X_train.shape)
    print("Length of y_train_ensemble:", len(y_train_ensemble))
except Exception as e:
    print(f"Error during ensemble model fitting: {e}")


Shape of ensemble_X_train: (1127, 2)
Length of y_train_ensemble: 1127


In [13]:
try:
    # Import KFold for cross-validation
    from sklearn.model_selection import KFold
except Exception as e:
    print(f"Error during KFold import: {e}")


In [14]:
try:
    def evaluate_model(model, X, y, cv=5):
        """
        Evaluate the performance of a predictive model.

        Parameters:
        - model: The predictive model to be evaluated.
        - X: The input features for evaluation.
        - y: The target variables for evaluation.
        - cv: Number of cross-validation folds.

        Returns:
        A dictionary containing evaluation metrics.
        """
        kf = KFold(n_splits=cv, shuffle=True, random_state=42)

        # Initialize evaluation metrics
        mse_5fold = []
        mse_10fold = []
        r2_scores = []

        # Loop through each target variable
        for i in range(y.shape[1]):
            # Initialize scores
            mse_5fold_i = []
            mse_10fold_i = []
            r2_scores_i = []

            # Perform cross-validation
            for train_idx, test_idx in kf.split(X):
                X_train_fold, X_test_fold = X[train_idx], X[test_idx]
                y_train_fold, y_test_fold = y[train_idx, i], y[test_idx, i]

                # Fit the model
                model.fit(X_train_fold, y_train_fold)

                # Predict on the test fold
                y_pred_fold = model.predict(X_test_fold)

                # Calculate MSE
                mse_fold = mean_squared_error(y_test_fold, y_pred_fold)
                if len(mse_5fold_i) < 5:
                    mse_5fold_i.append(mse_fold)
                mse_10fold_i.append(mse_fold)

                # R-squared score
                r2_fold = r2_score(y_test_fold, y_pred_fold)
                r2_scores_i.append(r2_fold)

            # Average scores over folds
            mse_5fold.append(np.mean(mse_5fold_i))
            mse_10fold.append(np.mean(mse_10fold_i))
            r2_scores.append(np.mean(r2_scores_i))

        return {
            '5-Fold Cross-Validation MSE': mse_5fold,
            '10-Fold Cross-Validation MSE': mse_10fold,
            'R-squared Score': r2_scores
        }
except Exception as e:
    print(f"Error during model evaluation function definition: {e}")


In [15]:
# Assuming 'Prediction1', 'Prediction2', 'Prediction3', 'Prediction4' are your target variable names
# and 'df' is your DataFrame

# Step 1: Check data types of keys
row_keys = ['Prediction1', 'Prediction2', 'Prediction3', 'Prediction4']
row_keys_data_types = [type(key) for key in row_keys]
print("Data types of keys:", row_keys_data_types)

# Step 2: Convert keys to int if needed
try:
    row_keys_as_int = [int(key[10:]) for key in row_keys]
except ValueError as e:
    print(f"Error converting keys to int: {e}")
    # Handle the error as needed

# Step 3: Check data type of the index
try:
    index_data_type = type(df.index[0])
except IndexError as e:
    print(f"Error accessing index: {e}")
    # Handle the error as needed

print("Data type of the index:", index_data_type)


Data types of keys: [<class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>]
Data type of the index: <class 'int'>


In [16]:
# Step 4: Check for missing or invalid values in the index
try:
    missing_values = df.index.isnull().sum()  # Check for NaN values
    invalid_values = df.index[~df.index.isin(row_keys_as_int)]  # Check for values not in keys_as_int
except Exception as e:
    print(f"Error checking index values: {e}")
    # Handle the error as needed

# Step 5: Convert index values to integers
try:
    df.index = df.index.astype(int)
except Exception as e:
    print(f"Error converting index values to integers: {e}")
    # Handle the error as needed

print("Number of missing values in the index:", missing_values)
print("Invalid values in the index:", invalid_values)


Number of missing values in the index: 0
Invalid values in the index: Index([   0,    5,    6,    7,    8,    9,   10,   11,   12,   13,
       ...
       1399, 1400, 1401, 1402, 1403, 1404, 1405, 1406, 1407, 1408],
      dtype='int64', length=1405)


In [17]:
try:
    # Evaluate the ensemble model
    ensemble_evaluation = evaluate_model(ensemble_model, ensemble_X_train, y_train.iloc[:, :2])

    # Display the evaluation metrics
    for metric, values in ensemble_evaluation.items():
        print(f"{metric}: {values}")

except Exception as e:
    print(f"Error during ensemble model evaluation: {e}")

    # Print specific rows in y_train using the problematic indices
    if isinstance(e, tuple) and len(e) == 2 and isinstance(e[0], np.ndarray) and e[1] == 0:
        problematic_indices = e[0]
        problematic_rows = y_train.iloc[problematic_indices[0], :2]
        print("Problematic Rows in y_train:")
        print(problematic_rows)

    else:
        print("Unable to retrieve problematic rows. Check the error type and message.")



Error during ensemble model evaluation: (array([   0,    1,    2,    4,    5,    6,    7,    8,    9,   11,   13,
         14,   15,   16,   17,   18,   19,   20,   21,   22,   24,   25,
         26,   27,   28,   29,   32,   33,   34,   35,   36,   37,   38,
         40,   41,   42,   43,   45,   46,   47,   48,   50,   52,   53,
         55,   57,   60,   61,   62,   64,   65,   68,   69,   71,   72,
         73,   74,   75,   77,   79,   80,   81,   82,   84,   85,   87,
         89,   90,   91,   92,   93,   94,   95,   97,   98,   99,  102,
        103,  104,  105,  106,  108,  110,  111,  112,  114,  115,  116,
        117,  118,  119,  120,  121,  122,  123,  124,  125,  126,  127,
        129,  130,  131,  132,  133,  134,  135,  137,  142,  143,  144,
        145,  146,  147,  148,  149,  150,  151,  152,  153,  154,  155,
        157,  159,  160,  161,  162,  163,  164,  165,  166,  167,  169,
        170,  171,  172,  173,  175,  176,  177,  179,  180,  181,  182,
        18

In [18]:
try:
    # Step 1: Initial Training with warm_start
    # Train the RandomForestRegressor with a small number of trees using placeholder values
    # Adjust the number of trees and other hyperparameters as needed
    rf_model.fit(X_train, y_train.iloc[:, 0])

    # Evaluate its performance on the test set
    y_pred_initial = rf_model.predict(X_test.iloc[:, :20])  # Assuming the first target variable is Prediction1
    mse_initial = mean_squared_error(y_test.iloc[:, 0], y_pred_initial)
    print("Mean Squared Error after Initial Training:", mse_initial)
except Exception as e:
    print(f"Error during initial training and evaluation: {e}")


Mean Squared Error after Initial Training: 0.0


In [19]:
try:
    # Step 2: Transition to Two-Step Approach
    # Randomize the placeholder values for each target variable
    y_train_randomized = y_train.apply(np.random.permutation, axis=0)

    # Train the models on the randomized values
    rf_model.fit(X_train, y_train_randomized.iloc[:, 0])
    xgb_model.fit(X_train, y_train_randomized.iloc[:, 1])

    # Evaluate their performance on the test set
    y_pred_rf_randomized = rf_model.predict(X_test.iloc[:, :20])  # Assuming the first target variable is Prediction1
    y_pred_xgb_randomized = xgb_model.predict(X_test)
    mse_rf_randomized = mean_squared_error(y_test.iloc[:, 0], y_pred_rf_randomized)
    mse_xgb_randomized = mean_squared_error(y_test.iloc[:, 1], y_pred_xgb_randomized)
    print("Mean Squared Error after Training with Randomized Placeholders (RF):", mse_rf_randomized)
    print("Mean Squared Error after Training with Randomized Placeholders (XGB):", mse_xgb_randomized)
except Exception as e:
    print(f"Error during training with randomized placeholders and evaluation: {e}")


Mean Squared Error after Training with Randomized Placeholders (RF): 115.64086028368794
Mean Squared Error after Training with Randomized Placeholders (XGB): 146.46224526226553


In [20]:
try:
    # Step 3: Fine-Tuning with Actual Target Variables
    # Fine-tune the models using the actual values
    rf_model.fit(X_train, y_train.iloc[:, 0])
    xgb_model.fit(X_train, y_train.iloc[:, 1])

    # Evaluate their final performance on the test set
    y_pred_rf_final = rf_model.predict(X_test.iloc[:, :20])  # Assuming the first target variable is Prediction1
    y_pred_xgb_final = xgb_model.predict(X_test)
    mse_rf_final = mean_squared_error(y_test.iloc[:, 0], y_pred_rf_final)
    mse_xgb_final = mean_squared_error(y_test.iloc[:, 1], y_pred_xgb_final)
    print("Mean Squared Error after Fine-Tuning with Actual Target Variables (RF):", mse_rf_final)
    print("Mean Squared Error after Fine-Tuning with Actual Target Variables (XGB):", mse_xgb_final)
except Exception as e:
    print(f"Error during fine-tuning and final evaluation: {e}")


Mean Squared Error after Fine-Tuning with Actual Target Variables (RF): 0.0
Mean Squared Error after Fine-Tuning with Actual Target Variables (XGB): 1.2040056856947442e-08


In [21]:
print(y_train)

      Prediction1  Prediction2  Prediction3  Prediction4
1034         34.0         26.0         12.0         29.0
579           8.0         22.0          9.0         11.0
1138         35.0          2.0         12.0         18.0
48            8.0         24.0          3.0         24.0
155          19.0         26.0         22.0         28.0
...           ...          ...          ...          ...
1095         11.0         15.0          6.0         32.0
1130          6.0         21.0         18.0         20.0
1294         16.0         20.0          2.0         31.0
860          32.0         11.0          7.0         30.0
1126         29.0         22.0         35.0          7.0

[1127 rows x 4 columns]


In [22]:
# Assuming 'Prediction1', 'Prediction2', 'Prediction3', 'Prediction4' are your target variable names
# and 'df' is your DataFrame

# Step 1: Check data types of keys
row_keys = ['Prediction1', 'Prediction2', 'Prediction3', 'Prediction4']
row_keys_data_types = [type(key) for key in row_keys]
print("Data types of keys:", row_keys_data_types)

# Step 2: Check data type of the index
index_data_type = type(df.index[0])
print("Data type of the index:", index_data_type)


Data types of keys: [<class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>]
Data type of the index: <class 'int'>


In [23]:
# Define a function for model evaluations
def evaluate_model(model, X, y, cv=5):
    # Ensure the model is fitted
    model.fit(X, y)

    # Initialize evaluation metrics
    mse_5fold = []
    mse_10fold = []
    r2_scores = []

    # Loop through each target variable
    for i in range(y.shape[1]):
        # 5-fold cross-validation
        scores_5fold = cross_val_score(model, X, y.iloc[:, i], cv=cv, scoring='neg_mean_squared_error')
        mse_5fold.append(-scores_5fold.mean())

        # 10-fold cross-validation
        scores_10fold = cross_val_score(model, X, y.iloc[:, i], cv=10, scoring='neg_mean_squared_error')
        mse_10fold.append(-scores_10fold.mean())

        # Ensure the model is fitted
        model.fit(X, y.iloc[:, i])

        # R-squared score
        y_pred = model.predict(X)
        r2 = r2_score(y.iloc[:, i], y_pred)
        r2_scores.append(r2)

    return {
        '5-Fold Cross-Validation MSE': mse_5fold,
        '10-Fold Cross-Validation MSE': mse_10fold,
        'R-squared Score': r2_scores
    }


In [24]:
# Fit the ensemble model
ensemble_model.fit(X_train, y_train)

# Evaluate the ensemble model
ensemble_evaluation = evaluate_model(ensemble_model, X_train, y_train)

# Display the evaluation metrics
for metric, values in ensemble_evaluation.items():
    print(f"{metric}: {values}")


ValueError: ignored

In [None]:
# Evaluate the ensemble model
evaluation_results = evaluate_model(ensemble_model, X_train, y_train)

# Access the results from the evaluation
mse_5fold = evaluation_results['5-Fold Cross-Validation MSE']
mse_10fold = evaluation_results['10-Fold Cross-Validation MSE']
r2_scores = evaluation_results['R-squared Score']

# Display the results
for i in range(len(mse_5fold)):
    print(f"Target Variable {i + 1}:")
    print(f"5-Fold Cross-Validation MSE: {mse_5fold[i]}")
    print(f"10-Fold Cross-Validation MSE: {mse_10fold[i]}")
    print(f"R-squared Score: {r2_scores[i]}")
    print()


In [None]:
# Check the shape of the predictions array
print("Shape of predictions array:", ensemble_model.predict(X_train).shape)


In [None]:
# Task 1.1: Analyze Predictions and Visualize Results
import matplotlib.pyplot as plt
import numpy as np

def analyze_predictions(model, X, y):
    predictions = model.predict(X)

    # Check if predictions are 1-dimensional
    if len(predictions.shape) == 1:
        predictions = predictions.reshape(-1, 1)

    print("Shapes - y:", y.shape, "predictions:", predictions.shape)  # Add this line for debugging

    # Create subplots for each target variable
    n_targets = y.shape[1]
    n_subplots = min(n_targets * 2, 8)  # Limit to 8 subplots for better visualization
    fig, axes = plt.subplots(nrows=n_subplots // 2, ncols=2, figsize=(15, 5 * (n_subplots // 2)))

    # Flatten the axes array to handle the case of one target variable
    axes = np.array(axes).flatten()

    # Loop through each target variable
    for i in range(n_targets):
        # Check if there are available subplots
        if i * 2 < n_subplots:
            # Plot predicted vs. actual values
            axes[i * 2].scatter(y.iloc[:, i], predictions[:, i], alpha=0.5)
            axes[i * 2].set_title(f'Target Variable {i + 1}: Predicted vs. Actual')
            axes[i * 2].set_xlabel('Actual Values')
            axes[i * 2].set_ylabel('Predicted Values')

        # Check if there are available subplots for residuals
        if i * 2 + 1 < n_subplots:
            # Plot residuals
            residuals = y.iloc[:, i] - predictions[:, i]
            axes[i * 2 + 1].scatter(predictions[:, i], residuals, alpha=0.5)
            axes[i * 2 + 1].set_title(f'Target Variable {i + 1}: Residuals Plot')
            axes[i * 2 + 1].set_xlabel('Predicted Values')
            axes[i * 2 + 1].set_ylabel('Residuals')
            axes[i * 2 + 1].axhline(y=0, color='red', linestyle='--')  # Add horizontal line at y=0

    plt.tight_layout()
    plt.show()

# Call the function with your ensemble model and training data
analyze_predictions(ensemble_model, X_train, y_train)
