### Import Required Libraries

In [4]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt


### Prepare the data

In [9]:
def create_lagged_features_multivariate(df, target_col, lag=1):
    """
    Create lagged features for multivariate time series forecasting.
    
    Parameters:
    df (pd.DataFrame): The multivariate time series data.
    target_col (str): The name of the target column.
    lag (int): The number of lagged features to create.
    
    Returns:
    pd.DataFrame: A DataFrame with lagged features.
    """
    lagged_data = pd.DataFrame()
    for col in df.columns:
        for i in range(1, lag + 1):
            lagged_data[f'{col}_lag_{i}'] = df[col].shift(i)
    
    lagged_data[target_col] = df[target_col]
    lagged_data.dropna(inplace=True)
    return lagged_data

# Example usage:
# Assume `multivariate_data` is a pandas DataFrame containing the multivariate time series data.
file_name = 'waterTank_Golden_reduced.csv'
# data = pd.read_csv(file_name, index_col=0, header=0, parse_dates=True)
multivariate_data = pd.read_csv(file_name, index_col=0)
multivariate_data.index.freq = 'ms'
# multivariate_data = pd.DataFrame(...)
target_column = 'Tank3OutFlow'
lagged_data = create_lagged_features_multivariate(multivariate_data, target_col=target_column, lag=3)


In [20]:
lagged_data.shape

(14998, 16)

### Split the data

In [10]:
# Split the data into train and test sets
train_size = int(len(lagged_data) * 0.8)
train, test = lagged_data[:train_size], lagged_data[train_size:]

X_train, y_train = train.drop(columns=[target_column]), train[target_column]
X_test, y_test = test.drop(columns=[target_column]), test[target_column]


In [21]:
test.shape

(0, 16)

In [18]:
print(X_test.shape, y_test.shape)
print(train.shape, test.shape)
print(train_size)

(0, 15) (0,)
(14998, 16) (0, 16)
11998


### Train the Random Forest Model

In [11]:
# Initialize and train the Random Forest Regressor
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)


RandomForestRegressor(random_state=42)

### Make Predictions and Evaluate the Model

In [12]:
# Make predictions
y_pred = rf_model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# Plot the results
plt.figure(figsize=(12, 6))
plt.plot(y_test.values, label='Actual')
plt.plot(y_pred, label='Predicted')
plt.legend()
plt.show()


ValueError: Found array with 0 sample(s) (shape=(0, 15)) while a minimum of 1 is required.

### Forecast Future Values

In [None]:
def forecast_future_values_multivariate(model, initial_input, steps=10):
    """
    Forecast future values using the trained model for multivariate time series.
    
    Parameters:
    model: Trained model for forecasting.
    initial_input (np.array): The last observed data point to start forecasting.
    steps (int): The number of future steps to forecast.
    
    Returns:
    list: Forecasted values.
    """
    forecast = []
    current_input = initial_input.copy()
    
    for _ in range(steps):
        prediction = model.predict(current_input.reshape(1, -1))[0]
        forecast.append(prediction)
        
        # Update the input for the next prediction by removing the oldest lag and adding the new prediction
        current_input = np.roll(current_input, -1)
        current_input[-1] = prediction
    
    return forecast

# Example usage:
initial_input = X_test.iloc[-1].values
future_steps = 10
forecasted_values = forecast_future_values_multivariate(rf_model, initial_input, steps=future_steps)

# Plot the forecasted values
plt.figure(figsize=(12, 6))
plt.plot(range(len(y_test), len(y_test) + future_steps), forecasted_values, label='Forecasted')
plt.legend()
plt.show()
