In [None]:
"""
Program: Dynamic Factor Models for Productivity Prediction
Description: Analyze and predict worker productivity using Dynamic Factor Models.
Requirements: Install `statsmodels`, `pandas`, `numpy`, `scikit-learn`, and `matplotlib` via pip.
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.statespace.dynamic_factor import DynamicFactor


In [None]:
def load_data(file_path):
    """
    Read the worker productivity dataset from CSV into a DataFrame.

    The first CSV column becomes the index, and pandas is asked to parse
    that index as dates.

    Parameters:
        file_path (str): Location of the CSV file to read.

    Returns:
        pd.DataFrame: The dataset, indexed by the first CSV column.
    """
    productivity_frame = pd.read_csv(
        file_path,
        parse_dates=True,
        index_col=0,
    )
    return productivity_frame

def split_data(data, train_end_date):
    """
    Split the dataset into non-overlapping training and testing sets.

    Rows up to and including ``train_end_date`` go to the training set;
    every remaining row goes to the testing set.

    Parameters:
        data (pd.DataFrame): The complete dataset, ordered by its index.
        train_end_date (str): The end date for the training set in 'YYYY-MM-DD' format.

    Returns:
        pd.DataFrame, pd.DataFrame: Training and testing datasets.
    """
    # Label-based slicing with .loc includes BOTH endpoints, so slicing the
    # test set with `data.loc[train_end_date:]` would put the cutoff row in
    # both sets (leaking training data and misaligning the forecast horizon).
    train_data = data.loc[:train_end_date]

    # The training slice is a leading run of rows, so the test set is simply
    # whatever follows it positionally.
    test_data = data.iloc[len(train_data):]
    return train_data, test_data

def fit_dynamic_factor_model(train_data, factors=1, factor_order=1):
    """
    Build a Dynamic Factor Model on the training data and fit it.

    Parameters:
        train_data (pd.DataFrame): Observations used to estimate the model.
        factors (int): Number of factors, passed as ``k_factors``.
        factor_order (int): Order of the factors, passed as ``factor_order``.

    Returns:
        DynamicFactorResults: The result of calling ``fit()`` on the model.
    """
    return DynamicFactor(
        train_data,
        k_factors=factors,
        factor_order=factor_order,
    ).fit()

def forecast_and_evaluate(model, steps, test_data):
    """
    Forecast with a fitted Dynamic Factor Model and score it against held-out data.

    Parameters:
        model (DynamicFactorResults): Fitted Dynamic Factor Model.
        steps (int): Number of forecast steps.
        test_data (pd.DataFrame): The testing dataset; it must have the same
            shape as the forecast (one row per step, one column per series).

    Returns:
        pd.DataFrame, float: Predicted values and RMSE over all entries.

    Raises:
        ValueError: If the forecast and ``test_data`` differ in shape, or if
            either contains NaN.
    """
    forecast = model.get_forecast(steps=steps)
    predictions = forecast.predicted_mean

    # Compare values positionally (row i of the forecast against row i of
    # test_data) rather than by index label, so a differing index on the
    # forecast does not silently produce NaNs through alignment.
    actual = np.asarray(test_data, dtype=float)
    predicted = np.asarray(predictions, dtype=float)

    # Treat 1-D input as a single column so (n,) vs (n, 1) cannot broadcast
    # into an (n, n) matrix of differences.
    if actual.ndim == 1:
        actual = actual.reshape(-1, 1)
    if predicted.ndim == 1:
        predicted = predicted.reshape(-1, 1)

    if actual.shape != predicted.shape:
        raise ValueError(
            f"Shape mismatch: test_data has shape {actual.shape} but the "
            f"forecast has shape {predicted.shape}."
        )
    if np.isnan(actual).any() or np.isnan(predicted).any():
        raise ValueError("test_data and the forecast must not contain NaN.")

    # With equal-shaped inputs, the mean over all entries equals the uniform
    # average of the per-column mean squared errors.
    rmse = float(np.sqrt(np.mean((actual - predicted) ** 2)))
    return predictions, rmse

In [None]:
# Load the synthetic dataset
data = load_data("synthetic_worker_productivity.csv")

# Split into training (through 2022-12-31) and testing (everything after)
train_data, test_data = split_data(data, "2022-12-31")

# Fit Dynamic Factor Model
dfm_model = fit_dynamic_factor_model(train_data)

# Forecast one step per test row and evaluate against the held-out data
predictions, error = forecast_and_evaluate(dfm_model, steps=len(test_data), test_data=test_data)

# Display results
print(f"Dynamic Factor Model RMSE: {error}")

# Each frame draws one line per column, so a hand-built legend list such as
# ["Predictions", *columns] would attach the wrong names to the lines. Label
# the columns themselves and let matplotlib build the legend from them.
# NOTE(review): assumes `predictions` is a DataFrame (multivariate model) — confirm.
fig, ax = plt.subplots(figsize=(10, 6))
predictions.add_suffix(" (predicted)").plot(ax=ax)
test_data.add_suffix(" (actual)").plot(ax=ax, linestyle="--")
ax.set_title("Dynamic Factor Model Forecast vs. Actual")
ax.legend()
plt.show()