In [1]:
# Data manipulation and analysis
import pandas as pd
import numpy as np

# Custom modules and functions
from capstone.model_selection import overunder_error, naive_cross_val_score
from capstone.utils import read_file, get_sectors

In [2]:
# Load in files
# `sectors` comes from the project helper `get_sectors()`; it is used below as a
# column selector and later iterated one sector at a time.
# (presumably a list of sector column labels -- TODO confirm against capstone.utils)
sectors = get_sectors()
# Read the "master_df" dataset, using its first column as the index.
df = read_file("master_df", index_col=0)

# Keep only the sector columns; this is the frame the naive forecast is built from.
returns = df[sectors]

In [3]:
# Forecast horizon: half of a 252-day trading year, in trading days (126 rows).
trading_days = 252
forecast = trading_days // 2

# Naive forecast: the value observed `forecast` rows earlier is used as the
# prediction for each row. Rows that have no lagged value (NaN after the shift)
# are dropped, and the actual returns are aligned to the surviving index.
returns_shifted = returns.shift(forecast).dropna()
returns_reind = returns.reindex(returns_shifted.index)

# Not populated in this cell; kept so any later cell that expects the name
# still finds it.
naive_preds = pd.DataFrame()

# Scores are collected as {sector: {window_end_label: mean_oul}} and turned into
# a DataFrame once at the end, instead of enlarging an empty frame one scalar at
# a time with `.loc` (which reallocates on every new row and leaves the column
# dtype to inference).
oul_by_sector = {}

# Loop through sectors
for sector in sectors:
    r_trues = returns_reind[sector]
    r_hats = returns_shifted[sector]
    sector_ouls = {}

    # Walk the series in consecutive, non-overlapping windows of `forecast` rows.
    # NOTE(review): the first window covers positions 1..forecast, so row 0 and
    # any rows after the last full window are never scored. Left unchanged on
    # purpose: the window-end labels are later joined against stored ARIMAX and
    # sklearn results, so moving the boundaries would break that alignment.
    # Confirm the offset is intended.
    for i in range(forecast + 1, len(returns_reind), forecast):
        r_hat = r_hats.iloc[i - forecast:i]
        r_true = r_trues.iloc[i - forecast:i]

        # Mean over-under loss across the cv=2 scores returned by the project
        # helper (presumably one score per fold, with only over-prediction
        # penalised given overpred_penalty=2 / underpred_penalty=0 -- TODO
        # confirm against capstone.model_selection).
        mean_oul = np.mean(
            naive_cross_val_score(
                r_hat, r_true, cv=2, scorer=overunder_error,
                overpred_penalty=2, underpred_penalty=0
            )
        )
        # Key each score by the largest index label in its window.
        sector_ouls[r_hat.index.max()] = mean_oul

    oul_by_sector[sector] = sector_ouls

# Rows: window-end labels; columns: sectors (in `sectors` order).
naive_ouls = pd.DataFrame(oul_by_sector)

In [4]:
# NOTE(review): this cell repeats the model-comparison steps of the cell that
# follows it; consider keeping only one copy.
# `read_file` is already imported in the notebook's first cell, so it is not
# re-imported here.

# Stored mean over-under loss (OUL) tables for the ARIMAX and sklearn models,
# read with their first column as the index.
arimax_mean_ouls = read_file("arimax_mean_ouls", index_col=0)
sklearn_mean_ouls = read_file("sklearn_mean_ouls", index_col=0)
# Row-wise mean of the naive losses (i.e. averaged across sector columns),
# named "Naive" so it becomes a column with that label in the combined table.
naive_mean_ouls = pd.Series(naive_ouls.mean(axis=1), name="Naive")

# Side-by-side table of losses, then the column label with the smallest loss
# in each row.
mean_ouls = pd.concat([arimax_mean_ouls, sklearn_mean_ouls, naive_mean_ouls], axis=1)
selected_models = mean_ouls.idxmin(axis=1)
# Display only the rows where the naive forecast had the lowest loss.
selected_models[selected_models == "Naive"]

2020-05-28    Naive
dtype: object

In [5]:
# Stored mean over-under loss (OUL) tables for the fitted models, indexed by
# their first column
arimax_mean_ouls = read_file("arimax_mean_ouls", index_col=0)
sklearn_mean_ouls = read_file("sklearn_mean_ouls", index_col=0)

# Naive baseline: average the per-sector losses across columns for each row and
# label the resulting series "Naive"
naive_mean_ouls = naive_ouls.mean(axis="columns").rename("Naive")

# Line the three sources up column-wise on their shared index
mean_ouls = pd.concat(
    [arimax_mean_ouls, sklearn_mean_ouls, naive_mean_ouls],
    axis="columns",
)

# For every row, the column label holding the smallest mean OUL
selected_models = mean_ouls.idxmin(axis="columns")

# Show only the rows won by the naive baseline
selected_models.loc[selected_models.eq("Naive")]

2020-05-28    Naive
dtype: object