In [None]:
from constants import train_trials
from functions import load_data, run_trial

In [None]:
# Load the data for the listed tickers; `dfs` is passed to every run_trial
# call in the cells below.
dfs = load_data(
    tcks=(
        "AAPL",
        "DELL",
        "FORD",
        "IBM",
        "MACYS",
        "SP500",
    )
)

In [None]:
# Replicate trials from the paper
#
# Runs every trial listed in `train_trials` and records the "MAPE" and "DPA"
# entries of each result as mapes[ticker][trial] / dpas[ticker][trial].

# Paper constants (adjust as needed)
latency = 10
n_states = 4

# `preds` is only initialised here; nothing fills it yet (see the TODO below).
mapes, dpas, preds = {}, {}, {}

for tck, trials in train_trials.items():
    # Bind the per-ticker dicts once so the inner loop writes to them directly.
    ticker_mapes = mapes[tck] = {}
    ticker_dpas = dpas[tck] = {}
    for trial_id, spec in trials.items():
        # TODO: Write predictions to CSV file
        outcome = run_trial(
            dfs,
            tck=tck,
            train_period=spec["train"],
            test_period=spec["test"],
            latency=latency,
            n_states=n_states,
        )
        ticker_mapes[trial_id] = outcome["MAPE"]
        ticker_dpas[trial_id] = outcome["DPA"]

In [None]:
# Experiment 1 - Robustness check on number of latent states
#
# Re-runs one AAPL trial once per candidate state count while the latency
# stays fixed.

tck = "AAPL" # Ticker label (or the prefix of the CSV)
train_period = ("2003-02-10", "2004-09-10") # Training Period
test_period = ("2004-09-13", "2005-01-21") # Testing Period
latency = 10
# Kept under its own name so `n_states` never holds a list in one cell and a
# scalar in another.
n_states_grid = [3, 5, 6, 8]

# Keep each run's return value, keyed by state count, so the runs can be
# compared afterwards; a bare call inside the loop would throw it away.
exp1_results = {}
for n in n_states_grid:
    exp1_results[n] = run_trial(
        dfs, tck, train_period, test_period, latency=latency, n_states=n
    )

In [None]:
# Experiment 2 - Robustness check on context window size
#
# Re-runs one AAPL trial once per candidate latency while the number of
# states stays fixed.

tck = "AAPL" # Ticker label (or the prefix of the CSV)
train_period = ("2003-02-10", "2004-09-10") # Training Period
test_period = ("2004-09-13", "2005-01-21") # Testing Period
# Kept under its own name so `latency` never holds a list in one cell and a
# scalar in another.
latencies = [6, 20, 50]
n_states = 4

# Keep each run's return value, keyed by latency, so the runs can be compared
# afterwards; a bare call inside the loop would throw it away.
exp2_results = {}
for window in latencies:
    exp2_results[window] = run_trial(
        dfs, tck, train_period, test_period, latency=window, n_states=n_states
    )

In [None]:
# Experiment 3 - Generalizability test on different stocks at same time period
#
# Runs the same train/test periods and settings once per ticker.

tcks = ["FORD", "MACYS", "SP500"] # Ticker label (or the prefix of the CSV)
train_period = ("2003-02-10", "2004-09-10") # Training Period
test_period = ("2004-09-13", "2005-01-21") # Testing Period
latency = 10
n_states = 4

# Keep each run's return value, keyed by ticker, so the runs can be compared
# afterwards; a bare call inside the loop would throw it away.
exp3_results = {}
for tck in tcks:
    exp3_results[tck] = run_trial(
        dfs, tck, train_period, test_period, latency=latency, n_states=n_states
    )

In [None]:
# Experiment 4 - Generalizability test of same stock at different time periods
#
# Runs AAPL with identical settings over several train/test period pairs;
# train_periods[k] is paired with test_periods[k].

tck = "AAPL" # Ticker label (or the prefix of the CSV)
train_periods = [
    ("2005-02-10", "2005-09-09"),
    ("2011-02-10", "2011-09-12"),
    ("2015-02-10", "2015-09-10"),
]
test_periods = [
    ("2005-09-12", "2006-01-20"),
    ("2011-09-13", "2012-01-23"),
    ("2015-09-14", "2016-01-21"),
]
latency = 10
n_states = 4

# Fail before any trial runs if the two lists have drifted out of step,
# instead of an IndexError mid-run or a silently skipped period.
assert len(train_periods) == len(test_periods), (
    "train_periods and test_periods must have the same length"
)

# Keep each run's return value, keyed by its (train_period, test_period)
# pair, so the runs can be compared afterwards.
exp4_results = {}
for train_period, test_period in zip(train_periods, test_periods):
    exp4_results[(train_period, test_period)] = run_trial(
        dfs, tck, train_period, test_period, latency=latency, n_states=n_states
    )