In [1]:
from constants import train_trials
from functions import load_data, run_trial

In [2]:
# Load data for the listed ticker labels; `dfs` is the first argument of
# every run_trial call in the cells below.
dfs = load_data(
    tcks=("AAPL", "DELL", "FORD", "IBM", "MACYS", "SP500"),
)

In [3]:
# Replicate trials from the paper: run every trial listed in `train_trials`
# and store its metrics as metric[ticker][trial_id].
# NOTE(review): `preds` is created but never filled in this cell; see the
# TODO inside the loop.
mapes, dpas, preds = {}, {}, {}

# Paper constants (adjust as needed)
latency = 10
n_states = 4

for tck, trials in train_trials.items():
    # Create the ticker's entries before running anything, so the ticker is
    # present in both dicts even if one of its trials raises.
    tck_mapes = mapes[tck] = {}
    tck_dpas = dpas[tck] = {}
    for i, trial in trials.items():
        # TODO: Write predictions to CSV file
        results = run_trial(
            dfs,
            tck=tck,
            train_period=trial["train"],
            test_period=trial["test"],
            latency=latency,
            n_states=n_states,
        )
        tck_mapes[i] = results["MAPE"]
        tck_dpas[i] = results["DPA"]

Training HMM on train period ('2021-01-04', '2022-01-03')


ValueError: zero-size array to reduction operation minimum which has no identity

In [4]:
# Demo code: one trial with hand-picked settings.
# NOTE(review): the recorded output of this cell is
# "ValueError: zero-size array to reduction operation minimum ...", raised
# right after the training message. Presumably the loaded DELL data does not
# cover these dates -- verify before relying on this cell.
tck = "DELL"  # Ticker label (or the prefix of the CSV)
train_period = ("2021-01-04", "2022-01-03")  # Training period
test_period = ("2023-01-03", "2023-07-11")  # Testing period
latency = 10  # How far back do you want to look?
n_states = 4  # How many latent states do you want?

# Nothing below needs editing; run_trial's docstring is in functions.py.
# The call is the cell's last expression, so its return value is displayed.
run_trial(
    dfs,
    tck=tck,
    train_period=train_period,
    test_period=test_period,
    latency=latency,
    n_states=n_states,
)

Training HMM on train period ('2021-01-04', '2022-01-03')


ValueError: zero-size array to reduction operation minimum which has no identity

In [3]:
# Experiment 1 - Robustness check on number of latent states
# Ticker, periods and latency are held fixed; only n_states changes per run.

tck = "AAPL"  # Ticker label (or the prefix of the CSV)
train_period = ("2003-02-10", "2004-09-10")  # Training period
test_period = ("2004-09-13", "2005-01-21")  # Testing period
latency = 10
n_states = [3, 5, 6, 8]  # state counts to try, in this order

for n in n_states:
    run_trial(
        dfs,
        tck=tck,
        train_period=train_period,
        test_period=test_period,
        latency=latency,
        n_states=n,
    )

Training HMM on train period ('2003-02-10', '2004-09-10')
Generating Predictions for test period ('2004-09-13', '2005-01-21')


100%|██████████| 92/92 [01:55<00:00,  1.25s/it]


AAPL trial with latent states = 3, context window size = 10
MAPE = 0.018032310975446626, DPA = 0.5217391304347826
Training HMM on train period ('2003-02-10', '2004-09-10')
Generating Predictions for test period ('2004-09-13', '2005-01-21')


100%|██████████| 92/92 [02:49<00:00,  1.85s/it]


AAPL trial with latent states = 5, context window size = 10
MAPE = 0.01556451275223328, DPA = 0.5652173913043478
Training HMM on train period ('2003-02-10', '2004-09-10')
Generating Predictions for test period ('2004-09-13', '2005-01-21')


100%|██████████| 92/92 [03:08<00:00,  2.04s/it]


AAPL trial with latent states = 6, context window size = 10
MAPE = 0.01799853283167973, DPA = 0.5217391304347826
Training HMM on train period ('2003-02-10', '2004-09-10')
Generating Predictions for test period ('2004-09-13', '2005-01-21')


100%|██████████| 92/92 [03:47<00:00,  2.47s/it]

AAPL trial with latent states = 8, context window size = 10
MAPE = 0.019813101049105438, DPA = 0.4673913043478261





In [10]:
# Experiment 2 - Robustness check on context window size
# Ticker, periods and n_states are held fixed; only latency (reported as
# "context window size" in the trial output) changes per run.

tck = "AAPL"  # Ticker label (or the prefix of the CSV)
train_period = ("2003-02-10", "2004-09-10")  # Training period
test_period = ("2004-09-13", "2005-01-21")  # Testing period
latency = [6, 20, 50]  # window sizes to try, in this order
n_states = 4

for lat in latency:
    run_trial(
        dfs,
        tck=tck,
        train_period=train_period,
        test_period=test_period,
        latency=lat,
        n_states=n_states,
    )

Training HMM on train period ('2003-02-10', '2004-09-10')
Generating Predictions for test period ('2004-09-13', '2005-01-21')


100%|██████████| 92/92 [02:41<00:00,  1.76s/it]


AAPL trial with latent states = 4, context window size = 6
MAPE = 0.020190013202262852, DPA = 0.4673913043478261
--------------------------------------------------------------------------------
Training HMM on train period ('2003-02-10', '2004-09-10')
Generating Predictions for test period ('2004-09-13', '2005-01-21')


100%|██████████| 92/92 [02:31<00:00,  1.65s/it]


AAPL trial with latent states = 4, context window size = 20
MAPE = 0.03103308427031748, DPA = 0.5434782608695652
--------------------------------------------------------------------------------
Training HMM on train period ('2003-02-10', '2004-09-10')
Generating Predictions for test period ('2004-09-13', '2005-01-21')


100%|██████████| 92/92 [02:33<00:00,  1.67s/it]

AAPL trial with latent states = 4, context window size = 50
MAPE = 0.028545792493187165, DPA = 0.5434782608695652
--------------------------------------------------------------------------------





In [4]:
# Experiment 3 - Generalizability test on different stocks at same time period
# Periods, latency and n_states are held fixed; only the ticker changes.

tcks = ["FORD", "MACYS", "SP500"]  # Ticker labels (or the prefixes of the CSVs)
train_period = ("2003-02-10", "2004-09-10")  # Training period
test_period = ("2004-09-13", "2005-01-21")  # Testing period
latency = 10
n_states = 4

for tck in tcks:
    run_trial(
        dfs,
        tck=tck,
        train_period=train_period,
        test_period=test_period,
        latency=latency,
        n_states=n_states,
    )

Training HMM on train period ('2003-02-10', '2004-09-10')
Generating Predictions for test period ('2004-09-13', '2005-01-21')


100%|██████████| 92/92 [02:12<00:00,  1.44s/it]


FORD trial with latent states = 4, context window size = 10
MAPE = 0.04928547908458648, DPA = 0.43478260869565216
--------------------------------------------------------------------------------
Training HMM on train period ('2003-02-10', '2004-09-10')
Generating Predictions for test period ('2004-09-13', '2005-01-21')


100%|██████████| 92/92 [02:31<00:00,  1.65s/it]


MACYS trial with latent states = 4, context window size = 10
MAPE = 0.012593953070271639, DPA = 0.5108695652173914
--------------------------------------------------------------------------------
Training HMM on train period ('2003-02-10', '2004-09-10')
Generating Predictions for test period ('2004-09-13', '2005-01-21')


100%|██████████| 92/92 [02:25<00:00,  1.58s/it]

SP500 trial with latent states = 4, context window size = 10
MAPE = 0.012041312248848975, DPA = 0.532608695652174
--------------------------------------------------------------------------------





In [8]:
# Experiment 4 - Generalizability test of same stock at different time periods
# Ticker, latency and n_states are held fixed; each run uses a different
# (train, test) period pair.

tck = "AAPL"  # Ticker label (or the prefix of the CSV)
# The two lists are parallel: train_periods[k] is evaluated on test_periods[k].
train_periods = [
    ("2005-02-10", "2005-09-09"),
    ("2011-02-10", "2011-09-12"),
    ("2015-02-10", "2015-09-10"),
]
test_periods = [
    ("2005-09-12", "2006-01-20"),
    ("2011-09-13", "2012-01-23"),
    ("2015-09-14", "2016-01-21"),
]
latency = 10
n_states = 4

# Fail fast if the lists get out of step. Each trial takes minutes, so a
# mismatch should surface before any training starts rather than as an
# IndexError part-way through (or as silently skipped test periods).
if len(train_periods) != len(test_periods):
    raise ValueError(
        f"train_periods has {len(train_periods)} entries but test_periods "
        f"has {len(test_periods)}; they must be paired one-to-one"
    )

for train_period, test_period in zip(train_periods, test_periods):
    run_trial(
        dfs,
        tck=tck,
        train_period=train_period,
        test_period=test_period,
        latency=latency,
        n_states=n_states,
    )

Training HMM on train period ('2011-02-10', '2011-09-12')
Generating Predictions for test period ('2011-09-13', '2012-01-23')


100%|██████████| 91/91 [02:20<00:00,  1.55s/it]


AAPL trial with latent states = 4, context window size = 10
MAPE = 0.010416632111321406, DPA = 0.5164835164835165
--------------------------------------------------------------------------------
Training HMM on train period ('2015-02-10', '2015-09-10')
Generating Predictions for test period ('2015-09-14', '2016-01-21')


100%|██████████| 90/90 [02:36<00:00,  1.74s/it]

AAPL trial with latent states = 4, context window size = 10
MAPE = 0.012544328290483658, DPA = 0.4111111111111111
--------------------------------------------------------------------------------



