In [None]:
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
from typing import Dict, Tuple
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from model import arreturn, features_label, flawed_model, kfold_cv
import yfinance as yf

In [None]:
# -----------------------------
# Simulate a return series and take a first look at it
# -----------------------------
seed = 76
rng = np.random.default_rng(seed)   # seeded generator handed to the simulator
returns, alphas = arreturn(rng)     # alphas shape = (n, 2): one column per AR coefficient

# Overlay the simulated returns with both time-varying AR coefficients
fig, ax = plt.subplots(figsize=(12, 4))
ax.plot(returns, label='returns')
for coef_idx, coef_label in enumerate(('alpha1', 'alpha2')):
    # column 0 is the first AR coefficient over time, column 1 the second
    ax.plot(alphas[:, coef_idx], label=coef_label)
ax.set_title('Simulated returns with regime coefficients')
ax.legend()
plt.show()

In [None]:
# -----------------------------
# Build the in-sample training set
# -----------------------------
# Only the first 404 simulated returns are used for the training experiments.
returns_train = returns[:404]

# q = 4: four previous returns are used to predict the next return.
X, Y = features_label(returns_train, q=4)

In [None]:
# Fit one model on the entire training block and track its in-sample loss
n_features = X.shape[1]                 # input width = number of feature columns in X
full_model = flawed_model(n_features)

fit_options = dict(epochs=100, batch_size=32, verbose=0)   # verbose=0 keeps the cell output quiet
history = full_model.fit(X, Y, **fit_options)

# Visualise how the training loss evolves from epoch to epoch
train_loss_curve = history.history['loss']
plt.figure(figsize=(12, 4))
plt.plot(train_loss_curve, marker='D', ms=3.0)
plt.title('training loss after each epoch')
plt.show()

In [None]:
# -----------------------------
# K-fold cross-validation (no shuffle)
# -----------------------------
# Cross-validates on the same in-sample (X, Y) that full_model was fitted on.
# kfold_cv comes from the local `model` module; its result is summarised later
# with np.mean / np.std, so it is treated as a collection of per-fold losses.
# NOTE(review): the "no shuffle" behaviour and the defaults quoted below are
# defined in model.py and are not visible in this notebook — confirm there.

val_losses = kfold_cv(X, Y)         # per the author: default is 20 splits, learning rate = 1e-4 and 100 epochs

In [None]:
# -----------------------------
# True out-of-sample test (far future slice)
# -----------------------------
# Evaluate full_model on a block the model never saw: training used
# returns[0:404], the test block starts at index 3000.
returns_test = returns[3000:]               # a future block far away

# Build the test features with the same lag order (q = 4) that was used to
# build the training features. Passing q explicitly guarantees the feature
# width matches full_model's input layer instead of silently depending on
# whatever default features_label happens to have.
X_test, Y_test = features_label(returns_test, q = 4)

# full_model was trained on the in-sample block only (returns_train), so this
# is a genuine out-of-sample loss.
test_loss = full_model.evaluate(X_test, Y_test, verbose=0)

In [None]:
# Summarise the three error estimates for the simulated series

final_train_loss = history.history['loss'][-1]   # loss recorded after the last training epoch
cv_loss_mean = np.mean(val_losses)               # average validation loss across folds
cv_loss_std = np.std(val_losses)                 # spread of the validation loss across folds

print('final training error:', final_train_loss)
print("\nMean CV loss:", cv_loss_mean)
print("Std CV loss:", cv_loss_std)
print('test loss:', test_loss)

In [None]:
# Download S&P 500 index history from Yahoo Finance
ticker = yf.Ticker('^GSPC')
SPX_data = ticker.history(start="2000-01-01", end="2025-09-01")
SPX_close = SPX_data['Close'].tolist()

# Daily simple returns in percent: pair each close with the following one
# (scaled by 100 so the numbers are easier to read on a plot)
SPX_returns = [
    (close_next / close_prev - 1) * 100
    for close_prev, close_next in zip(SPX_close[:-1], SPX_close[1:])
]

# Quick look at the return series
plt.figure(figsize=(12, 4))
plt.plot(SPX_returns)
plt.title('S&P 500 daily returns (%)')
plt.show()

In [None]:
# Fit on an early slice of the S&P 500 return series (first 206 daily returns)
spx_train = SPX_returns[:206]
X, Y = features_label(spx_train, q=6)   # q raised to 6 lagged returns to fit better

# One model trained on the whole training block; in-sample loss is tracked per epoch
n_inputs = X.shape[1]
spx_full_model = flawed_model(n_inputs, learning_rate=0.002)   # learning rate raised to 0.002 to speed up convergence

history = spx_full_model.fit(X, Y, epochs=100, batch_size=16, verbose=0)

# Visualise how the training loss evolves from epoch to epoch
spx_loss_curve = history.history['loss']
plt.figure(figsize=(12, 4))
plt.plot(spx_loss_curve, marker='D', ms=3.0)
plt.title('training loss after each epoch')
plt.show()

In [None]:
# k-fold CV on the S&P 500 training features/labels.
# X, Y here are the ones rebuilt from spx_train (q = 6), not the simulated data,
# and the learning rate matches the 0.002 used for spx_full_model.
# NOTE(review): "4 observations per fold" assumes features_label returns
# len(spx_train) - q = 200 samples — confirm against model.py.

val_losses = kfold_cv(X, Y, n_splits = 50, learning_rate= 0.002) # 50 splits (each group presumably has 4 observations)

In [None]:
# True out-of-sample test (far future slice)
# Training used SPX_returns[0:206]; the test block starts at index 2000, so
# there is no overlap with the data spx_full_model was fitted on.
# Note: X_test, Y_test and test_loss overwrite the values from the simulated-data test.

spx_test = SPX_returns[2000:]               # a future block, far from the training slice
X_test, Y_test = features_label(spx_test, q = 6)   # same q = 6 as the training features
test_loss = spx_full_model.evaluate(X_test, Y_test, verbose=0)   # spx_full_model was trained on spx_train only, never on this block

In [None]:
# Results for the S&P 500 experiment: in-sample, cross-validated and out-of-sample loss
spx_summary = [
    ('Final training error:', history.history['loss'][-1]),   # loss after the last epoch
    ("\nMean CV loss:", np.mean(val_losses)),                 # leading newline separates the CV section
    ("Std CV loss:", np.std(val_losses)),
    ('Test loss:', test_loss),
]
for summary_label, summary_value in spx_summary:
    print(summary_label, summary_value)