In [11]:
# Install required libraries
!pip install yfinance timesfm
# Set environment variables for TimesFM
os.environ['XLA_PYTHON_CLIENT_PREALLOCATE'] = 'false'
os.environ['JAX_PMAP_USE_TENSORSTORE'] = 'false'
# Import libraries
import yfinance as yf
import pandas as pd
import numpy as np
from collections import defaultdict
import os
import timesfm




# Download stock data from Yahoo Finance
ticker = 'AAPL'  # Example: Apple Inc.
data = yf.download(ticker, start='2020-01-01', end='2023-01-01')

# Calculate moving averages
data['moving_avg_10'] = data['Close'].rolling(window=10).mean()
data['moving_avg_50'] = data['Close'].rolling(window=50).mean()

# Drop rows with NaN values (caused by rolling window calculations)
data = data.dropna()

# Prepare DataFrame
df = data[['Close', 'Volume', 'moving_avg_10', 'moving_avg_50']].reset_index()
df.columns = ['date', 'close', 'volume', 'moving_avg_10', 'moving_avg_50']
df['date'] = pd.to_datetime(df['date'])
df.set_index('date', inplace=True)




[*********************100%%**********************]  1 of 1 completed


In [3]:
df

Unnamed: 0_level_0,close,volume,moving_avg_10,moving_avg_50
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-03-13,69.492500,370732000,70.649000,76.451700
2020-03-16,60.552502,322423600,69.234000,76.161000
2020-03-17,63.215000,324056000,68.322500,75.938150
2020-03-18,61.667500,300233600,66.920750,75.672500
2020-03-19,61.195000,271857200,65.717250,75.404450
...,...,...,...,...
2022-12-23,131.860001,63814900,136.839000,144.131800
2022-12-27,130.029999,69007800,135.392999,143.964799
2022-12-28,126.040001,85438400,133.449999,143.637399
2022-12-29,129.610001,75703700,132.089999,143.354599


In [15]:
timesfm_backend = "cpu"  # Adjust this based on your hardware: "cpu", "gpu", or "tpu"
from jax._src import config
config.update(
    "jax_platforms", {"cpu": "cpu", "gpu": "cuda", "tpu": ""}[timesfm_backend]
)

model = timesfm.TimesFm(
    context_len=512,
    horizon_len=128,
    input_patch_len=32,
    output_patch_len=128,
    num_layers=20,
    model_dims=1280,
    backend=timesfm_backend,
)
model.load_from_checkpoint(repo_id="google/timesfm-1.0-200m")


jax_platforms cpu


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

README.md:   0%|          | 0.00/5.73k [00:00<?, ?B/s]

(…)nts/checkpoint_1100000/metadata/metadata:   0%|          | 0.00/43.9k [00:00<?, ?B/s]

.gitattributes:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

(…)oint_1100000/descriptor/descriptor.pbtxt:   0%|          | 0.00/499 [00:00<?, ?B/s]

checkpoint:   0%|          | 0.00/814M [00:00<?, ?B/s]

Constructing model weights.




Constructed model weights in 5.85 seconds.
Restoring checkpoint from /root/.cache/huggingface/hub/models--google--timesfm-1.0-200m/snapshots/8775f7531211ac864b739fe776b0b255c277e2be/checkpoints.


ERROR:absl:For checkpoint version > 1.0, we require users to provide
          `train_state_unpadded_shape_dtype_struct` during checkpoint
          saving/restoring, to avoid potential silent bugs when loading
          checkpoints to incompatible unpadded shapes of TrainState.


Restored checkpoint in 2.74 seconds.
Jitting decoding.
Jitted decoding in 47.16 seconds.


In [16]:

# Prepare data for TimesFM
def get_batched_data_fn(
    batch_size: int = 128,
    context_len: int = 120,
    horizon_len: int = 24,
):
    examples = defaultdict(list)
    num_examples = 0
    for start in range(0, len(df) - (context_len + horizon_len), horizon_len):
        num_examples += 1
        examples["inputs"].append(df["close"][start:(context_end := start + context_len)].tolist())
        examples["volume"].append(df["volume"][start:context_end + horizon_len].tolist())
        examples["moving_avg_10"].append(df["moving_avg_10"][start:context_end + horizon_len].tolist())
        examples["moving_avg_50"].append(df["moving_avg_50"][start:context_end + horizon_len].tolist())
        examples["outputs"].append(df["close"][context_end:(context_end + horizon_len)].tolist())

    def data_fn():
        for i in range(1 + (num_examples - 1) // batch_size):
            yield {k: v[(i * batch_size) : ((i + 1) * batch_size)] for k, v in examples.items()}

    return data_fn

In [17]:


# Define metrics
def mse(y_pred, y_true):
    y_pred = np.array(y_pred)
    y_true = np.array(y_true)
    return np.mean(np.square(y_pred - y_true), axis=1, keepdims=True)

def mae(y_pred, y_true):
    y_pred = np.array(y_pred)
    y_true = np.array(y_true)
    return np.mean(np.abs(y_pred - y_true), axis=1, keepdims=True)



In [21]:
# Benchmark
batch_size = 128
context_len = 120
horizon_len = 24

input_data = get_batched_data_fn(batch_size=batch_size, context_len=context_len, horizon_len=horizon_len)
input_data

In [35]:


metrics = defaultdict(list)
import time

for i, example in enumerate(input_data()):
    raw_forecast, _ = model.forecast(
        inputs=example["inputs"], freq=[0] * len(example["inputs"])
    )
    start_time = time.time()
    print(len(example["volume"][0]))
    # Forecast with covariates
    # Output: new forecast, forecast by the xreg
    cov_forecast, ols_forecast = model.forecast_with_covariates(
        inputs=example["inputs"],
        dynamic_numerical_covariates={
            "volume": example["volume"],
            "moving_avg_10": example["moving_avg_10"],
            "moving_avg_50": example["moving_avg_50"],
        },
        dynamic_categorical_covariates={},
        static_numerical_covariates={},
        static_categorical_covariates={},
        freq=[0] * len(example["inputs"]),
        xreg_mode="xreg + timesfm",              # default
        ridge=0.0,
        force_on_cpu=False,
        normalize_xreg_target_per_input=True,    # default
    )
    print(
        f"\rFinished batch {i} linear in {time.time() - start_time} seconds",
        end="",
    )
    metrics["eval_mae_timesfm"].extend(
        mae(raw_forecast[:, :horizon_len], example["outputs"])
    )
    metrics["eval_mae_xreg_timesfm"].extend(mae(cov_forecast, example["outputs"]))
    metrics["eval_mae_xreg"].extend(mae(ols_forecast, example["outputs"]))
    metrics["eval_mse_timesfm"].extend(
        mse(raw_forecast[:, :horizon_len], example["outputs"])
    )
    metrics["eval_mse_xreg_timesfm"].extend(mse(cov_forecast, example["outputs"]))
    metrics["eval_mse_xreg"].extend(mse(ols_forecast, example["outputs"]))

print()

for k, v in metrics.items():
    print(f"{k}: {np.mean(v)}")

# The output should show the evaluation metrics for the model's performance with and without covariates.


144
Finished batch 0 linear in 5.205446243286133 seconds
eval_mae_timesfm: 7.543430553542244
eval_mae_xreg_timesfm: 5.603613946172927
eval_mae_xreg: 143.94791399368887
eval_mse_timesfm: 107.54831708626556
eval_mse_xreg_timesfm: 54.88159506622054
eval_mse_xreg: 21026.148813232212


In [28]:
len(cov_forecast[0])

24