# Extract names of images for Overleaf

In [1]:
import os
import glob

# Folder that holds the PNG figures. Defaults to the notebook's working
# directory; point it elsewhere to list another folder.
folder_path = './'  # <-- Change this to your folder path

# Find every .png directly inside folder_path (non-recursive). The folder is
# escaped so characters such as '[' in its name are not treated as glob
# wildcards, and the result is sorted because glob order is filesystem-dependent.
png_file_paths = sorted(glob.glob(os.path.join(glob.escape(folder_path), '*.png')))

# Keep only the file names (what gets pasted into Overleaf), dropping the folder part
png_file_names = [os.path.basename(path) for path in png_file_paths]

# Print the list of PNG file names, one per line
print("Found PNG files:")
for name in png_file_names:
    print(name)


Found PNG files:
Mean Precision Heatmap (Hours x Models) - night hours.png
Mean Precision Heatmap (Models x Horizons).png
Mean Precision Heatmap (Models x Horizons) - day hours only.png
Mean Precision Heatmap (Hours x Models).png
Mean Precision Heatmap (Days x Hours).png
Mean Precision Heatmap (Models x Days) - night hours.png
Mean Precision Heatmap (Days x Hours) - night hours.png
Mean Precision Heatmap (Models x Horizons) - night hours.png
Mean Precision Heatmap (Models x Days) - day hours.png
Mean Precision Heatmap (Hours x Models) - day hours.png
Mean Precision Heatmap (Models x Days).png
Mean Precision Heatmap (Days x Hours) - day hours.png


# Tests of Chronos (prediction of number of passengers)

In [4]:
import pandas as pd  # requires: pip install pandas
import torch
from chronos import BaseChronosPipeline

# Checkpoint to load; "amazon/chronos-bolt-small" selects the corresponding Chronos-Bolt model
MODEL_ID = "amazon/chronos-t5-tiny"
# Monthly airline passenger counts used as the demo series
AIR_PASSENGERS_URL = "https://raw.githubusercontent.com/AileenNielsen/TimeSeriesAnalysisWithPython/master/data/AirPassengers.csv"

# Load the pretrained pipeline for CPU inference with bfloat16 weights
pipeline = BaseChronosPipeline.from_pretrained(
    MODEL_ID,
    device_map="cpu",
    torch_dtype=torch.bfloat16,
)

df = pd.read_csv(AIR_PASSENGERS_URL)

# The context must be either a 1D tensor, a list of 1D tensors,
# or a left-padded 2D tensor with batch as the first dimension.
# Here it is the single passenger-count column as a 1D tensor.
passenger_history = torch.tensor(df["#Passengers"])

# quantiles: fp32 tensor with shape [batch_size, prediction_length, num_quantile_levels]
# mean:      fp32 tensor with shape [batch_size, prediction_length]
quantiles, mean = pipeline.predict_quantiles(
    context=passenger_history,
    prediction_length=12,
    quantile_levels=[0.1, 0.5, 0.9],
)

print("Quantiles: ", quantiles)
print("Mean: ", mean)

Quantiles:  tensor([[[432.9833, 451.0670, 475.7268],
         [424.3524, 464.4244, 495.6600],
         [451.8890, 483.9467, 516.6208],
         [447.9846, 490.1116, 524.4297],
         [494.8380, 538.4035, 573.7491],
         [545.3904, 577.4480, 651.6326],
         [589.1613, 648.3446, 707.5280],
         [566.3511, 634.9873, 671.1549],
         [462.1639, 549.7059, 576.4205],
         [404.8301, 464.4244, 495.0435],
         [355.0997, 423.3249, 480.2477],
         [389.8288, 447.9846, 485.3852]]])
Mean:  tensor([[452.6083, 460.8282, 483.4329, 488.8786, 532.2386, 595.8400, 654.4069,
         620.6025, 528.1286, 457.5402, 424.4552, 445.0049]])


In [None]:
import torch
from chronos import ChronosPipeline

# Pick the GPU when one is available, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the Chronos-T5 model (use "amazon/chronos-bolt-small" for the Bolt model)
pipeline = ChronosPipeline.from_pretrained(
    "amazon/chronos-t5-small",
    device_map=device,
    torch_dtype=torch.bfloat16,
)

# Two toy time series forming a batch of size 2
store_a_sales = [100, 110, 120, 130, 140]  # First time series (e.g., Store A sales)
store_b_sales = [200, 210, 220, 230, 240]  # Second time series (e.g., Store B sales)
context = torch.tensor([store_a_sales, store_b_sales], dtype=torch.float32)

# Prediction parameters
prediction_length = 3  # Forecast next 3 time steps
quantile_levels = [0.1, 0.5, 0.9]  # Lower, median, upper quantiles

# Show the context shape, laid out as [batch_size, sequence_length]
print("Context Shape:", context.shape)  # Should be (2, 5)

# quantiles: fp32 tensor with shape [batch_size, prediction_length, num_quantile_levels]
# mean:      fp32 tensor with shape [batch_size, prediction_length]
quantiles, mean = pipeline.predict_quantiles(
    context=context,
    prediction_length=prediction_length,
    quantile_levels=quantile_levels,
)

# First the shapes of both outputs ...
print("Quantiles:\n", quantiles.shape)
print("Mean:\n", mean.shape)

# ... then the forecast values themselves
print("Quantiles:\n", quantiles)
print("Mean:\n", mean)


Context Shape: torch.Size([2, 5])
Quantiles:
 torch.Size([2, 3, 3])
Mean:
 torch.Size([2, 3])
Quantiles:
 tensor([[[128.1818, 142.0821, 154.8387],
         [119.6481, 134.1642, 159.1496],
         [111.6422, 131.5249, 148.5924]],

        [[232.0968, 251.6129, 280.9677],
         [229.6774, 254.8387, 283.8710],
         [234.6774, 252.4193, 280.8064]]])
Mean:
 tensor([[142.3460, 136.8915, 134.0323],
        [252.9032, 256.2097, 257.7419]])


# Test Chronos with our data (ES=F)

In [14]:
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt

def fetch(ticker='ES=F', interval="1h", session_start=None, session_end=None, tz="America/Chicago"):
    """
    Download price bars with yfinance and return them as a flat-indexed DataFrame.

    Parameters:
    - ticker: symbol passed to ``yf.download`` as ``tickers``
    - interval: bar size string passed to ``yf.download`` (e.g. "1h")
    - session_start / session_end: passed to ``yf.download`` as ``start`` / ``end``
    - tz: timezone the index is converted to when the interval ends in "m" or "h"

    Returns:
    - The downloaded DataFrame with its index moved into a regular column.
    """
    frame = yf.download(
        tickers=ticker,
        interval=interval,
        start=session_start,
        end=session_end,
        prepost=False,
    )

    # Only minute/hour bars are converted to the requested timezone
    # (the conversion also accounts for daylight saving hours).
    is_intraday = interval[-1] in ("m", "h")
    if is_intraday:
        frame = frame.tz_convert(tz)

    # Turn the index into an ordinary column to get a clean DataFrame
    return frame.reset_index()

## Fetch data

In [20]:
# Hourly ES=F bars between the two Chicago-time timestamps below;
# the returned DataFrame is the cell's displayed output.
fetch(
    ticker="ES=F",
    interval="1h",
    session_start=pd.Timestamp('2023-02-13 17:00', tz="America/Chicago"),
    session_end=pd.Timestamp('2024-12-13 16:00', tz="America/Chicago"),
    tz="America/Chicago",
)

[*********************100%***********************]  1 of 1 completed


Price,Datetime,Close,High,Low,Open,Volume
Ticker,Unnamed: 1_level_1,ES=F,ES=F,ES=F,ES=F,ES=F
0,2023-02-13 17:00:00-06:00,4148.25,4149.75,4147.25,4149.75,0
1,2023-02-13 18:00:00-06:00,4145.50,4148.25,4143.00,4148.25,6771
2,2023-02-13 19:00:00-06:00,4143.75,4147.50,4142.00,4145.50,6220
3,2023-02-13 20:00:00-06:00,4142.00,4144.50,4140.75,4143.75,4607
4,2023-02-13 21:00:00-06:00,4143.25,4144.00,4140.25,4142.25,4167
...,...,...,...,...,...,...
10536,2024-12-13 11:00:00-06:00,6042.25,6053.25,6041.50,6051.25,188169
10537,2024-12-13 12:00:00-06:00,6054.50,6057.50,6041.25,6042.25,165926
10538,2024-12-13 13:00:00-06:00,6056.50,6059.75,6052.50,6054.50,125419
10539,2024-12-13 14:00:00-06:00,6055.75,6060.25,6050.75,6056.50,253171


## Clean and pre-process data

In [None]:
from sklearn.preprocessing import StandardScaler
import numpy as np
import pywt

def wavelet_denoise(df, wavelet='db1', level=1):
    """
    Apply wavelet denoising to all numeric columns of a Pandas DataFrame.

    Each numeric column is decomposed with ``pywt.wavedec``, every detail band
    is thresholded with the universal threshold ``sigma * sqrt(2 * log(n))``
    (``n`` = number of samples), and the signal is rebuilt with
    ``pywt.waverec``. The approximation band is left untouched.

    Parameters:
    - df: Pandas DataFrame to denoise (not modified)
    - wavelet: Type of wavelet to use (default is 'db1')
    - level: Level of decomposition for wavelet transform

    Returns:
    - A copy of ``df`` whose numeric columns are replaced by their denoised
      versions; non-numeric columns are copied unchanged.
    """
    def denoise_column(column):
        # Perform wavelet decomposition: [cA_level, cD_level, ..., cD_1]
        coeffs = pywt.wavedec(column, wavelet, level=level)
        # Estimate the noise sigma from the median absolute deviation of the
        # FINEST detail band (cD_1, always the last entry). Indexing with
        # -level picked the coarsest detail band for level > 1, where the
        # signal itself dominates and the threshold gets inflated.
        sigma = np.median(np.abs(coeffs[-1])) / 0.6745
        threshold = sigma * np.sqrt(2 * np.log(len(column)))
        # Threshold the detail bands only; keep the approximation as is
        denoised_coeffs = [coeffs[0]] + [pywt.threshold(c, threshold) for c in coeffs[1:]]
        # Reconstruction can be one sample longer than the input, so trim it
        return pywt.waverec(denoised_coeffs, wavelet)[:len(column)]

    # Work on a copy so the caller's frame keeps its original values
    denoised_df = df.copy()
    for col in df.select_dtypes(include=[np.number]).columns:
        denoised_df[col] = denoise_column(df[col].values)

    return denoised_df

def preprocess(df, dfDaily=None):
    """
    Build the hourly feature frame, optionally enriched with daily indicators.

    Hourly steps: drop the "Ticker" column level, index by "Datetime",
    resample to a strict 1-hour grid (gaps filled by time interpolation), add
    Week / Day / Hour calendar columns and the ROC / EMROC features for
    "Close" and "Volume".

    When ``dfDaily`` is given, daily ATR_10, RSI and 20/60 simple and
    exponential moving averages are computed on it and copied onto each hourly
    row, the DistanceTo* columns and "Close_denoised" are added, the NaN count
    per column is printed, and the leading warm-up rows are dropped.

    Parameters:
    - df: hourly frame with two-level columns (second level named "Ticker")
      and a "Datetime" column. Not modified.
    - dfDaily: optional daily frame with the same column layout and a "Date"
      column. Not modified.

    Returns:
    - The hourly feature DataFrame indexed by Datetime.

    Notes:
    - Hourly bars at or after 17:00 use the daily row of the same weekday,
      earlier bars use the previous weekday's row. Rows are matched by
      weekday only, not by date, so ``dfDaily`` must start on the session day
      of the first hourly bar (or less than a week before it). An
      ``IndexError`` is raised if ``dfDaily`` ends before the hourly data does.
    - Relies on ``wavelet_denoise`` being defined in the notebook.
    """
    import sys

    SESSION_OPEN_HOUR = 17  # bars from this hour on belong to the new daily session
    DAILY_FEATURES = ("ATR_10", "MM_20", "MM_60", "EMM_20", "EMM_60", "RSI")

    def emroc(frame, column, span, lag):
        """
        Rate of Change (ROC) calculation and Exponential Moving Average (EMA) smoothing.

        EWMA smooths the ROC using exponential weighting, where recent values have more weight.
        The span controls how much past data influences the smoothed value.
        """
        frame[f"ROC_{column}"] = frame[column].pct_change(lag)
        frame[f"EMROC_{column}"] = frame[f"ROC_{column}"].ewm(span=span, adjust=False).mean()
        return frame

    def atr(frame, column, span, lag):
        """Average True Range over `span` rows, expressed as a fraction of `column`."""
        frame['Prev Close'] = frame[column].shift(lag)  # close `lag` rows earlier
        high_low = frame['High'] - frame['Low']
        high_prev_close = (frame['High'] - frame['Prev Close']).abs()
        low_prev_close = (frame['Low'] - frame['Prev Close']).abs()
        # Element-wise Python max; a NaN previous close leaves the high-low range
        frame['True Range'] = high_low.combine(high_prev_close, max).combine(low_prev_close, max)
        frame[f'ATR_{span}'] = frame['True Range'].rolling(window=span).mean()
        frame[f'ATR_{span}'] = frame[f'ATR_{span}'] / frame[column]
        return frame

    def rsi(frame, column, span, lag):
        """RSI from simple rolling means of gains and losses; `lag` is the rolling min_periods."""
        change = frame[column].diff()
        frame['Price Change'] = change
        # Non-positive / NaN changes count as zero gain; non-negative / NaN as zero loss
        frame['Gain'] = change.where(change > 0, 0.0)
        frame['Loss'] = (-change).where(change < 0, 0.0)
        frame['Avg Gain'] = frame['Gain'].rolling(window=span, min_periods=lag).mean()
        frame['Avg Loss'] = frame['Loss'].rolling(window=span, min_periods=lag).mean()

        # Calculate the RS (Relative Strength)
        frame['RS'] = frame['Avg Gain'] / frame['Avg Loss']

        # Calculate the RSI
        frame['RSI'] = 100 - (100 / (1 + frame['RS']))
        return frame

    def distancesToMM(frame, column, spans):
        """Simple moving averages of `column` and the percentage distance to each."""
        for span in spans:
            frame[f'MM_{span}'] = frame[column].rolling(window=span).mean()
            frame[f'DistanceToMM{span}'] = ((frame[column] - frame[f'MM_{span}']) / frame[f'MM_{span}']) * 100
        return frame

    def distancesToEMM(frame, column, spans):
        """Exponential moving averages of `column` and the percentage distance to each."""
        for span in spans:
            frame[f'EMM_{span}'] = frame[column].ewm(span=span, adjust=False).mean()
            frame[f'DistanceToEMM{span}'] = ((frame[column] - frame[f'EMM_{span}']) / frame[f'EMM_{span}']) * 100
        return frame

    def progress_bar(completion, total):
        """Redraw a 50-character star bar on the current stdout line."""
        percent = (completion / total) * 100
        # Half as many stars as percentage points, padded to 50 characters
        bar = ('*' * int(percent // 2)).ljust(50, ' ')
        if completion == total:
            sys.stdout.write(f"\r[{bar}] 1 of 1 completed\n")
        else:
            sys.stdout.write(f"\r[{bar}] {percent:.0f}%")
        sys.stdout.flush()

    # ---- Hourly frame -------------------------------------------------------
    # Work on a copy: the caller's frame stays intact and the function can be
    # re-run on the same input (dropping the "Ticker" level twice would fail).
    df = df.copy()

    # Remove the ticker level from the columns
    df.columns = df.columns.droplevel("Ticker")
    df.columns.names = [None]

    # Index the rows by their datetime
    df = df.set_index("Datetime")
    df.index = pd.to_datetime(df.index)

    # Force a strict 1-hour grid; missing hours (e.g. the 4PM-5PM market
    # break) become NaN rows and are then filled by time interpolation.
    df = df.asfreq('h')
    df = df.interpolate(method="time")

    # Calendar features
    df["Week"] = df.index.isocalendar().week
    df["Day"] = df.index.dayofweek
    df["Hour"] = df.index.hour

    df = emroc(df, "Close", 72, 2)
    df = emroc(df, "Volume", 72, 2)

    if dfDaily is None:
        return df

    # ---- Daily frame --------------------------------------------------------
    dfDaily = dfDaily.copy()
    dfDaily.columns = dfDaily.columns.droplevel("Ticker")
    dfDaily.columns.names = [None]
    dfDaily["Datetime"] = pd.to_datetime(dfDaily["Date"]).dt.normalize()
    dfDaily = dfDaily.drop(["Date"], axis=1).set_index("Datetime")

    # One row per calendar day, gaps filled by time interpolation
    dfDaily = dfDaily.asfreq('D')
    dfDaily = dfDaily.interpolate(method="time")

    dfDaily = atr(dfDaily, "Close", 10, 1)
    dfDaily = rsi(dfDaily, "Close", 14, 1)
    dfDaily = distancesToMM(dfDaily, "Close", [20, 60])
    dfDaily = distancesToEMM(dfDaily, "Close", [20, 60])

    # ---- Map every hourly row to its daily row --------------------------------
    hourly_dow = df.index.dayofweek.to_numpy()
    hourly_hour = df.index.hour.to_numpy()
    # Weekday of the daily row each hourly bar should read from
    wanted_dow = np.where(hourly_hour >= SESSION_OPEN_HOUR, hourly_dow, (hourly_dow - 1) % 7)
    daily_dow = dfDaily.index.dayofweek.to_numpy()

    step = len(df) / 100
    curr = 1
    daily_pos = np.empty(len(df), dtype=np.intp)
    daily_idx = 0
    for row_idx, wanted in enumerate(wanted_dow):
        if row_idx >= curr * step:
            progress_bar(curr, 100)
            curr += 1
        # Both frames are consecutive in time, so the daily pointer only moves
        # forward; running past the end raises IndexError.
        while daily_dow[daily_idx] != wanted:
            daily_idx += 1
        daily_pos[row_idx] = daily_idx
    progress_bar(100, 100)

    # Copy the daily indicators onto the hourly rows in one vectorized step
    for col in DAILY_FEATURES:
        df[col] = dfDaily[col].to_numpy()[daily_pos]

    df["DistanceToMM20"] = ((df["Close"] - df["MM_20"]) / df["MM_20"]) * 100
    df["DistanceToMM60"] = ((df["Close"] - df["MM_60"]) / df["MM_60"]) * 100
    df["DistanceToEMM20"] = ((df["Close"] - df["EMM_20"]) / df["EMM_20"]) * 100
    df["DistanceToEMM60"] = ((df["Close"] - df["EMM_60"]) / df["EMM_60"]) * 100

    # Only the denoised Close is kept, so only Close is handed to the denoiser
    df["Close_denoised"] = wavelet_denoise(df[["Close"]])["Close"]

    nan_counts = df.isna().sum()
    print(nan_counts)
    # Drop as many leading rows as the worst column has NaNs. iloc is used
    # because tail(-0) == tail(0) would return an empty frame when nothing is NaN.
    warmup_rows = int(nan_counts.max())
    return df.iloc[warmup_rows:]

In [3]:
import pandas as pd
# Load "es_future_final_chronos.csv" from the current working directory.
# NOTE(review): presumably the saved result of the preprocessing/evaluation
# steps of this notebook — confirm where the file is produced.
panda = pd.read_csv("es_future_final_chronos.csv")
# Bare last expression: the notebook displays the frame's column index
panda.columns

Index(['Datetime', 'Close', 'Volume', 'Week', 'Day', 'Hour', 'ROC_Close',
       'EMROC_Close', 'ROC_Volume', 'EMROC_Volume', 'ATR_10', 'MM_20', 'MM_60',
       'EMM_20', 'EMM_60', 'RSI', 'DistanceToMM20', 'DistanceToMM60',
       'DistanceToEMM20', 'DistanceToEMM60', 'Close_denoised',
       'Close_denoised_normalized', 'Hour_sin', 'Hour_cos', 'Day_sin',
       'Day_cos', 'Week_sin', 'Week_cos', 'Close_denoised_standardized',
       'score', 'APE', 'SIGN', 'Result', 'MATRIX_1', 'MATRIX_2', 'MATRIX_3',
       'MATRIX_4', 'MATRIX_5', 'MATRIX_6', 'MATRIX_7', 'MATRIX_8', 'MATRIX_9',
       'MATRIX_10', 'MATRIX_11', 'MATRIX_12'],
      dtype='object')