In [30]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import openai
import pickle
import traceback
import dotenv
import logging
import requests
import re

from pathlib import Path
from typing import Optional
from time import sleep, time
from tqdm import tqdm
from datetime import datetime
from openai import OpenAIError
from data.serialize import SerializerSettings
from models.utils import grid_iter
from models.promptcast import get_promptcast_predictions_data
from models.llmtime import get_llmtime_predictions_data
from models.validation_likelihood_tuning import get_autotuned_predictions_data

# Set up the OpenAI API key from the environment.
# Create a file named '.env' in the directory of this notebook containing the line
# 'OPENAI_API_KEY=<your key>'.
# override=True lets values from '.env' replace variables already set in the process environment.
dotenv.load_dotenv(".env", override=True)
# Raises KeyError if OPENAI_API_KEY is defined neither in '.env' nor in the environment.
openai.api_key = os.environ['OPENAI_API_KEY']

# Run LLMTime

In [31]:
# Experiment configuration. Every name in this cell is read as a global by run_analysis
# and by the get_model_* helpers below.

# LLM model name; must be one of the names handled by get_model_label
model_name = 'gpt-3.5-turbo-instruct'
# Experiment starting date; each dataset is trimmed so that roughly `w + 1` rows
# before this date are kept as context
starting_date = pd.Timestamp('2021-11-01')
# Assets to test on; each one is loaded from 'datasets/{asset}.xlsx'
assets = ['gdaxi', 'ftse', 'cbu', 'SPGTCLTR', 'djci', 'SP500', 'stoxx', 'CRIX', 'cact']
# Window sizes (the `w` argument of run_analysis); other candidate sizes: 60, 90, 120, 150
windows = [30, 45] # 60, 90, 120, 150
# Number of predictions per step
# Uses the 'n' parameter from GPT completions
n = 128
# Number of steps: the last `steps` rows of every rolling window form the test split,
# so each window spans `w + steps` rows. Total daily predictions = n * steps
steps = 8
# Max retries per step (applied whenever the prediction call raises OpenAIError)
# Usually no more than 2 - 3 are required
max_retries = 50
# Save results every 'save_interval' days (i.e. every 'save_interval' processed windows)
save_interval = 1
# Output directory. File names are generated automatically with the form '{date}_{asset}_{model_label}_w={window}.csv'
output_directory = 'simulations_test'

# LLMTime model hyperparameters, as outlined in the paper.
# The whole dict is merged into the per-model hyperparameters by get_model_hypers and
# forwarded to the prediction function; see models/llmtime for the meaning of each entry.
# NOTE(review): `temp` is presumably the sampling temperature - confirm against models/llmtime.
llm_hypers = dict(
    temp=0.7,
    alpha=0.95,
    beta=0.3,
    basic=False,
    settings=SerializerSettings(base=10, prec=2, signed=True, time_sep=', ', bit_sep='', minus_sign='-')
)

In [32]:
# To support another LLM, add a matching branch to each of the three functions below
def get_model_label(model_name):
    """Return the human-readable label used for ``model_name`` in results and file names.

    Raises:
        NotImplementedError: if ``model_name`` is not one of the supported models.
    """
    known_labels = (
        ('gpt-3.5-turbo-instruct', 'LLMTime GPT-3.5'),
        ('gpt-4', 'LLMTime GPT-4'),
        ('gpt-4o', 'LLMTime GPT-4o'),
    )
    for candidate, label in known_labels:
        if model_name == candidate:
            return label
    raise NotImplementedError(f"Unexpected model '{model_name}'.")

def get_model_hypers(model_name):
    """Build the hyperparameter mapping for ``model_name``.

    Returns a one-entry dict keyed by the model's label; the value holds the
    API model name under ``'model'`` plus every entry of ``llm_hypers``.

    Raises:
        NotImplementedError: if ``model_name`` is not supported (raised by
            ``get_model_label`` for unknown names).
    """
    label = get_model_label(model_name)
    for supported in ('gpt-3.5-turbo-instruct', 'gpt-4', 'gpt-4o'):
        if model_name == supported:
            hypers = {'model': supported, **llm_hypers}
            return {label: hypers}
    raise NotImplementedError(f"Unexpected model '{model_name}'.")

def get_model_predict_fns(model_name):
    """Map the label of ``model_name`` to the function that produces its predictions.

    Every supported model currently uses ``get_llmtime_predictions_data``.

    Raises:
        NotImplementedError: if ``model_name`` is not supported (raised by
            ``get_model_label`` for unknown names).
    """
    label = get_model_label(model_name)
    supported_models = ('gpt-3.5-turbo-instruct', 'gpt-4', 'gpt-4o')
    if model_name not in supported_models:
        raise NotImplementedError(f"Unexpected model '{model_name}'.")
    return {label: get_llmtime_predictions_data}



In [33]:
def run_analysis(asset, w):
    """Run the rolling-window LLMTime forecast for one asset and one context length.

    The function reads ``datasets/{asset}.xlsx``, turns its ``Close`` column
    (``STOXX`` is renamed to ``Close``) into log returns and slides a window of
    ``w + steps`` rows over the series. In every window the last ``steps`` rows
    are the test split and the rest is the training split; ``n`` samples are
    requested per window. Results are checkpointed to
    ``{output_directory}/rolling_results.pkl`` so that a failure inside this
    call resumes from the last saved window, and the final table is written to
    ``{output_directory}/{today}_{asset}_{model_label}_w={w}.csv`` with one row
    per window and ``n * steps`` value columns.

    Reads the notebook globals ``model_name``, ``starting_date``, ``n``,
    ``steps``, ``max_retries``, ``save_interval`` and ``output_directory``.

    Args:
        asset: Dataset name; the file ``datasets/{asset}.xlsx`` must contain a
            ``Date`` column and a ``Close`` (or ``STOXX``) column.
        w: Number of rows in the training part of every window.

    Raises:
        ValueError: if the dataset has no row on or after ``starting_date``.
        Exception: the last error is re-raised when the run fails more than
            ``max_retries`` times in a row without completing a new window.
    """
    window = w + steps
    # One output row holds every sampled value of a window: n samples x steps.
    m = n * steps

    model_label = get_model_label(model_name)
    model_hypers = get_model_hypers(model_name)
    model_predict_fns = get_model_predict_fns(model_name)
    model_names = list(model_predict_fns.keys())

    results_filename = os.path.join(output_directory, 'rolling_results.pkl')
    time_log_filename = os.path.join(output_directory, 'time_log.pkl')

    os.makedirs(output_directory, exist_ok=True)

    # The checkpoint files are not keyed by asset or window, so leftovers from a
    # previous call must be removed before starting.
    for stale_file in (results_filename, time_log_filename):
        if os.path.exists(stale_file):
            os.remove(stale_file)

    # Load the DataFrame
    df = pd.read_excel(f"datasets/{asset}.xlsx")
    df['Date'] = pd.to_datetime(df['Date'])

    if df.index.duplicated().any():
        df = df.reset_index(drop=True)

    # Locate the starting date by position. When the exact date is missing
    # (e.g. a market holiday for this asset) fall back to the first later date
    # instead of failing with a bare IndexError.
    start_positions = np.flatnonzero((df["Date"] == starting_date).to_numpy())
    if start_positions.size == 0:
        start_positions = np.flatnonzero((df["Date"] > starting_date).to_numpy())
    if start_positions.size == 0:
        raise ValueError(f"No data on or after {starting_date} in datasets/{asset}.xlsx.")

    # Keep w + 1 rows before the starting date: one extra row is consumed by
    # the log-return difference below.
    start_index = max(int(start_positions[0]) - w - 1, 0)
    df = df.iloc[start_index:]
    print(df.shape)
    # Handle stoxx header
    df = df.rename(columns={"STOXX": "Close"})
    df = df.set_index('Date')
    df['Log_Return'] = np.log(df['Close'] / df['Close'].shift(1))
    df = df[['Log_Return']]
    df = df.dropna()

    # Number of windows completed so far; used to tell a stalled run from a
    # run that keeps failing but still makes progress.
    progress = {'windows_done': 0}

    def create_logger(filename: str, name: Optional[str] = None):
        """Return the logger ``name``, attaching file and stream handlers once."""
        logger = logging.getLogger(name)

        # Set the overall logging level
        logger.setLevel(logging.INFO)

        # Handlers are only added the first time, otherwise every call of
        # run_analysis would duplicate each log line.
        if len(logger.handlers) == 0:
            file_handler = logging.FileHandler(filename)
            stream_handler = logging.StreamHandler()

            file_handler.setLevel(logging.INFO)
            stream_handler.setLevel(logging.INFO)

            formatter = logging.Formatter('%(asctime)s | %(filename)s | %(levelname)s | %(message)s')
            file_handler.setFormatter(formatter)
            stream_handler.setFormatter(formatter)

            logger.addHandler(file_handler)
            logger.addHandler(stream_handler)

        return logger

    def save_results(results, filename=results_filename):
        """Pickle the list of per-window results (the resume checkpoint)."""
        with open(filename, "wb") as f:
            pickle.dump(results, f)

    def load_results(filename=results_filename):
        """Load the checkpoint written by save_results; empty list if there is none."""
        # The checkpoint is a file this function wrote itself; do not point
        # this at pickle files from untrusted sources.
        try:
            with open(filename, "rb") as f:
                return pickle.load(f)
        except FileNotFoundError:
            return []

    def save_time_log(start_time, filename=time_log_filename):
        """Pickle the start time of the current attempt."""
        with open(filename, "wb") as f:
            pickle.dump(start_time, f)

    def get_dataset_DP(df):
        """Return the first column of ``df`` as a Series."""
        series = df.iloc[:, 0]
        return series

    def get_datasets_DP(ds, ds_name, testfrac=0.2, predict_steps=steps):
        """Split ``ds`` into train/test and return ``{ds_name: (train, test)}``.

        The test split is the last ``predict_steps`` rows, or the last
        ``testfrac`` fraction when ``predict_steps`` is None.
        """
        series = get_dataset_DP(ds)
        splitpoint = len(series) - predict_steps if predict_steps is not None else int(len(series) * (1 - testfrac))
        train = series.iloc[:splitpoint]
        test = series.iloc[splitpoint:]
        return {ds_name: (train, test)}

    def rolling_window_emergency(df, logger, date, model):
        """Query ``model`` again for the window whose last row is ``date``.

        Only used when the main pass did not yield enough usable samples for
        that window. Returns the samples of the new prediction.
        """
        # Rebuild exactly the window used by the main pass: it ends at `date`
        # (inclusive) and spans `window` = w + steps rows. The previous code
        # started the window AT `date` with only `w` rows, i.e. it predicted a
        # different, later and shorter window than the one being repaired.
        end = int(np.flatnonzero(df.index == date)[0]) + 1
        start = max(end - window, 0)
        ds = df.iloc[start:end]
        ds_name = 'ds'
        datasets = get_datasets_DP(ds, ds_name, predict_steps=steps)
        train, test = datasets[ds_name]
        random_seed = np.random.randint(0, 100000)
        model_hypers[model].update({'dataset_name': ds_name, 'random_seed': random_seed})
        hypers = list(grid_iter(model_hypers[model]))
        retries = 0
        obtained_samples = []
        while retries < max_retries and len(obtained_samples) < n:
            try:
                pred_dict = get_autotuned_predictions_data(train, test, hypers, n, model_predict_fns[model], verbose=False, parallel=True)
                obtained_samples = pred_dict['samples']
                break
            except OpenAIError:
                retries += 1
                sleep(1)
                if retries == max_retries:
                    logger.error(f"Failed to get predictions for {model} after {max_retries} retries.")
                    # NOTE(review): the last-resort fallback fills the window with
                    # standard-normal noise, which is indistinguishable from real
                    # predictions in the CSV. Consider NaN instead.
                    obtained_samples = pd.DataFrame({date: np.random.normal(size=n)})

        return obtained_samples

    # Main processing function
    def rolling_window_datasets(df, logger, window_length=window, predict_steps=steps):
        """Predict every rolling window not yet in the checkpoint.

        Returns a list of ``(last_date_of_window, {model_label: samples}, last_row)``
        tuples, one per window, including those loaded from the checkpoint.
        """
        rolling_results = load_results()
        start_index = len(rolling_results)
        # NOTE(review): with this bound the window ending on the final row of
        # `df` is never processed - confirm that this is intended.
        total_iterations = len(df) - window_length

        for start in range(start_index, total_iterations):
            try:
                logger.info(f"Processing window starting at index {start} of {total_iterations}")
                end = start + window_length
                ds = df.iloc[start:end]
                ds_name = 'ds'
                datasets = get_datasets_DP(ds, ds_name, predict_steps=predict_steps)
                train, test = datasets[ds_name]

                out = {}
                dates = ds.index[-1]
                last_value = ds.iloc[-1]

                for model in model_names:
                    random_seed = np.random.randint(0, 100000)
                    model_hypers[model].update({'dataset_name': ds_name, 'random_seed': random_seed})
                    hypers = list(grid_iter(model_hypers[model]))

                    retries = 0
                    while retries < max_retries:
                        try:
                            pred_dict = get_autotuned_predictions_data(train, test, hypers, n, model_predict_fns[model], verbose=False, parallel=True)
                            out[model] = pred_dict['samples']
                            break
                        except OpenAIError:
                            retries += 1
                            sleep(1)
                            if retries == max_retries:
                                logger.error(f"Failed to get predictions for {model} after {max_retries} retries.")
                                # Marker picked up (and reported) by the post-processing step.
                                out[model] = np.nan

                rolling_results.append((dates, out, last_value))
                progress['windows_done'] = len(rolling_results)

                if (start - start_index + 1) % save_interval == 0:
                    logger.info(f"Saving intermediate results at index {start}")
                    save_results(rolling_results)

            except Exception as e:
                logger.error(f"Error at index {start}: {e}")
                traceback.print_exc()
                # Persist whatever was finished so the restart does not redo it.
                save_results(rolling_results)
                raise

        save_results(rolling_results)
        return rolling_results

    # Main loop to handle restarting from the last save
    logger = create_logger(name="llmtime_estimation", filename="llmtime.log")
    stalled_restarts = 0
    windows_done_at_last_failure = 0
    while True:
        try:
            start_time = time()
            save_time_log(start_time)
            rolling_results = rolling_window_datasets(df, logger)
            end_time = time()
            logger.info(f"Processing time: {end_time - start_time} seconds")

            # Post-processing and output
            column_names = ['Date'] + [f'Log Return {i+1}' for i in range(m)]
            output = pd.DataFrame(columns=column_names)

            # Loop through each window result
            for item in rolling_results:
                date = item[0]  # Last date of the window
                data_matrix = item[1].get(model_label, None)  # Samples DataFrame, or NaN on failure

                if not isinstance(data_matrix, pd.DataFrame):
                    logger.warning(f"Warning: Expected DataFrame at {date} but got {type(data_matrix)}")
                    continue

                # Drop incomplete samples. Not done in place so the frames held
                # in `rolling_results` stay untouched.
                data_matrix = data_matrix.dropna()
                available = data_matrix.shape[0]
                if available < n:
                    missing = n - available
                    if missing < available:
                        # Top up by re-drawing existing samples. drop=True keeps
                        # the old row labels out of the data: without it they
                        # become an extra column and leak into the flattened values.
                        data_matrix = pd.concat([data_matrix, data_matrix.sample(n=missing, axis=0)], axis=0).reset_index(drop=True)
                    else:
                        logger.warning(f"Did not find enough samples for {date.strftime('%Y-%m-%d')}, will try to predict again.")
                        data_matrix = rolling_window_emergency(df=df, date=date, logger=logger, model=model_names[0])
                        if not isinstance(data_matrix, pd.DataFrame):
                            logger.warning(f"Warning: Expected DataFrame at {date} but got {type(data_matrix)}")
                            continue

                # Flatten to one row of exactly m = n * steps values so the row
                # matches the header. Truncating to n values (as before) made the
                # assignment below fail with "mismatched columns" whenever steps != 1.
                flat_list = data_matrix.values.flatten().tolist()[:m]
                flat_list += [np.nan] * (m - len(flat_list))

                # Append [date, values...] as a new row of the output table
                output.loc[len(output)] = [date] + flat_list

            # Write the table to CSV
            current_date = datetime.now().strftime("%Y-%m-%d")
            output_filename = os.path.join(output_directory, f"{current_date}_{asset}_{model_label.replace(' ', '_')}_w={w}.csv")
            output.to_csv(output_filename, index=False)
            logger.info(f"Saved result to {output_filename}.")

            os.remove(results_filename)
            os.remove(time_log_filename)
            break
        except Exception as e:
            # Restart from the checkpoint, but give up when the run keeps failing
            # without finishing any new window: such an error is deterministic
            # and retrying forever would only hang the notebook.
            if progress['windows_done'] > windows_done_at_last_failure:
                stalled_restarts = 0
            windows_done_at_last_failure = progress['windows_done']
            stalled_restarts += 1
            if stalled_restarts > max_retries:
                logger.error(f"An error occurred: {e}. Giving up after {max_retries} restarts without progress.")
                raise
            logger.error(f"An error occurred: {e}. Restarting from last save...")
            sleep(2)

In [None]:
# Run the analysis for every (asset, window size) combination, one after another.
# Each call writes its own CSV file to `output_directory`.
# Running this cell may take a long time
# All logs are stored in 'llmtime.log'
for asset in assets:
    for w in windows:
        run_analysis(asset, w)

# Test the GPT-4 and GPT-4o tokenizers (they appear to encode digits separately)

In [None]:
from tiktoken._educational import *
import tiktoken

# Inspect how the GPT-4 tokenizer splits a serialized series into tokens.
#
# Alternatives kept for experimenting with the educational encoder:
# enc = train_simple_encoding()
# enc = SimpleBytePairEncoding.from_tiktoken("gpt-4o-2024-05-13")
# enc = SimpleBytePairEncoding.from_tiktoken("gpt-3.5-turbo-0301")
enc = tiktoken.encoding_for_model("gpt-4")

# A serialized series as it would appear in a prompt. It is text to be
# tokenized, not a list of token ids: it contains negative numbers, so it
# cannot be passed to enc.decode().
serialized_series = "356, 405, -467, 494, 258, -534, -252, 905, -1493, -70, -645, 123, -189, 230, 1995, -397, -535, -147, 35, -469, 428, -1119, -1382, -8199, -1382, 492, 459, -584, -832, 2010, -1146, -279, -301, 480, 42, 5245, -1372, -4528, -725, 242, 247, -457, 8, -55, 1255, -855, -75, -591, -3278, 2783, 1146, 1137, 644, -291, -389, 985, 1332, -446, -2276, 280, -119, -1067, 234, -28, 379, 2671, 150, 189, 3597, -2114, -236, 517, -1536, 1853, -188, -115, -1975, -595, -1572, -816, 538, 204, 770, 4003, -736, -294"

# Encode the text, then decode every token on its own to show which characters
# each token covers (i.e. whether digits are grouped or encoded separately).
[enc.decode([token_id]) for token_id in enc.encode(serialized_series)]

In [9]:
import tiktoken

# Show which encoding tiktoken maps 'gpt-4o' to (the recorded output below is 'o200k_base').
tiktoken.encoding_for_model("gpt-4o")

<Encoding 'o200k_base'>