# Multi-GPU Backtest the strategies

Use an LLM to go through and predict the buy/ sell/ hold recommendation for the company for the given date. Steps needed:

1. Load the LLM - use DeepSeek R1 Qwen model at 7B parameters first and try the quantised models next
2. Step through each data and each financial statement to get a result
3. Log the results in a file and save to S3 (will need a logging file to save to S3 and resume in case of kernel crash)
4. Need a backtesting framework to apply the results


## Load libraries needed

In [1]:
%package install pytorch-gpu torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia

Running: micromamba install pytorch-gpu torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia --yes --quiet --log-level=error

Note: Packages not from Bloomberg channels are not vetted by Bloomberg.
[93mPlease restart the Jupyter kernel if you run into any issues after installing or updating packages via %package.[0m



In [1]:
import json
import boto3
from s3fs import S3FileSystem
import os
import datetime

import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from huggingface_hub import login
import torch
from accelerate import Accelerator

import pandas as pd
from IPython.display import Markdown, display
from ipywidgets import IntProgress, Label, HBox

from helper import get_s3_folder
import s3Helpers
import company_data
import prompts
from s3Helpers import S3ModelHelper, Logger
from prompts import SYSTEM_PROMPTS

In [2]:
import importlib
importlib.reload(company_data)
importlib.reload(s3Helpers)
importlib.reload(prompts)

<module 'prompts' from '/project/prompts.py'>

In [3]:
torch.cuda.device_count()

4

## Load the LLM

Models to test:
- Qwen (Qwen/Qwen2.5-7B-Instruct)
- Llama (meta-llama/Llama-3.2-7B-Instruct)
- DeepSeek (deepseek-ai/DeepSeek-R1-Distill-Qwen-14B)
- DeepSeek Quantized (deepseek-ai/DeepSeek-R1-Distill-Qwen-32B) quantized to 4 bits

In [4]:
# Log into Huggingface

with open('pass.txt') as p:
    hf_login = p.read()
    
hf_login = hf_login[hf_login.find('=')+1:hf_login.find('\n')]
login(hf_login, add_to_git_credential=False)

In [5]:
# Set up Quantization 
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4"

)

In [6]:
accelerator = Accelerator()

In [7]:
# Flag to download from Huggingface again or use stored model
USE_HF = True
USE_QUANTIZATION = True

model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
model_id_s3 = 'deepseek32'


if USE_HF:
    
    if USE_QUANTIZATION:
        model = AutoModelForCausalLM.from_pretrained(model_id, device_map={"":accelerator.process_index}, quantization_config=quant_config)
    else:
        model = AutoModelForCausalLM.from_pretrained(model_id, device_map={"":accelerator.process_index}, torch_dtype=torch.bfloat16)
    tokenizer = AutoTokenizer.from_pretrained(model_id)
else:
    # load the pre-saved model from S3
    model_helper = s3Helpers.S3ModelHelper(s3_sub_folder='tmp/fs')
    model = model_helper.load_model(model_id_s3, accelerator)
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model_helper.clear_folder(model_id_s3)

print(f"Memory footprint: {model.get_memory_footprint() / 1e9:,.1f} GB")

config.json:   0%|          | 0.00/664 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/64.0k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/8 [00:00<?, ?it/s]

model-00001-of-000008.safetensors:   0%|          | 0.00/8.79G [00:00<?, ?B/s]

model-00002-of-000008.safetensors:   0%|          | 0.00/8.78G [00:00<?, ?B/s]

model-00003-of-000008.safetensors:   0%|          | 0.00/8.78G [00:00<?, ?B/s]

model-00004-of-000008.safetensors:   0%|          | 0.00/8.78G [00:00<?, ?B/s]

model-00005-of-000008.safetensors:   0%|          | 0.00/8.78G [00:00<?, ?B/s]

model-00006-of-000008.safetensors:   0%|          | 0.00/8.78G [00:00<?, ?B/s]

model-00007-of-000008.safetensors:   0%|          | 0.00/8.78G [00:00<?, ?B/s]

model-00008-of-000008.safetensors:   0%|          | 0.00/4.07G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/3.07k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

Memory footprint: 18.7 GB


## Load Financial PIT dataset

In [12]:
## Load from S3 using the helper file
filename = 'data_annual_pit_indu.json' #'data_quarterly_pit_indu.json'
sec_helper = company_data.SecurityData('tmp/fs',filename)
all_data = sec_helper.get_all_data()

In [13]:
# USE WHILE DEVELOPING to
importlib.reload(company_data)
sec_helper = company_data.SecurityData('tmp/fs',filename, all_data)

In [14]:
sec_helper.total_securities_in_backtest()

232

In [4]:
# make this multi-GPU
def run_model(prompt, tokenizer, model):
    tokens = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([tokens], return_tensors='pt').to('cuda')
    generated_ids = model.generate(**model_inputs, pad_token_id=tokenizer.eos_token_id, max_new_tokens=5000)
    parsed_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    return tokenizer.batch_decode(parsed_ids, skip_special_tokens=True)[0]



In [21]:
def format_json(llm_output):
    form = llm_output.replace('\n','')
    # Find the start and end of the JSON input
    soj = form.find('```json')
    eoj = form.find('}```')
    # Pull out the additional context
    additional = form[:soj]
    additional += form[eoj + 4:]
    json_obj = json.loads(form[soj + 7:eoj + 1])
    json_obj['AdditionalContext'] = additional
    return json_obj

## Run the backtest and generate all responses

In [32]:
importlib.reload(s3Helpers)
importlib.reload(company_data)

<module 'company_data' from '/project/company_data.py'>

In [33]:
logger = s3Helpers.Logger('tmp/fs')
def run_backtest(company_info, tokenizer, model, logger, log_at=50, start_count=0):
    # start the timer
    start_time = datetime.datetime.now()
    # get the dates
    dates = company_info.get_dates()
    # set the current date year
    current_year = dates[0][:4]

    # set the array
    year_log = []
    
    # set up the display
    max_count = company_info.total_securities_in_backtest()
    f = IntProgress(min=0, max=max_count) # instantiate the bar
    l = Label(value=str(f.value))
    display(HBox([f,l]))
    
    count = 0

    # run the backtest 
    for date in dates:
        
        securities = company_info.get_securities_reporting_on_date(date)

        for security in securities:
            
            # allow model to start running from a pre-set point
            if count >= start_count:
                
                # Save to S3 every 50 interations to ensure there is a cache
                if count % log_at == 0:
                    # save the file to S3 and reset when it is a new year
                    logger.log(year_log, current_year + str(count) + '.json')
                    # reset the stats
                    year_log = []
                    current_year = date[:4]


                prompt = company_info.get_prompt(date, security, system_prompt)
                response = run_model(prompt, tokenizer, model)
                try:
                    formatted_response = format_json(response)
                    formatted_response['security'] = security
                    formatted_response['date'] = date
                    year_log.append(formatted_response)
                except:
                    print("error with " + security + date)
                    error_json = {'security': security, 'date': date, 'response': response}
                    year_log.append(error_json)
                    
            # Interate along the backtest
            f.value += 1
            count += 1
            l.value = str(count) + "/" + str(max_count)
    
    # Log the last values
    logger.log(year_log, current_year + str(count) + '.json')
    # end the timer
    end_time = datetime.datetime.now()
    print("Completed! Time to execute: ", end_time - start_time)

In [39]:
run_backtest(sec_helper, tokenizer, model, logger, 50, 800)

HBox(children=(IntProgress(value=0, max=896), Label(value='0')))

s3://awmgd-prod-finml-sandbox-user/bclarke16/tmp/fs/logs/2020800.json
Saved s3://awmgd-prod-finml-sandbox-user/bclarke16/tmp/fs/logs/2020800.json
error with XOM UN Equity2024-08-02
error with CVX UN Equity2024-08-02
error with AMGN UW Equity2024-08-06
error with NVDA UQ Equity2024-08-28
error with AXP UN Equity2024-10-18
error with SHW UN Equity2024-10-22
error with IBM UN Equity2024-10-23
error with KO UN Equity2024-10-23
error with DOW UN Equity2024-10-25
error with AMGN UQ Equity2024-10-30
error with CAT UN Equity2024-10-30
error with MSFT UW Equity2024-10-30
s3://awmgd-prod-finml-sandbox-user/bclarke16/tmp/fs/logs/2024850.json
Saved s3://awmgd-prod-finml-sandbox-user/bclarke16/tmp/fs/logs/2024850.json
error with INTC UW Equity2024-10-31
error with CSCO UW Equity2024-11-13
error with CSCO UQ Equity2024-11-13
error with HD UN Equity2024-11-19
error with NVDA UQ Equity2024-11-20
error with JPM UN Equity2025-01-15
error with MSFT UW Equity2025-01-29
error with AAPL UQ Equity2025-01-30


### Concatenate all of the results

In [29]:
logger = s3Helpers.Logger('tmp/fs')
log_list = logger.get_list_of_logs()

In [30]:
logs = logger.create_master_log(save_to_s3=False)

In [142]:
def concat_all_logs():
    log_list = logger.get_list_of_logs()
    logs = []
    for logfile in log_list:
        logs += logger.get_log(logfile[logfile.find('/logs/') + 6:])
    return logs

In [143]:
logs = concat_all_logs()

In [144]:
len(logs)

909

In [37]:
len(logs)

800

## Multi GPU run

In [57]:
test_json = [{'test':'test'}]

In [58]:
logger.log(test_json, 'test.json')

In [40]:
logger.get_list_of_logs()

['bclarke16/tmp/fs/logs/20200.json',
 'bclarke16/tmp/fs/logs/2020100.json',
 'bclarke16/tmp/fs/logs/2020150.json',
 'bclarke16/tmp/fs/logs/2020200.json',
 'bclarke16/tmp/fs/logs/202050.json',
 'bclarke16/tmp/fs/logs/2020800.json',
 'bclarke16/tmp/fs/logs/2021250.json',
 'bclarke16/tmp/fs/logs/2021300.json',
 'bclarke16/tmp/fs/logs/2021350.json',
 'bclarke16/tmp/fs/logs/2022400.json',
 'bclarke16/tmp/fs/logs/2022450.json',
 'bclarke16/tmp/fs/logs/2022500.json',
 'bclarke16/tmp/fs/logs/2022550.json',
 'bclarke16/tmp/fs/logs/2023600.json',
 'bclarke16/tmp/fs/logs/2023650.json',
 'bclarke16/tmp/fs/logs/2023700.json',
 'bclarke16/tmp/fs/logs/2024750.json',
 'bclarke16/tmp/fs/logs/2024800.json',
 'bclarke16/tmp/fs/logs/2024850.json',
 'bclarke16/tmp/fs/logs/2024896.json']

In [97]:
 d = logger.get_log('202010.json')

In [104]:
len(d)

10

In [28]:
df = sec_helper.get_security_statement('2020-01-31','AON UN Equity','is')

In [42]:
with open('Data/base_deepseek_r2.json', 'w') as file:
    json.dump(logs, file)

## Save any model

In [8]:
model_helper = S3ModelHelper('tmp/fs')

In [9]:
model_helper.delete_model_in_s3('deepseek32')

bclarke16/tmp/fs/deepseek32/config.json
bclarke16/tmp/fs/deepseek32/generation_config.json
bclarke16/tmp/fs/deepseek32/model-00001-of-00004.safetensors
bclarke16/tmp/fs/deepseek32/model-00002-of-00004.safetensors
bclarke16/tmp/fs/deepseek32/model-00003-of-00004.safetensors
bclarke16/tmp/fs/deepseek32/model-00004-of-00004.safetensors
bclarke16/tmp/fs/deepseek32/model.safetensors.index.json
Files deleted in S3


In [10]:
model.save_pretrained('Data/DeepSeek32')

In [11]:
model_helper.save_model_to_s3('Data/DeepSeek32','deepseek32')

None


In [15]:
model_helper.clear_folder('Data/DeepSeek32')

In [None]:
# start_time = datetime.datetime.now()
# #formatted_chat = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)
# outputs = pipeline(
#     prompt,
#     max_new_tokens=1000,
# )
# end_time = datetime.datetime.now()
# print("Time to execute: ", end_time - start_time)

# test_output = outputs[0]['generated_text'][-1]
# display(Markdown(test_output['content']))