# Multi-GPU Backtest the strategies

Use an LLM to go through and predict the buy/ sell/ hold recommendation for the company for the given date. Steps needed:

1. Load the LLM - use DeepSeek R1 Qwen model at 7B parameters first and try the quantised models next
2. Step through each data and each financial statement to get a result
3. Log the results in a file and save to S3 (will need a logging file to save to S3 and resume in case of kernel crash)
4. Need a backtesting framework to apply the results


## Load libraries needed

In [1]:
%package install pytorch-gpu torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia

Running: micromamba install pytorch-gpu torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia --yes --quiet --log-level=error

Note: Packages not from Bloomberg channels are not vetted by Bloomberg.
[93mPlease restart the Jupyter kernel if you run into any issues after installing or updating packages via %package.[0m



In [1]:
import json
import boto3
from s3fs import S3FileSystem
import os
import datetime

import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from huggingface_hub import login
import torch
from accelerate import Accelerator, notebook_launcher
from accelerate.utils import gather_object

import pandas as pd
from IPython.display import Markdown, display
from ipywidgets import IntProgress, Label, HBox

from helper import get_s3_folder
import s3Helpers
import company_data
import prompts
from s3Helpers import S3ModelHelper, Logger
from prompts import SYSTEM_PROMPTS

In [2]:
import importlib
importlib.reload(company_data)
importlib.reload(s3Helpers)
importlib.reload(prompts)

<module 'prompts' from '/project/prompts.py'>

In [3]:
torch.cuda.device_count()

4

## Load the LLM

Models to test:
- Qwen (Qwen/Qwen2.5-7B-Instruct)
- Llama (meta-llama/Llama-3.2-7B-Instruct)
- DeepSeek (deepseek-ai/DeepSeek-R1-Distill-Qwen-14B)
- DeepSeek Quantized (deepseek-ai/DeepSeek-R1-Distill-Qwen-32B) quantized to 4 bits

In [4]:
# Log into Huggingface

with open('pass.txt') as p:
    hf_login = p.read()
    
hf_login = hf_login[hf_login.find('=')+1:hf_login.find('\n')]
login(hf_login, add_to_git_credential=False)

In [5]:
# Set up Constants and Quantization 
USE_HF = False
USE_QUANTIZATION = True

model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
model_id_s3 = 'deepseek32'

# Quant configuration
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4"

)

## Define the functions needed for Multi-GPU

In [6]:
# Load the models

def load_model(model_id, model_id_s3, accelerator=None, quant_config=None):
    if USE_HF:

        if USE_QUANTIZATION:
            model = AutoModelForCausalLM.from_pretrained(model_id, device_map={"":accelerator.process_index}, quantization_config=quant_config)
        else:
            model = AutoModelForCausalLM.from_pretrained(model_id, device_map={"":accelerator.process_index}, torch_dtype=torch.bfloat16)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
    else:
        # load the pre-saved model from S3
        model_helper = s3Helpers.S3ModelHelper(s3_sub_folder='tmp/fs')
        model = model_helper.load_model(model_id_s3, accelerator)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        #model_helper.clear_folder(model_id_s3)

    print(f"Memory footprint: {model.get_memory_footprint() / 1e9:,.1f} GB")
    return model, tokenizer

In [7]:
def create_all_prompts(company_info, system_prompt):
    all_prompts = []
    # Get all the dates
    dates = company_info.get_dates()
    # Loop through each date
    for date in dates:
        # Pull out the securities reporting on that date
        securities = company_info.get_securities_reporting_on_date(date)
        # Loop through the securities
        for security in securities:
            # Calculate the prompt
            prompt = company_info.get_prompt(date, security, system_prompt)
            record = {'security': security, 'date': date, 'prompt': prompt}
            all_prompts.append(record)
    return all_prompts


In [8]:
# Multi-GPU implementation of run model function
def run_model(prompt, tokenizer, model):
    tokens = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([tokens], return_tensors='pt').to("cuda")
    generated_ids = model.generate(**model_inputs, pad_token_id=tokenizer.eos_token_id, max_new_tokens=5000)
    parsed_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    return tokenizer.batch_decode(parsed_ids, skip_special_tokens=True)[0]

In [9]:
def format_json(llm_output):
    form = llm_output.replace('\n','')
    # Find the start and end of the JSON input
    soj = form.find('```json')
    eoj = form.find('}```')
    # Pull out the additional context
    additional = form[:soj]
    additional += form[eoj + 4:]
    json_obj = json.loads(form[soj + 7:eoj + 1])
    json_obj['AdditionalContext'] = additional
    return json_obj

In [10]:
## Load from S3 using the helper file
def get_all_data():
    filename = 'data_annual_pit_indu.json' #'data_quarterly_pit_indu.json'
    sec_helper = company_data.SecurityData('tmp/fs',filename)
    return sec_helper

In [11]:
# Function to run the backtest
def run_backtest(all_prompts, tokenizer, model, logger, accelerator, log_at=50, start_count=0):
    # start the timer
    # sync GPUs and start the timer
    
    start_time = datetime.datetime.now()
    results = []
    count = 0
    
    # set up the display
    max_count = len(all_prompts)
    f = IntProgress(min=0, max=max_count) # instantiate the bar
    l = Label(value=str(f.value))
    display(HBox([f,l]))
    
    with accelerator.split_between_processes(all_prompts) as prompts:
        results=[]
        
        for prompt in prompts:
            start_i = datetime.datetime.now()
            response = run_model(prompt['prompt'], tokenizer, model)
            formatted_response = {}
            formatted_response['response'] = response
            formatted_response['security'] = prompt['security']
            formatted_response['date'] = prompt['date']
            results.append(formatted_response)
            
            # Empty the cache 
            torch.cuda.empty_cache()
            end_i = datetime.datetime.now()
            print(f"Returned in: {end_i - start_i}")
            # Interate along the backtest
            f.value += 1
            count += 1
            l.value = str(count) + "/" + str(max_count)
            
    # gather all of the results into a single object
    results_gathered = gather_object(results)
    # Log the last values
    logger.log(results_gathered, 'results2.json')
    # end the timer
    end_time = datetime.datetime.now()
    print("Completed! Time to execute: ", end_time - start_time)

In [12]:
# This is the entry point for backtest
def run_inference():
    accelerator = Accelerator()
    model, tokenizer = load_model(model_id, model_id_s3, accelerator, quant_config)
    
    accelerator.wait_for_everyone()
    
    # Clear the folder cache
    s3 = s3Helpers.S3ModelHelper('tmp/fs')
    s3.clear_folder(model_id_s3)
    
    company_data = get_all_data()
    
    # set up system prompts
    system_prompt = prompts.SYSTEM_PROMPTS['CoT']['prompt']
    all_prompts = create_all_prompts(company_data, system_prompt)
    
    # Limit for testing
    prompt_limit = all_prompts[:5]
    
    # set up the logger
    logger = s3Helpers.Logger('tmp/fs')
    
    #run the backtest
    run_backtest(prompt_limit, tokenizer, model, logger, accelerator)
    

In [13]:
notebook_launcher(run_inference, num_processes=torch.cuda.device_count())

Launching training on 4 GPUs.


Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


Memory footprint: 18.7 GB


Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Memory footprint: 18.7 GB
Memory footprint: 18.7 GB
Memory footprint: 18.7 GB


HBox(children=(IntProgress(value=0, max=5), Label(value='0')))

HBox(children=(IntProgress(value=0, max=5), Label(value='0')))

HBox(children=(IntProgress(value=0, max=5), Label(value='0')))

HBox(children=(IntProgress(value=0, max=5), Label(value='0')))

Returned in: 0:03:03.871807
Returned in: 0:03:13.778600
Returned in: 0:03:30.236949
Returned in: 0:03:32.933885
Returned in: 0:03:01.687441
s3://awmgd-prod-finml-sandbox-user/bclarke16/tmp/fs/logs/results2.jsons3://awmgd-prod-finml-sandbox-user/bclarke16/tmp/fs/logs/results2.jsons3://awmgd-prod-finml-sandbox-user/bclarke16/tmp/fs/logs/results2.jsons3://awmgd-prod-finml-sandbox-user/bclarke16/tmp/fs/logs/results2.json



Saved s3://awmgd-prod-finml-sandbox-user/bclarke16/tmp/fs/logs/results2.json
Completed! Time to execute: Saved s3://awmgd-prod-finml-sandbox-user/bclarke16/tmp/fs/logs/results2.json 
0:06:05.923523Completed! Time to execute: 
 0:06:05.864179
Saved s3://awmgd-prod-finml-sandbox-user/bclarke16/tmp/fs/logs/results2.json
Completed! Time to execute:  0:06:06.052176
Saved s3://awmgd-prod-finml-sandbox-user/bclarke16/tmp/fs/logs/results2.json
Completed! Time to execute:  0:06:06.306207




### Concatenate all of the results

In [17]:
s3_helper = s3Helpers.S3ModelHelper('tmp/fs')

In [18]:
s3_helper.clear_folder('deepseek32')

In [16]:
logger = s3Helpers.Logger('tmp/fs')
log_list = logger.get_list_of_logs()

In [30]:
logs = logger.create_master_log(save_to_s3=False)

In [142]:
def concat_all_logs():
    log_list = logger.get_list_of_logs()
    logs = []
    for logfile in log_list:
        logs += logger.get_log(logfile[logfile.find('/logs/') + 6:])
    return logs

In [143]:
logs = concat_all_logs()

In [144]:
len(logs)

909

In [21]:
log = logger.get_log('results.json')

In [22]:
with open ('Data/test_run.json', 'w') as f:
    json.dump(log, f)

In [23]:
for l in log:
    print(l['security'])

JNJ UN Equity
WMT UN Equity
NVDA UQ Equity
VZ UN Equity
GS UN Equity


## Multi GPU run

In [14]:
sec_helper = get_all_data()
system_prompt = prompts.SYSTEM_PROMPTS['CoT']['prompt']
all_prompts = create_all_prompts(sec_helper, system_prompt)

In [17]:
all_prompts[:5]

[{'security': 'JNJ UN Equity',
  'date': '2020-02-18',
  'prompt': [{'role': 'system',
    'content': "You are a financial analyst tasked with analyzing the financial statements of a company to predict the direction of future earnings.Follow the steps below to perform the analysis. 1. Identify notable changes in the balance sheet and income statement. 2. Compute key financial ratios to understand the health of the company. State the formula before calculating. Compute profitability ratios, leverage ratios, liquidity ratios and efficiency ratios. 3. Interpret each of the ratios. 4. Predict the direction of future earnings in JSON format with a clear recommendation and size of the increase or decrease: {'earnings':'INCREASE', 'magnitude':'LARGE'} or {'earnings':'DECREASE','SMALL'} 5. Provide a rational in less than 250 words. Company Financial Statements: "},
   {'role': 'user',
    'content': 'Income Statement:                                                        t           t-1      

## Save any model

In [8]:
model_helper = S3ModelHelper('tmp/fs')

In [9]:
model_helper.delete_model_in_s3('deepseek32')

bclarke16/tmp/fs/deepseek32/config.json
bclarke16/tmp/fs/deepseek32/generation_config.json
bclarke16/tmp/fs/deepseek32/model-00001-of-00004.safetensors
bclarke16/tmp/fs/deepseek32/model-00002-of-00004.safetensors
bclarke16/tmp/fs/deepseek32/model-00003-of-00004.safetensors
bclarke16/tmp/fs/deepseek32/model-00004-of-00004.safetensors
bclarke16/tmp/fs/deepseek32/model.safetensors.index.json
Files deleted in S3


In [10]:
model.save_pretrained('Data/DeepSeek32')

In [11]:
model_helper.save_model_to_s3('Data/DeepSeek32','deepseek32')

None


In [15]:
model_helper.clear_folder('Data/DeepSeek32')

In [None]:
# start_time = datetime.datetime.now()
# #formatted_chat = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)
# outputs = pipeline(
#     prompt,
#     max_new_tokens=1000,
# )
# end_time = datetime.datetime.now()
# print("Time to execute: ", end_time - start_time)

# test_output = outputs[0]['generated_text'][-1]
# display(Markdown(test_output['content']))