# Backtest the strategies

Use an LLM to go through and predict the buy/ sell/ hold recommendation for the company for the given date. Steps needed:

1. Load the LLM - use DeepSeek R1 Qwen model at 7B parameters first and try the quantised models next
2. Step through each data and each financial statement to get a result
3. Log the results in a file and save to S3 (will need a logging file to save to S3 and resume in case of kernel crash)
4. Need a backtesting framework to apply the results


## Load libraries needed

In [1]:
import json
import boto3
from s3fs import S3FileSystem
import os
import datetime

import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from huggingface_hub import login
import torch
from accelerate import Accelerator

import pandas as pd
from IPython.display import Markdown, display
from ipywidgets import IntProgress, Label, HBox

from helper import get_s3_folder
import s3Helpers
import company_data
import prompts
from s3Helpers import S3ModelHelper, Logger
from prompts import SYSTEM_PROMPTS

In [3]:
import importlib
importlib.reload(company_data)
importlib.reload(s3Helpers)
importlib.reload(prompts)

<module 'prompts' from '/project/prompts.py'>

## Load the LLM

Models to test:
- Qwen (Qwen/Qwen2.5-7B-Instruct)
- Llama (meta-llama/Llama-3.2-7B-Instruct)
- DeepSeek (deepseek-ai/DeepSeek-R1-Distill-Qwen-14B)
- DeepSeek Quantized (deepseek-ai/DeepSeek-R1-Distill-Qwen-32B) quantized to 4 bits

In [4]:
# Log into Huggingface

with open('pass.txt') as p:
    hf_login = p.read()
    
hf_login = hf_login[hf_login.find('=')+1:hf_login.find('\n')]
login(hf_login, add_to_git_credential=False)

In [5]:
# Set up Quantization 
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4"

)

In [6]:
# Flag to download from Huggingface again or use stored model
USE_HF = True
USE_QUANTIZATION = True

model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B"
model_id_s3 = 'deepseek14'


if USE_HF:
   
    # pipeline = transformers.pipeline(
    #     "text-generation",
    #     model=model_id,
    #     model_kwargs={"torch_dtype": torch.bfloat16},
    #     device_map="auto",
    # )
    
    if USE_QUANTIZATION:
        model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto', quantization_config=quant_config)
    else:
        model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto', torch_dtype=torch.bfloat16)
    tokenizer = AutoTokenizer.from_pretrained(model_id)
else:
    model_helper = s3Helpers.S3ModelHelper(s3_sub_folder='tmp/fs')
    if USE_QUANTIZATION:
        model = model_helper.load_model(model_id_s3, quant_config)
    else:
        model = model_helper.load_model(model_id_s3)
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    # pipeline = transformers.pipeline(
    #     "text-generation",
    #     model=model,
    #     tokenizer=tokenizer,
    #     model_kwargs={"torch_dtype": torch.bfloat16},
    #     device_map="auto",
    # )
    model_helper.clear_folder(model_id_s3)

print(f"Memory footprint: {model.get_memory_footprint() / 1e9:,.1f} GB")

config.json:   0%|          | 0.00/664 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/48.0k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-000004.safetensors:   0%|          | 0.00/8.71G [00:00<?, ?B/s]

model-00002-of-000004.safetensors:   0%|          | 0.00/8.67G [00:00<?, ?B/s]

model-00003-of-000004.safetensors:   0%|          | 0.00/8.67G [00:00<?, ?B/s]

model-00004-of-000004.safetensors:   0%|          | 0.00/3.49G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/3.07k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

Memory footprint: 9.7 GB


## Load Financial PIT dataset

In [7]:
## Load from S3 using the helper file
filename = 'data_annual_pit_indu.json' #'data_quarterly_pit_indu.json'
sec_helper = company_data.SecurityData('tmp/fs',filename)
all_data = sec_helper.get_all_data()

In [8]:
# USE WHILE DEVELOPING to
importlib.reload(company_data)
sec_helper = company_data.SecurityData('tmp/fs',filename, all_data)

In [9]:
sec_helper.total_securities_in_backtest()

232

In [14]:
all_data['2020-02-18'].keys()

dict_keys(['JNJ UN Equity', 'WMT UN Equity'])

In [15]:
sec_helper.get_security_statement('2020-02-18','JNJ UN Equity','px') #AXP UN Equity2020-04-24

Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
2019-02-18,33.33
2019-03-18,33.22
2019-04-18,34.393333
2019-05-18,33.62
2019-06-18,36.55
2019-07-18,38.24
2019-08-18,37.663333
2019-09-18,39.053333
2019-10-18,39.713333
2019-11-18,40.083333


In [10]:
system_prompt = prompts.SYSTEM_PROMPTS['CoT']['prompt']
system_prompt

"You are a financial analyst tasked with analyzing the financial statements of a company to predict the direction of future earnings.Follow the steps below to perform the analysis. 1. Identify notable changes in the balance sheet and income statement. 2. Compute key financial ratios to understand the health of the company. State the formula before calculating. Compute profitability ratios, leverage ratios, liquidity ratios and efficiency ratios. 3. Interpret each of the ratios. 4. Predict the direction of future earnings in JSON format with a clear recommendation and size of the increase or decrease: {'earnings':'INCREASE', 'magnitude':'LARGE'} or {'earnings':'DECREASE','SMALL'} 5. Provide a rational in less than 250 words. Company Financial Statements: "

In [16]:
prompt = sec_helper.get_prompt('2020-02-18','JNJ UN Equity', system_prompt)

In [17]:
tokens = tokenizer.apply_chat_template(prompt, tokenize=True, add_generation_prompt=True)
len(tokens)

6590

## Run an example in LLM

Run into out of memory problem - Potential fixes:
1. reduce size of model (quantize)
2. explore multi-gpu
3. reduce tokens

https://saturncloud.io/blog/how-to-solve-cuda-out-of-memory-error-in-pytorch/

https://huggingface.co/docs/accelerate/usage_guides/distributed_inference

https://medium.com/@geronimo7/llms-multi-gpu-inference-with-accelerate-5a8333e4c5db

Problem with splitting a single prompt into multiple gpus to calculate the result. Tensor parallelism - https://huggingface.co/docs/transformers/main/en/perf_train_gpu_many#tensor-parallelism

nvidia-smi will show available GPUs on the system.

### Run 1
Model used: Llama 3.2 3B Instruct (meta-llama/Llama-3.2-3B-Instruct) 
No quantisation. Run in 5.5 hours on 1 A10G GPU on 896 security/ date combinations. The data is stored in log files in the project for further analysis. The base prompt was used with no chain of thought.

### Run 2
Model used: Deepseek R1 14B Qwen (deepseek-ai/DeepSeek-R1-Distill-Qwen-14B)
No quantisation. Run took over 24 hours to complete on 2 A10G GPU on 896 security/ date combinations. The data is stored in log files in the project for further analysis. The base prompt was used with no chain of thought.

In [18]:
def run_model(prompt, tokenizer, model):
    tokens = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([tokens], return_tensors='pt').to('cuda')
    generated_ids = model.generate(**model_inputs, pad_token_id=tokenizer.eos_token_id, max_new_tokens=5000)
    parsed_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    return tokenizer.batch_decode(parsed_ids, skip_special_tokens=True)[0]



In [19]:
# Time the execution
start_time = datetime.datetime.now()

# Run the model
response = run_model(prompt, tokenizer, model)

#Print the length of time to run
end_time = datetime.datetime.now()
print("Time to execute: ", end_time - start_time)

Time to execute:  0:02:11.944997


In [20]:
# Number of tokens
len(tokenizer.tokenize(response))

894

In [21]:
display(Markdown(response))

Okay, I need to analyze the financial statements of a company to predict the direction of future earnings. Let me go through the steps one by one.

First, I'll look at the income statement. The revenue has been increasing over the past years: from 1.248940e+11 at t-5 to 1.416710e+11 at t. That's a positive sign. The cost of revenue is also increasing but not as fast as revenue, which is good because it means the gross profit is improving. Gross profit went from 3.177800e+10 at t-5 to 3.392300e+10 at t. 

Looking at operating expenses, they've been rising but not as much as revenue. Operating income increased from 4.986000e+9 at t-5 to 5.322000e+9 at t. Non-operating income had some fluctuations, but overall, the net income has been increasing, especially from t-3 to t, showing the company is getting more profitable.

Now, the balance sheet. The company has a lot of cash, around 9.465000e+09 at t, which is good for liquidity. Accounts receivable and inventories are stable, but there's a significant amount in other short-term assets and property, plant, and equipment. The total current assets are solid, and the noncurrent assets are also increasing, which shows growth.

Looking at liabilities, the current liabilities are quite high, especially payables and short-term debt. However, the company's current assets are covering these liabilities, so liquidity shouldn't be a problem. The long-term debt is also increasing, which might be a concern for leverage but seems manageable given the assets.

Now, calculating the financial ratios. Profitability: gross margin is improving, which is good. Operating margin is stable, and net profit margin is increasing, indicating better profitability. Leverage ratio (debt-to-equity) is around 2, which is moderate. Liquidity ratios like current and quick ratios are healthy, meaning the company can cover its short-term obligations. Efficiency ratios like inventory turnover are stable, and asset turnover is improving, showing better use of assets.

Interpreting these ratios: the company is profitable, has good liquidity, and is managing its assets efficiently. The increasing net income and improving margins suggest that future earnings are likely to increase. The company's ability to manage its costs and increase revenue without over-leveraging supports this prediction.

So, I think the earnings will increase, and the magnitude is large because of the consistent growth in revenue and profitability over the past years.
</think>

To predict the direction of future earnings, we analyze the company's financial health through key ratios:

1. **Profitability Ratios**:
   - **Gross Margin**: Improved from 23.7% to 24.0%, indicating better cost control.
   - **Operating Margin**: Stable at ~4.5%, showing consistent operational efficiency.
   - **Net Profit Margin**: Increased from 2.7% to 2.9%, reflecting improved profitability.

2. **Leverage Ratios**:
   - **Debt-to-Equity Ratio**: ~2.0, moderate leverage, indicating a balance between debt and equity financing.

3. **Liquidity Ratios**:
   - **Current Ratio**: ~1.6, healthy liquidity to cover short-term liabilities.
   - **Quick Ratio**: ~1.2, sufficient short-term assets to meet obligations.

4. **Efficiency Ratios**:
   - **Inventory Turnover**: ~3.0, stable inventory management.
   - **Asset Turnover**: ~0.6, improving use of assets to generate revenue.

**Conclusion**: The company's improving profitability, moderate leverage, strong liquidity, and efficient asset management suggest future earnings will **INCREASE** with a **LARGE** magnitude.

```json
{
  "earnings": "INCREASE",
  "magnitude": "LARGE"
}
```

**Rationale**: The company demonstrates consistent revenue growth, improving profitability margins, and effective management of assets and liabilities. These factors support a prediction of a large increase in future earnings.

In [21]:
def format_json(llm_output):
    form = llm_output.replace('\n','')
    # Find the start and end of the JSON input
    soj = form.find('```json')
    eoj = form.find('}```')
    # Pull out the additional context
    additional = form[:soj]
    additional += form[eoj + 4:]
    json_obj = json.loads(form[soj + 7:eoj + 1])
    json_obj['AdditionalContext'] = additional
    return json_obj

In [22]:
response

"Okay, so I'm trying to analyze the financial statements of a company to predict the direction of future earnings. I'm a bit new to this, so I need to break it down step by step. Let me start by understanding the data provided.\n\nFirst, the income statement has several items over the past five years. I notice that revenue has been fluctuating but seems to be decreasing slightly from t-1 to t. Other operating income is also decreasing, which might indicate that the company is earning less from non-core activities. Operating expenses are going down as well, which is a positive sign because it means the company is controlling its costs. However, the operating income or losses are also decreasing, which is concerning because it shows that the company's profitability from operations is declining.\n\nLooking at the balance sheet, the company has a lot of cash and short-term investments, which is good for liquidity. However, long-term investments and receivables are increasing, which could m

In [23]:
format_json(response)

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

## Run the backtest and generate all responses

In [32]:
importlib.reload(s3Helpers)
importlib.reload(company_data)

<module 'company_data' from '/project/company_data.py'>

In [33]:
logger = s3Helpers.Logger('tmp/fs')
def run_backtest(company_info, tokenizer, model, logger, log_at=50, start_count=0):
    # start the timer
    start_time = datetime.datetime.now()
    # get the dates
    dates = company_info.get_dates()
    # set the current date year
    current_year = dates[0][:4]

    # set the array
    year_log = []
    
    # set up the display
    max_count = company_info.total_securities_in_backtest()
    f = IntProgress(min=0, max=max_count) # instantiate the bar
    l = Label(value=str(f.value))
    display(HBox([f,l]))
    
    count = 0

    # run the backtest 
    for date in dates:
        
        securities = company_info.get_securities_reporting_on_date(date)

        for security in securities:
            
            # allow model to start running from a pre-set point
            if count >= start_count:
                
                # Save to S3 every 50 interations to ensure there is a cache
                if count % log_at == 0:
                    # save the file to S3 and reset when it is a new year
                    logger.log(year_log, current_year + str(count) + '.json')
                    # reset the stats
                    year_log = []
                    current_year = date[:4]


                prompt = company_info.get_prompt(date, security, system_prompt)
                response = run_model(prompt, tokenizer, model)
                try:
                    formatted_response = format_json(response)
                    formatted_response['security'] = security
                    formatted_response['date'] = date
                    year_log.append(formatted_response)
                except:
                    print("error with " + security + date)
                    error_json = {'security': security, 'date': date, 'response': response}
                    year_log.append(error_json)
                    
            # Interate along the backtest
            f.value += 1
            count += 1
            l.value = str(count) + "/" + str(max_count)
    
    # Log the last values
    logger.log(year_log, current_year + str(count) + '.json')
    # end the timer
    end_time = datetime.datetime.now()
    print("Completed! Time to execute: ", end_time - start_time)

In [39]:
run_backtest(sec_helper, tokenizer, model, logger, 50, 800)

HBox(children=(IntProgress(value=0, max=896), Label(value='0')))

s3://awmgd-prod-finml-sandbox-user/bclarke16/tmp/fs/logs/2020800.json
Saved s3://awmgd-prod-finml-sandbox-user/bclarke16/tmp/fs/logs/2020800.json
error with XOM UN Equity2024-08-02
error with CVX UN Equity2024-08-02
error with AMGN UW Equity2024-08-06
error with NVDA UQ Equity2024-08-28
error with AXP UN Equity2024-10-18
error with SHW UN Equity2024-10-22
error with IBM UN Equity2024-10-23
error with KO UN Equity2024-10-23
error with DOW UN Equity2024-10-25
error with AMGN UQ Equity2024-10-30
error with CAT UN Equity2024-10-30
error with MSFT UW Equity2024-10-30
s3://awmgd-prod-finml-sandbox-user/bclarke16/tmp/fs/logs/2024850.json
Saved s3://awmgd-prod-finml-sandbox-user/bclarke16/tmp/fs/logs/2024850.json
error with INTC UW Equity2024-10-31
error with CSCO UW Equity2024-11-13
error with CSCO UQ Equity2024-11-13
error with HD UN Equity2024-11-19
error with NVDA UQ Equity2024-11-20
error with JPM UN Equity2025-01-15
error with MSFT UW Equity2025-01-29
error with AAPL UQ Equity2025-01-30


### Concatenate all of the results

In [29]:
logger = s3Helpers.Logger('tmp/fs')
log_list = logger.get_list_of_logs()

In [30]:
logs = logger.create_master_log(save_to_s3=False)

In [142]:
def concat_all_logs():
    log_list = logger.get_list_of_logs()
    logs = []
    for logfile in log_list:
        logs += logger.get_log(logfile[logfile.find('/logs/') + 6:])
    return logs

In [143]:
logs = concat_all_logs()

In [144]:
len(logs)

909

In [37]:
len(logs)

800

## Save the Model

In [7]:
model_helper = S3ModelHelper('tmp/fs')

In [8]:
model.save_pretrained('Data/DeepSeek14Q')

In [9]:
model_helper.save_model_to_s3('Data/DeepSeek14Q','deepseek14Q')

None


In [10]:
model_helper.clear_folder('Data/DeepSeek14Q')

In [57]:
test_json = [{'test':'test'}]

In [58]:
logger.log(test_json, 'test.json')

In [40]:
logger.get_list_of_logs()

['bclarke16/tmp/fs/logs/20200.json',
 'bclarke16/tmp/fs/logs/2020100.json',
 'bclarke16/tmp/fs/logs/2020150.json',
 'bclarke16/tmp/fs/logs/2020200.json',
 'bclarke16/tmp/fs/logs/202050.json',
 'bclarke16/tmp/fs/logs/2020800.json',
 'bclarke16/tmp/fs/logs/2021250.json',
 'bclarke16/tmp/fs/logs/2021300.json',
 'bclarke16/tmp/fs/logs/2021350.json',
 'bclarke16/tmp/fs/logs/2022400.json',
 'bclarke16/tmp/fs/logs/2022450.json',
 'bclarke16/tmp/fs/logs/2022500.json',
 'bclarke16/tmp/fs/logs/2022550.json',
 'bclarke16/tmp/fs/logs/2023600.json',
 'bclarke16/tmp/fs/logs/2023650.json',
 'bclarke16/tmp/fs/logs/2023700.json',
 'bclarke16/tmp/fs/logs/2024750.json',
 'bclarke16/tmp/fs/logs/2024800.json',
 'bclarke16/tmp/fs/logs/2024850.json',
 'bclarke16/tmp/fs/logs/2024896.json']

In [97]:
 d = logger.get_log('202010.json')

In [104]:
len(d)

10

In [28]:
df = sec_helper.get_security_statement('2020-01-31','AON UN Equity','is')

In [42]:
with open('Data/base_deepseek_r2.json', 'w') as file:
    json.dump(logs, file)

In [None]:
# start_time = datetime.datetime.now()
# #formatted_chat = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)
# outputs = pipeline(
#     prompt,
#     max_new_tokens=1000,
# )
# end_time = datetime.datetime.now()
# print("Time to execute: ", end_time - start_time)

# test_output = outputs[0]['generated_text'][-1]
# display(Markdown(test_output['content']))