## Fine-tune a model to write code to trade stocks
[notebook source](https://colab.research.google.com/drive/1wry2-4AGw-U7K0LQ_jEgduoTQqVIvo1x?usp=sharing#scrollTo=GHsssBgWM_l0)

### An example code generator built by fine-tuning CodeQwen1.5 with QLoRA

### This needs to be run on a T4 runtime (or better)

NOTE: This is a toy example to illustrate the technique – please don’t use
any of this code to make trading decisions!

Previously, we created a dataset and uploaded it to Hugging Face. Now we download that dataset and use it to fine-tune CodeQwen1.5 with QLoRA.

We'll see what kind of trade() functions our model can create before and after training.

In [None]:
# pip installs

!pip install -q requests torch peft bitsandbytes transformers trl accelerate sentencepiece wandb #2.31.0

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.4/122.4 MB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m310.9/310.9 kB[0m [31m15.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m26.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m179.3/179.3 kB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the f

In [None]:
# imports

import os
from google.colab import userdata
from huggingface_hub import login
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TextStreamer, TrainingArguments
from datasets import load_dataset, Dataset
import wandb
from peft import LoraConfig
from trl import SFTTrainer, SFTConfig
from peft import PeftModel
from datetime import datetime

# Bringing in the data

If you generated training data yourself, you should update the DATASET_NAME below to point to your data.

If you'd like to use mine, leave it as is.

You may get better results if you craft a more comprehensive training dataset!

In [None]:
# Constants

# Identifiers for the base model, the training data and the experiment
BASE_MODEL = "Qwen/CodeQwen1.5-7B"
DATASET_NAME = "ed-donner/trade_code_data"
PROJECT_NAME = "trading"

# Each run is tagged with the time this cell was executed
RUN_NAME = datetime.now().strftime("%Y-%m-%d_%H.%M.%S")
PROJECT_RUN_NAME = "-".join((PROJECT_NAME, RUN_NAME))

# Hyperparameters for QLoRA Fine-Tuning

# LoRA adapter settings
LORA_R = 16
LORA_ALPHA = 32
LORA_DROPOUT = 0.1
TARGET_MODULES = ["q_proj", "v_proj", "k_proj", "o_proj"]

# Optimisation settings
EPOCHS = 1
BATCH_SIZE = 1
GRADIENT_ACCUMULATION_STEPS = 1
LEARNING_RATE = 2e-4
LR_SCHEDULER_TYPE = "cosine"
WEIGHT_DECAY = 0.001
MAX_SEQUENCE_LENGTH = 320

# Other config

STEPS = 10        # passed to the trainer config as logging_steps
SAVE_STEPS = 300  # passed to the trainer config as save_steps

### Log in to HuggingFace and Weights & Biases

If you don't already have a HuggingFace account, visit https://huggingface.co to sign up and create a token.

Then open the Secrets panel for this notebook by clicking the key icon on the left, and add a new secret called `HF_TOKEN` whose value is your token.

Repeat this for Weights & Biases at https://wandb.ai, adding a secret called `WANDB_API_KEY`.

In [None]:
# Log in to HuggingFace

# The token is read from the notebook secret named HF_TOKEN
hf_token = userdata.get('HF_TOKEN')
login(token=hf_token, add_to_git_credential=True)

In [None]:
# Log in to Weights & Biases
# The API key is read from the notebook secret named WANDB_API_KEY and
# exported to the environment before wandb.login() is called
wandb_api_key = userdata.get('WANDB_API_KEY')
os.environ["WANDB_API_KEY"] = wandb_api_key
wandb.login()

# Configure Weights & Biases to record against our project
os.environ.update(
    WANDB_PROJECT=PROJECT_NAME,
    WANDB_LOG_MODEL="end",
    WANDB_WATCH="false",
)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33med-donner[0m. Use [1m`wandb login --relogin`[0m to force relogin


## Now load the Tokenizer and Model

In [None]:
# Load the Tokenizer and the Model

# Quantization settings: 4-bit weights with double quantization switched on.
# NOTE(review): no compute dtype or quant type is given here, so the library defaults apply - confirm that is intended.
quant_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_use_double_quant=True)

# Tokenizer: the end-of-sequence token doubles as the padding token, and padding goes on the right
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Base model, loaded with the quantization settings above and automatic device placement
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="auto",
    quantization_config=quant_config,
)
# Keep generation padding consistent with the tokenizer
base_model.generation_config.pad_token_id = tokenizer.pad_token_id

print(f"Memory footprint: {base_model.get_memory_footprint() / 1e6:.1f} MB")

tokenizer_config.json:   0%|          | 0.00/619 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/4.46M [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/1.42M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/702 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/31.7k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/3.89G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/3.95G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/3.95G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/2.71G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/213 [00:00<?, ?B/s]



Memory footprint: 4761.4 MB


## Let's try out the model before we do fine-tuning

In [None]:
# Prompt handed to the model for completion: the top of a Python file that
# ends with an open `def trade():`, leaving the function body to be generated.
# The same prompt is used before and after fine-tuning.
prompt = """
# tickers is a list of stock tickers
import tickers

# prices is a dict; the key is a ticker and the value is a list of historic prices, today first
import prices

# Trade represents a decision to buy or sell a quantity of a ticker
import Trade

import random
import numpy as np

def trade():
"""

In [None]:
# Stream generated tokens to the cell output as they are produced
# (TextStreamer is already imported in the imports cell)
streamer = TextStreamer(tokenizer)

inputs = tokenizer.encode(prompt, return_tensors="pt").to("cuda")

# The pad token is the same as the eos token, so generate() cannot infer the
# attention mask and emits a warning. The prompt is a single unpadded sequence,
# so every position is a real token and the correct mask is all ones.
outputs = base_model.generate(
    inputs,
    attention_mask=torch.ones_like(inputs),
    max_new_tokens=100,
    streamer=streamer,
)

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.



# tickers is a list of stock tickers
import tickers

# prices is a dict; the key is a ticker and the value is a list of historic prices, today first
import prices

# Trade represents a decision to buy or sell a quantity of a ticker
import Trade

import random
import numpy as np

def trade():




    # random.choice(tickers)
    # random.randint(0, 100)
    # random.randint(0, 100)
    # random.randint(0, 100)
    # random.randint(0_ 100)
    # random.randint(0_  100)
    # random.randint(0_  100)
    # random.randint(0_


In [None]:
# Load our dataset
# Only the 'train' split is needed for fine-tuning
dataset = load_dataset(DATASET_NAME, split="train")
dataset

README.md:   0%|          | 0.00/269 [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/37.5k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/242 [00:00<?, ? examples/s]

Dataset({
    features: ['text'],
    num_rows: 242
})

In [None]:
# Step 1: LoRA adapter configuration, driven by the constants cell

peft_parameters = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=TARGET_MODULES,
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    bias="none",
)

# Step 2: general training configuration
# NOTE(review): the argument names used here and in SFTTrainer below depend on the
# installed trl version, and the pip cell does not pin one - confirm before re-running.

train_params = SFTConfig(
    # Where results go and how the run is labelled
    output_dir=PROJECT_RUN_NAME,
    run_name=RUN_NAME,
    report_to="wandb",
    # Data handling
    dataset_text_field="text",
    max_seq_length=MAX_SEQUENCE_LENGTH,
    group_by_length=True,
    # Training length and batching
    num_train_epochs=EPOCHS,
    max_steps=-1,
    per_device_train_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    # Evaluation is switched off for this run
    eval_strategy="no",
    per_device_eval_batch_size=1,
    # Optimizer and learning-rate schedule
    optim="paged_adamw_32bit",
    learning_rate=LEARNING_RATE,
    lr_scheduler_type=LR_SCHEDULER_TYPE,
    warmup_ratio=0.03,
    weight_decay=WEIGHT_DECAY,
    max_grad_norm=0.3,
    # Logging and checkpointing cadence
    logging_steps=STEPS,
    save_steps=SAVE_STEPS,
    save_total_limit=10,
)

# Step 3: the Supervised Fine Tuning Trainer combines the model, the data
# and the two configurations above

fine_tuning = SFTTrainer(
    model=base_model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    peft_config=peft_parameters,
    args=train_params,
)

# Step 4: run the fine-tuning
fine_tuning.train()

# Step 5: upload the fine-tuned model to Hugging Face as a private repo
fine_tuning.model.push_to_hub(PROJECT_RUN_NAME, private=True)

Map:   0%|          | 0/242 [00:00<?, ? examples/s]

Step,Training Loss
10,1.1719
20,0.6262
30,0.3106
40,0.3773
50,0.1635
60,0.3642
70,0.2388
80,0.2417
90,0.2504
100,0.1466


adapter_model.safetensors:   0%|          | 0.00/52.5M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/ed-donner/trading-2024-11-21_17.41.58/commit/20048c80fb28334b928bc6092b9f073a5d79b4df', commit_message='Upload model', commit_description='', oid='20048c80fb28334b928bc6092b9f073a5d79b4df', pr_url=None, repo_url=RepoUrl('https://huggingface.co/ed-donner/trading-2024-11-21_17.41.58', endpoint='https://huggingface.co', repo_type='model', repo_id='ed-donner/trading-2024-11-21_17.41.58'), pr_revision=None, pr_num=None)

In [None]:
# Attach the adapter identified by PROJECT_RUN_NAME (the name used as the training
# output_dir and for the pushed repo) to the base model
fine_tuned_model = PeftModel.from_pretrained(model=base_model, model_id=PROJECT_RUN_NAME)

In [None]:
# Code up a trade

inputs = tokenizer.encode(prompt, return_tensors="pt").to("cuda")

# The pad token is the same as the eos token, so generate() cannot infer the
# attention mask on its own. The prompt is a single unpadded sequence, so the
# correct mask is all ones.
outputs = fine_tuned_model.generate(
    inputs,
    attention_mask=torch.ones_like(inputs),
    max_new_tokens=120,
    streamer=streamer,
)


# tickers is a list of stock tickers
import tickers

# prices is a dict; the key is a ticker and the value is a list of historic prices, today first
import prices

# Trade represents a decision to buy or sell a quantity of a ticker
import Trade

import random
import numpy as np

def trade():




    # Sell stocks that have shown a downward trend in the last 5 days
    downward_trend_tickers = [ticker for ticker in tickers if prices[ticker][0] < prices[ticker][4]]
    trades = [Trade(ticker, -100) for ticker in random.sample(downward_trend_tickers, min(3, len(downward_trend_tickers)))]
    return trades

    # Buy stocks that have shown a upward trend in


In [None]:
# Another! Use do_sample and temperature to make this more creative:

# `inputs` is the encoded prompt tensor from the previous generation cell.
# It is a single unpadded sequence, so an all-ones attention mask is passed
# explicitly; generate() cannot infer it because the pad token equals the eos token.
outputs = fine_tuned_model.generate(
    inputs,
    attention_mask=torch.ones_like(inputs),
    max_new_tokens=180,
    do_sample=True,
    temperature=0.8,
    streamer=streamer,
)


# tickers is a list of stock tickers
import tickers

# prices is a dict; the key is a ticker and the value is a list of historic prices, today first
import prices

# Trade represents a decision to buy or sell a quantity of a ticker
import Trade

import random
import numpy as np

def trade():
    # Buy if the current price is higher than the previous day's price
    trades = []
    for ticker in tickers:
        if prices[ticker][0] > prices[ticker][1]:
            quantity = random.randrange(1, 100)
            trades.append(Trade(ticker, quantity))
    return trades

import random
import numpy as np

def trade():
    # Buy stocks that have been trading lower than the lower low for the last 10 days
    trades = []
    for ticker in tickers:
        lower_low = min(prices[ticker][:10])
        if prices[ticker][0] < lower_low:
            quantity = random.randrange(1, 100)
            tr


## That's the example of QLoRA Fine Tuning to write code to carry out a specific function (but don't actually use this for trading!)