# Llama 3 fine-tuning for finance prediction

In [1]:
!pip install transformers bitsandbytes accelerate peft --quiet

In [2]:
!pip install datasets --quiet

In [None]:
!pip install finnhub-python yfinance --quiet

In [3]:
import os
import re
import csv
import math
import time 
import json
import random
import pandas as pd
from tqdm import tqdm
from functools import partial
from datetime import datetime
from collections import defaultdict

In [4]:
import datasets
from datasets import Dataset

In [5]:
import torch
from torch.optim import AdamW
from transformers import (
    AutoTokenizer, AutoModelForCausalLM, AutoConfig, BitsAndBytesConfig,
    GenerationConfig, pipeline, Trainer, TrainingArguments, DataCollatorForSeq2Seq
)

from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model, PeftModel, PeftConfig

2024-08-20 19:36:13.736275: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-20 19:36:13.736420: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-20 19:36:13.903872: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [6]:
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()

In [7]:
os.environ["HF_TOKEN"] = user_secrets.get_secret("HF_TOKEN")

## Collect data

In [None]:
import finnhub
import yfinance as yf

### Get news and stocks of companies

In [None]:
finnhub_client = finnhub.Client(api_key=user_secrets.get_secret("FINNHUB_API_KEY"))

In [None]:
def bin_mapping(ret):
    up_down = 'U' if ret >= 0 else 'D'
    integer = math.ceil(abs(100 * ret))
    return up_down + (str(integer) if integer <= 5 else '5+')

In [None]:
def get_returns(stock_ticker, start_date, end_date):
    stock_data = yf.download(stock_ticker, start=start_date, end=end_date)

    weekly_data = stock_data['Adj Close'].resample('W').ffill()
    weekly_returns = weekly_data.pct_change()[1:]
    weekly_start_prices = weekly_data[:-1]
    weekly_end_prices = weekly_data[1:]

    weekly_data = pd.DataFrame({
        'start_date': weekly_start_prices.index,
        'start_price': weekly_start_prices.values,
        'end_date': weekly_end_prices.index,
        'end_price': weekly_end_prices.values,
        'weekly_returns': weekly_returns.values
    })

    weekly_data['bin_label'] = weekly_data['weekly_returns'].map(bin_mapping)

    return weekly_data

In [None]:
def get_news(ticker, data):
    news_list = []

    for _, row in data.iterrows():
        start_date = row['start_date'].strftime('%Y-%m-%d')
        end_date = row['end_date'].strftime('%Y-%m-%d')
        time.sleep(1) # control qpm
        weekly_news = finnhub_client.company_news(ticker, _from=start_date, to=end_date)
        weekly_news = [
            {
                "date": datetime.fromtimestamp(n['datetime']).strftime('%Y%m%d%H%M%S'),
                "headline": n['headline'],
                "summary": n['summary'],
            } for n in weekly_news
        ]
        weekly_news.sort(key=lambda x: x['date'])
        news_list.append(json.dumps(weekly_news))

    data['news'] = news_list

    return data

In [None]:
def get_basics(ticker, data, start_date, always=False):
    basic_financials = finnhub_client.company_basic_financials(ticker, 'all')

    final_basics, basic_list, basic_dict = [], [], defaultdict(dict)

    for metric, value_list in basic_financials['series']['quarterly'].items():
        for value in value_list:
            basic_dict[value['period']].update({metric: value['v']})

    for k, v in basic_dict.items():
        v.update({'period': k})
        basic_list.append(v)

    basic_list.sort(key=lambda x: x['period'])

    for i, row in data.iterrows():

        start_date = row['end_date'].strftime('%Y-%m-%d')
        last_start_date = start_date if i < 2 else data.loc[i-2, 'start_date'].strftime('%Y-%m-%d')

        used_basic = {}
        for basic in basic_list[::-1]:
            if (always and basic['period'] < start_date) or (last_start_date <= basic['period'] < start_date):
                used_basic = basic
                break
        final_basics.append(json.dumps(used_basic))

    data['basics'] = final_basics

    return data

In [None]:
def prep_data_for_ticker(ticker, data_dir, start_date, end_date):

    _ = get_returns(ticker, start_date, end_date)
    data = get_news(ticker, _)

    data = get_basics(ticker, data, start_date)
    data.to_csv(f"{data_dir}/{ticker}_{start_date}_{end_date}.csv")

### CSV

In [None]:
def append_to_csv(filename, input_data, output_data):
    with open(filename, mode='a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow([input_data, output_data])

In [None]:
def initialize_csv(filename):
    with open(filename, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["prompt", "answer"])

### Prompts

In [None]:
def create_company_profile(ticker):
    profile = finnhub_client.company_profile2(symbol=ticker)
    company_template = "[Company Introduction]:\n\n{name} is a leading entity in the {finnhubIndustry} sector. " \
                      "Incorporated and publicly traded since {ipo}, the company has established its reputation " \
                      "as one of the key players in the market. \n\n{name} operates primarily in the {country}, " \
                      "trading under the ticker {ticker} on the {exchange}. As a dominant force in the {finnhubIndustry} space, " \
                      "the company continues to innovate and drive progress within the industry."

    formatted_str = company_template.format(**profile)

    return formatted_str

In [None]:
def map_bin_label(bin_lb):
    lb = bin_lb.replace('U', 'up by ')
    lb = lb.replace('D', 'down by ')
    lb = lb.replace('1', '0-1%')
    lb = lb.replace('2', '1-2%')
    lb = lb.replace('3', '2-3%')
    lb = lb.replace('4', '3-4%')
    if lb.endswith('+'):
        lb = lb.replace('5+', 'more than 5%')
    else:
        lb = lb.replace('5', '4-5%')

    return lb

In [None]:
def sample_news(news, n=5):
    if not 0 <= n <= len(news):
        raise ValueError(f"Bad N")
    sampled_indices = random.sample(range(len(news)), n)
    return [news[i] for i in sampled_indices]

In [None]:
def get_prompt_by_row(ticker, row):

    start_date = row['start_date'].strftime('%Y-%m-%d') if isinstance(row['start_date'], datetime) else str(row['start_date'])
    end_date = row['end_date'].strftime('%Y-%m-%d') if isinstance(row['end_date'], datetime) else str(row['end_date'])

    term = 'increased' if row['end_price'] > row['start_price'] else 'decreased'
    head = f"From {start_date} to {end_date}, {ticker}'s stock price {term} " \
           f"from {row['start_price']:.2f} to {row['end_price']:.2f}. News during this period are listed below:\n\n"

    news = json.loads(row["news"])
    news = [f"[Headline]: {n['headline']}\n[Summary]: {n['summary']}\n"
            for n in news
            if n['date'][:8] <= end_date.replace('-', '')
            and not n['summary'].startswith("Looking for stock market analysis and research with proves results?")]

    basics = json.loads(row['basics'])
    if not basics:
        basics_str = "[Basic Financials]:\n\nNo basic financial reported."
    else:
        basics_str = f"Some recent basic financials of {ticker}, reported at {basics['period']}, are presented below:\n\n[Basic Financials]:\n\n"
        basics_str += "\n".join(f"{k}: {v}" for k, v in basics.items() if k != 'period')

    return head, news, basics_str

In [None]:
def build_prompt(ticker, row, prev_rows, max_weeks, info_prompt):
    prompt = ""
    if prev_rows:
        num_prev_rows = min(random.choice(range(1, max_weeks+1)), len(prev_rows))
        for i in range(-num_prev_rows, 0):
            prompt += f"\n{prev_rows[i][0]}"  # price movement (top of a period)
            sampled_news = sample_news(prev_rows[i][1], min(5, len(prev_rows[i][1])))
            if sampled_news:
                prompt += "\n".join(sampled_news)
            else:
                prompt += "\nNo relative news reported."

    head, news, basics = get_prompt_by_row(ticker, row)
    prev_rows.append((head, news, basics))

    if len(prev_rows) > max_weeks:
        prev_rows.pop(0)

    if not prompt:
        return "", prev_rows

    prediction = map_bin_label(row['bin_label'])
    prompt = f"{info_prompt}\n{prompt}\n{basics}"

    instruction_prompt = "\n\nBased on all the information before {start_date}, let's first analyze the positive developments and potential concerns for {ticker}. " \
                          "Come up with 2-4 most important factors respectively and keep them concise. Most factors should be inferred from company related news. " \
                          "Then let's assume your prediction for next week ({start_date} to {end_date}) is {prediction}. " \
                          "Provide a summary analysis to support your prediction. The prediction result need to be inferred from your analysis at the end, " \
                          "and thus not appearing as a foundational factor of your analysis."

    prompt += instruction_prompt.format(
        start_date=row['start_date'],
        end_date=row['end_date'],
        ticker=ticker,
        prediction=prediction,
    )

    return prompt.strip(), prev_rows

In [None]:
def create_prompts(ticker, data_dir, start_date, end_date, max_weeks):
    df = pd.read_csv(f'{data_dir}/{ticker}_{start_date}_{end_date}.csv')

    info_prompt = create_company_profile(ticker)
    prev_rows = []
    all_prompts = []

    for _, row in df.iterrows():
        prompt, prev_rows = build_prompt(ticker, row, prev_rows, max_weeks, info_prompt)
        if prompt:
            all_prompts.append(prompt)

    return all_prompts

### Llama3

In [15]:
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

In [17]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/51.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [None]:
generation_config = GenerationConfig.from_pretrained(model_id)
generation_config.temperature = 0.001

llama3 = pipeline(
  "text-generation",
  model=model,
  tokenizer=tokenizer,
  generation_config=generation_config,
)

### Llama 3 completion

In [None]:
def get_completion(messages):
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    outputs = llama3(prompt)
    return outputs

def get_assistant_response(output):
    gen_text = output[0]['generated_text']
    assistant_text = gen_text.rsplit('<|eot_id|>', 1)[1]
    response = assistant_text.replace('assistant\n\n', "", 1)

    return response

In [None]:
def llama3_completion(tickers, data_dir, start_date, end_date, max_weeks=3):

    for ticker in tqdm(tickers):

        print("Processing ticker:", ticker)

        csv_file = f'{data_dir}/{ticker}_{start_date}_{end_date}_llama3.csv'

        if not os.path.exists(csv_file):
            initialize_csv(csv_file)
            pre_done = 0
        else:
            df = pd.read_csv(csv_file)
            pre_done = len(df)

        prompts = create_prompts(ticker, data_dir, start_date, end_date, max_weeks)
        system_prompt = "You are a seasoned stock market analyst. " \
                        "Your task is to list the positive developments and potential " \
                        "concerns for companies based on relevant news and basic financials from the past weeks, " \
                        "then provide an analysis and prediction for the companies' stock price movement for the upcoming week. " \
                        "Your answer format should be as follows: " \
                        "\n\n[Positive Developments]:\n1. ..." \
                        "\n\n[Potential Concerns]:\n1. ..." \
                        "\n\n[Prediction & Analysis]:\n...\n"

        for i, prompt in enumerate(prompts):
            if i < pre_done:
                continue

            completion = get_completion(
                  messages=[
                      {"role": "system", "content": system_prompt},
                      {"role": "user", "content": prompt}
                    ]
            )

            answer = get_assistant_response(completion)
            append_to_csv(csv_file, prompt, answer)

## Prepare data for training

In [None]:
TICKERS = [
    "AXP", "AMGN", "AAPL", "BA", "CAT", "CSCO", "CVX", "GS", "HD", "HON",
    "IBM", "INTC", "JNJ", "KO", "JPM", "MCD", "MMM", "MRK", "MSFT", "NKE",
    "PG", "TRV", "UNH", "CRM", "VZ", "V", "WBA", "WMT", "DIS", "DOW"

    ## With my account there is access only to the US tickers

    # "ADS.DE", "ADYEN.AS", "AD.AS", "AI.PA", "AIR.PA", "ALV.DE",
    # "ABI.BR", "ASML.AS", "CS.PA", "BAS.DE", "BAYN.DE", "BBVA.MC",
    # "SAN.MC", "BMW.DE", "BNP.PA", "BN.PA", "DAI.DE", "DPW.DE", "DTE.DE",
    # "ENEL.MI", "ENGI.PA", "EL.PA", "FRE.DE", "IBE.MC", "ITX.MC", "IFX.DE",
    # "INGA.AS", "ISP.MI", "KER.PA", "AD.AS", "PHIA.AS", "OR.PA", "LIN.DE",
    # "MC.PA", "MUV2.DE", "NOKIA.SE", "ORA.PA", "RI.PA", "SAF.PA", "SAN.PA",
    # "SAP.DE", "SU.PA", "SIE.DE", "GLE.PA", "STM.PA", "TEF.MC", "TTE.PA",
    # "UNA.AS", "DG.PA", "VOW3.DE"
]

In [None]:
START_DATE = "2023-09-01"
END_DATE = "2024-06-01"

DATA_DIR = f"./llama_{START_DATE}_{END_DATE}"
os.makedirs(DATA_DIR, exist_ok=True)

In [None]:
for ticker in TICKERS:
    prep_data_for_ticker(ticker, DATA_DIR, START_DATE, END_DATE)

[*********************100%%**********************]  1 of 1 completed







[*********************100%%**********************]  1 of 1 completed







[*********************100%%**********************]  1 of 1 completed







[*********************100%%**********************]  1 of 1 completed







[*********************100%%**********************]  1 of 1 completed







[*********************100%%**********************]  1 of 1 completed







[*********************100%%**********************]  1 of 1 completed







[*********************100%%**********************]  1 of 1 completed







[*********************100%%**********************]  1 of 1 completed







[*********************100%%**********************]  1 of 1 completed







[*********************100%%**********************]  1 of 1 completed







[*********************100%%**********************]  1 of 1 completed







[*********************100%%**********************]  1 of 1 completed







[***********

In [None]:
llama3_completion(TICKERS, DATA_DIR, START_DATE, END_DATE)

  0%|          | 0/30 [00:00<?, ?it/s]

Processing ticker: AXP







CREATING PROMPTS


  7%|▋         | 2/30 [00:00<00:08,  3.20it/s]

CREATING PROMPTS END







Processing ticker: AMGN







CREATING PROMPTS







CREATING PROMPTS END







Processing ticker: AAPL







CREATING PROMPTS


 13%|█▎        | 4/30 [00:00<00:05,  5.18it/s]

CREATING PROMPTS END







Processing ticker: BA







CREATING PROMPTS







CREATING PROMPTS END







Processing ticker: CAT







CREATING PROMPTS


 20%|██        | 6/30 [00:01<00:03,  6.54it/s]

CREATING PROMPTS END







Processing ticker: CSCO







CREATING PROMPTS







CREATING PROMPTS END







Processing ticker: CVX







CREATING PROMPTS


 27%|██▋       | 8/30 [00:01<00:02,  7.47it/s]

CREATING PROMPTS END







Processing ticker: GS







CREATING PROMPTS







CREATING PROMPTS END







Processing ticker: HD







CREATING PROMPTS


 33%|███▎      | 10/30 [00:01<00:02,  7.44it/s]

CREATING PROMPTS END







Processing ticker: HON







CREATING PROMPTS







CREATING PROMPTS END







Processing ticker: IBM







CREATING PROMPTS


 40%|████      | 12/30 [00:02<00:02,  7.16it/s]

CREATING PROMPTS END







Processing ticker: INTC







CREATING PROMPTS







CREATING PROMPTS END







Processing ticker: JNJ







CREATING PROMPTS


 47%|████▋     | 14/30 [00:02<00:02,  7.56it/s]

CREATING PROMPTS END







Processing ticker: KO







CREATING PROMPTS







CREATING PROMPTS END







Processing ticker: JPM







CREATING PROMPTS


 53%|█████▎    | 16/30 [00:02<00:01,  7.98it/s]

CREATING PROMPTS END







Processing ticker: MCD







CREATING PROMPTS







CREATING PROMPTS END







Processing ticker: MMM







CREATING PROMPTS


 60%|██████    | 18/30 [00:02<00:01,  7.95it/s]

CREATING PROMPTS END







Processing ticker: MRK







CREATING PROMPTS







CREATING PROMPTS END







Processing ticker: MSFT







CREATING PROMPTS


 67%|██████▋   | 20/30 [00:03<00:01,  8.01it/s]

CREATING PROMPTS END







Processing ticker: NKE







CREATING PROMPTS







CREATING PROMPTS END







Processing ticker: PG







CREATING PROMPTS


 73%|███████▎  | 22/30 [00:03<00:00,  8.58it/s]

CREATING PROMPTS END







Processing ticker: TRV







CREATING PROMPTS







CREATING PROMPTS END







Processing ticker: UNH







CREATING PROMPTS


 80%|████████  | 24/30 [00:03<00:00,  8.80it/s]

CREATING PROMPTS END







Processing ticker: CRM







CREATING PROMPTS







CREATING PROMPTS END







Processing ticker: VZ







CREATING PROMPTS


 87%|████████▋ | 26/30 [00:03<00:00,  8.71it/s]

CREATING PROMPTS END







Processing ticker: V







CREATING PROMPTS







CREATING PROMPTS END







Processing ticker: WBA







CREATING PROMPTS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CREATING PROMPTS END







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset







Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


 90%|█████████ | 27/30 [09:19<08:20, 166.93s/it]

COMPLETION FINISHED







EXTRACTING RESPONSE







Processing ticker: WMT







CREATING PROMPTS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CREATING PROMPTS END







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


 93%|█████████▎| 28/30 [36:38<20:17, 608.63s/it]

COMPLETION FINISHED







EXTRACTING RESPONSE







Processing ticker: DIS







CREATING PROMPTS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CREATING PROMPTS END







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


 97%|█████████▋| 29/30 [1:04:19<15:24, 924.34s/it]

COMPLETION FINISHED







EXTRACTING RESPONSE







Processing ticker: DOW







CREATING PROMPTS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


CREATING PROMPTS END







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


COMPLETION FINISHED







EXTRACTING RESPONSE







COMPLETION IN PROCESS


100%|██████████| 30/30 [1:31:59<00:00, 183.97s/it] 

COMPLETION FINISHED







EXTRACTING RESPONSE





## Transform for training

### Methods

In [14]:
def transform2train(tickers, data_dir, tokenizer):
    train_list, test_list = [], []
    train_size = 0.8

    for ticker in tickers:
        data_dict = gen2train(ticker, data_dir, tokenizer)

        dataset = Dataset.from_dict(data_dict)
        train_split = round(train_size * len(dataset))

        train_list.append(dataset.select(range(train_split)))
        test_list.append(dataset.select(range(train_split, len(dataset))))

    train_dataset = datasets.concatenate_datasets(train_list)
    test_dataset = datasets.concatenate_datasets(test_list)

    dataset = datasets.DatasetDict({
      'train': train_dataset,
      'test': test_dataset
    })

    return dataset

### Transform

In [19]:
def gen2train(ticker, data_dir, tokenizer):
    csv_file = f'{data_dir}/{ticker}.csv'
    df = pd.read_csv(csv_file)
    prompts, answers, periods, labels = [], [], [], []

    for i, row in df.iterrows():
        prompt, answer = row['prompt'], row['answer']
        res = re.search(r"Then let's assume your prediction for next week \((.*)\) is ((:?up|down) by .*%).", prompt)
        period, label = res.group(1), res.group(2)
        prompt = re.sub(
            r"Then let's assume your prediction for next week \((.*)\) is (up|down) by ((:?.*)%). Provide a summary analysis to support your prediction. The prediction result need to be inferred from your analysis at the end, and thus not appearing as a foundational factor of your analysis.",
            f"Then make your prediction of the {ticker} stock price movement for next week ({period}). Provide a summary analysis to support your prediction.",
            prompt
        )
        answer = re.sub(
            r"\[Prediction & Analysis\]:\s*",
            f"[Prediction & Analysis]:\nPrediction: {label.capitalize()}\nAnalysis: ",
            answer
        )

        system_prompt = "You are a seasoned stock market analyst. " \
                        "Your task is to list the positive developments and potential " \
                        "concerns for companies based on relevant news and basic financials from the past weeks, " \
                        "then provide an analysis and prediction for the companies' stock price movement for the upcoming week. " \
                        "Your answer format should be as follows: " \
                        "\n\n[Positive Developments]:" \
                        "\n1. ..." \
                        "\n\n[Potential Concerns]:" \
                        "\n1. ..." \
                        "\n\n[Prediction & Analysis]:" \
                        "\nPrediction: ..." \
                        "\nAnalysis: ...\n"

        messages=[
          {"role": "system", "content": system_prompt},
          {"role": "user", "content": prompt}
        ]
        train_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

        prompts.append(train_prompt)
        answers.append(answer)
        periods.append(period)
        labels.append(label)

    return {
        "prompt": prompts,
        "answer": answers,
        "period": periods,
        "label": labels,
    }

In [16]:
TICKERS = [
    "AXP", "AMGN", "AAPL", "BA", "CAT", "CSCO", "CVX", "GS", "HD", "HON",
    "IBM", "INTC", "JNJ", "KO", "JPM", "MCD", "MMM", "MRK", "MSFT", "NKE",
    "PG", "TRV", "UNH", "CRM", "VZ", "V", "WBA", "WMT", "DIS", "DOW"
]

In [24]:
DATA_DIR = "/kaggle/input/llama-2023-09-01-2024-06-01"

In [18]:
finance_llama3_8b = "instruction-pretrain/finance-Llama3-8B"
finance_tokenizer = AutoTokenizer.from_pretrained(finance_llama3_8b)

# model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
# tokenizer = AutoTokenizer.from_pretrained(model_id)

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [25]:
llama3_dataset = transform2train(TICKERS, DATA_DIR, finance_tokenizer)

No chat template is set for this tokenizer, falling back to a default class-level template. This is very error-prone, because models are often trained with templates different from the class default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which point any code depending on them will stop working. We recommend setting a valid chat template before then to ensure that this model continues working without issues.


In [26]:
llama3_dataset['train'][0]

{'prompt': "<|im_start|>system\nYou are a seasoned stock market analyst. Your task is to list the positive developments and potential concerns for companies based on relevant news and basic financials from the past weeks, then provide an analysis and prediction for the companies' stock price movement for the upcoming week. Your answer format should be as follows: \n\n[Positive Developments]:\n1. ...\n\n[Potential Concerns]:\n1. ...\n\n[Prediction & Analysis]:\nPrediction: ...\nAnalysis: ...\n<|im_end|>\n<|im_start|>user\n[Company Introduction]:\n\nAmerican Express Co is a leading entity in the Financial Services sector. Incorporated and publicly traded since 1977-05-18, the company has established its reputation as one of the key players in the market. \n\nAmerican Express Co operates primarily in the US, trading under the ticker AXP on the NEW YORK STOCK EXCHANGE, INC.. As a dominant force in the Financial Services space, the company continues to innovate and drive progress within the

In [27]:
llama3_dataset.save_to_disk('./fin-prediction-2023-09-01_2024-06-01-llama3')

Saving the dataset (0/1 shards):   0%|          | 0/900 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/240 [00:00<?, ? examples/s]

## Fine-tuning

In [7]:
os.environ["WANDB_API_KEY"] = user_secrets.get_secret("WANDB_API_KEY")
os.environ['WANDB_PROJECT'] = 'llama3-fin-pred'

### Finance Llama 3 8B

In [13]:
finance_llama3_8b = "instruction-pretrain/finance-Llama3-8B"

In [None]:
bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.float16,
#     bnb_4bit_use_double_quant=True,
    load_in_8bit=True,
)
model = AutoModelForCausalLM.from_pretrained(finance_llama3_8b, quantization_config=bnb_config, device_map="auto")

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/7 [00:00<?, ?it/s]

model-00001-of-00007.safetensors:   0%|          | 0.00/4.89G [00:00<?, ?B/s]

model-00002-of-00007.safetensors:   0%|          | 0.00/4.83G [00:00<?, ?B/s]

model-00003-of-00007.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00004-of-00007.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00005-of-00007.safetensors:   0%|          | 0.00/4.83G [00:00<?, ?B/s]

model-00006-of-00007.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00007-of-00007.safetensors:   0%|          | 0.00/2.57G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

In [30]:
tokenizer = AutoTokenizer.from_pretrained(finance_llama3_8b)
# tokenizer.pad_token = tokenizer.eos_token
# tokenizer.padding_side = "right"

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [11]:
model.hf_device_map

{'model.embed_tokens': 0,
 'model.layers.0': 0,
 'model.layers.1': 0,
 'model.layers.2': 0,
 'model.layers.3': 0,
 'model.layers.4': 0,
 'model.layers.5': 0,
 'model.layers.6': 0,
 'model.layers.7': 0,
 'model.layers.8': 1,
 'model.layers.9': 1,
 'model.layers.10': 1,
 'model.layers.11': 1,
 'model.layers.12': 1,
 'model.layers.13': 1,
 'model.layers.14': 1,
 'model.layers.15': 1,
 'model.layers.16': 1,
 'model.layers.17': 1,
 'model.layers.18': 1,
 'model.layers.19': 1,
 'model.layers.20': 1,
 'model.layers.21': 1,
 'model.layers.22': 1,
 'model.layers.23': 1,
 'model.layers.24': 1,
 'model.layers.25': 1,
 'model.layers.26': 1,
 'model.layers.27': 1,
 'model.layers.28': 1,
 'model.layers.29': 1,
 'model.layers.30': 1,
 'model.layers.31': 1,
 'model.norm': 1,
 'lm_head': 1}

In [25]:
# model = prepare_model_for_kbit_training(model)

### Dataset

In [10]:
def tokenize(tokenizer, feature):
    prompt = feature['prompt']
    answer = feature['answer']

    prompt_ids = tokenizer.encode(prompt, add_special_tokens=True, truncation=True, max_length=4096)
    answer_ids = tokenizer.encode(answer, add_special_tokens=False, truncation=True, max_length=4096)

    input_ids = prompt_ids + answer_ids
    too_big = len(input_ids) >= 8192

    if input_ids[-1] != tokenizer.eos_token_id and not too_big:
        input_ids.append(tokenizer.eos_token_id)

    label_ids = [tokenizer.pad_token_id] * len(prompt_ids) + input_ids[len(prompt_ids):]

    return {
        "input_ids": input_ids,
        "labels": label_ids,
        "is_too_big": too_big
    }

In [11]:
dataset_name = "/kaggle/working/fin-prediction-2023-09-01-2024-06-01-llama3"
dataset = datasets.load_from_disk(dataset_name)

In [12]:
tokenized_dataset = dataset.map(partial(tokenize, tokenizer))
print('original dataset length: ', len(dataset['train']))
tokenized_dataset = tokenized_dataset.filter(lambda x: not x['is_too_big'])
print('filtered dataset length: ', len(dataset['train']))
tokenized_dataset = tokenized_dataset.remove_columns(
    ['prompt', 'answer', 'label', 'period', 'is_too_big']
)

original dataset length:  900

filtered dataset length:  900


In [15]:
tokenized_dataset['train'][0]

{'input_ids': [128000,
  27,
  91,
  318,
  5011,
  91,
  29,
  9125,
  198,
  2675,
  527,
  264,
  52614,
  5708,
  3157,
  18738,
  13,
  4718,
  3465,
  374,
  311,
  1160,
  279,
  6928,
  26006,
  323,
  4754,
  10742,
  369,
  5220,
  3196,
  389,
  9959,
  3754,
  323,
  6913,
  6020,
  82,
  505,
  279,
  3347,
  5672,
  11,
  1243,
  3493,
  459,
  6492,
  323,
  20212,
  369,
  279,
  5220,
  6,
  5708,
  3430,
  7351,
  369,
  279,
  14827,
  2046,
  13,
  4718,
  4320,
  3645,
  1288,
  387,
  439,
  11263,
  25,
  4815,
  58,
  36590,
  8000,
  1392,
  10556,
  16,
  13,
  5585,
  43447,
  354,
  2335,
  52347,
  82,
  10556,
  16,
  13,
  5585,
  43447,
  1171,
  2538,
  612,
  18825,
  10556,
  89379,
  25,
  12515,
  27671,
  25,
  12515,
  27,
  91,
  318,
  6345,
  91,
  397,
  27,
  91,
  318,
  5011,
  91,
  29,
  882,
  198,
  58,
  14831,
  29438,
  69662,
  29518,
  17855,
  3623,
  374,
  264,
  6522,
  5502,
  304,
  279,
  17961,
  8471,
  10706,
  13,
  6779

In [16]:
tokenizer.decode(tokenized_dataset['train'][0]['input_ids'])

"<|begin_of_text|><|im_start|>system\nYou are a seasoned stock market analyst. Your task is to list the positive developments and potential concerns for companies based on relevant news and basic financials from the past weeks, then provide an analysis and prediction for the companies' stock price movement for the upcoming week. Your answer format should be as follows: \n\n[Positive Developments]:\n1....\n\n[Potential Concerns]:\n1....\n\n[Prediction & Analysis]:\nPrediction:...\nAnalysis:...\n<|im_end|>\n<|im_start|>user\n[Company Introduction]:\n\nAmerican Express Co is a leading entity in the Financial Services sector. Incorporated and publicly traded since 1977-05-18, the company has established its reputation as one of the key players in the market. \n\nAmerican Express Co operates primarily in the US, trading under the ticker AXP on the NEW YORK STOCK EXCHANGE, INC.. As a dominant force in the Financial Services space, the company continues to innovate and drive progress within t

### Params

In [13]:
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=['up_proj', 'down_proj', 'gate_proj', 'k_proj', 'q_proj', 'v_proj', 'o_proj'],
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
)

model.gradient_checkpointing_enable()
model.enable_input_require_grads()
model.is_parallelizable = True
model.model_parallel = True
model.model.config.use_cache = False

peft_model = get_peft_model(model, lora_config)

In [28]:
peft_model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 4096)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                  (checkpoint-20): Dropout(p=0.1, inplace=False)
                  (checkpoint-30): Dropout(p=0.1, inplace=False)
                  (checkpoint-40): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                  (checkpoint-20): Linear(in_features=4096, out_features=16, bias=False)
                  (checkpoint-30): Linear(in_features=4096, out_features=16

In [14]:
# current_time = datetime.now().strftime('%Y%m%d-%H-%M')
current_time = "20240813-03-33"

optimizer = AdamW(
    peft_model.parameters(),
    lr=3e-5,
    betas=(0.9, 0.97),
    eps=1e-8,
    weight_decay=0.005
)

training_args = TrainingArguments(
    output_dir=f"./results_{current_time}",
    num_train_epochs=4,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    log_level='info',
    gradient_accumulation_steps=32,
    eval_strategy='steps',
    save_strategy="steps",
    fp16=True,
    logging_steps=1,
    save_total_limit=3,
    save_steps=10,
    eval_steps=10,
    remove_unused_columns=False,
    report_to='wandb',
    run_name="llama3_fin_pred_run",
    max_grad_norm=0.3,
    warmup_ratio=0.03,
)

trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['test'],
    tokenizer=tokenizer,
    data_collator=DataCollatorForSeq2Seq(
      tokenizer, padding=True,
      return_tensors="pt"
    ),
    optimizers=(optimizer, None),  # (optimizer, scheduler)
)

trainer.is_model_parallel = True

peft_model.gradient_checkpointing_enable()
peft_model.enable_input_require_grads()

You have loaded a model on multiple GPUs. `is_model_parallel` attribute will be force-set to `True` to avoid any unexpected behavior such as device placement mismatching.

Using auto half precision backend


### Train

In [15]:
trainer.train("/kaggle/working/results_20240813-03-33/checkpoint-40")

trainer.save_model(f"{training_args.output_dir}/trainer_model")
model.save_pretrained(f"{training_args.output_dir}/pretrained")

Loading model from /kaggle/working/results_20240813-03-33/checkpoint-40.

***** Running training *****

  Num examples = 900

  Num Epochs = 4

  Instantaneous batch size per device = 1

  Total train batch size (w. parallel, distributed & accumulation) = 32

  Gradient Accumulation steps = 32

  Total optimization steps = 112

  Number of trainable parameters = 41,943,040

  Continuing training from checkpoint, will skip to saved global_step

  Continuing training from epoch 1

  Continuing training from global step 40

  Will skip the first 1 epochs then the first 384 batches in the first epoch.

Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)

[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize

[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctr

  


[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Step,Training Loss,Validation Loss
50,0.1357,0.130672
60,0.1153,0.11824
70,0.1107,0.110976
80,0.1082,0.106154
90,0.098,0.103041
100,0.0933,0.101208
110,0.0933,0.100461




***** Running Evaluation *****

  Num examples = 240

  Batch size = 1

Saving model checkpoint to ./results_20240813-03-33/checkpoint-50

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--instruction-pretrain--finance-Llama3-8B/snapshots/b573f2ae317bca8e137321c8d8dca2919dfc37ef/config.json

Model config LlamaConfig {

  "architectures": [

    "LlamaForCausalLM"

  ],

  "attention_bias": false,

  "attention_dropout": 0.0,

  "bos_token_id": 128000,

  "eos_token_id": 128001,

  "hidden_act": "silu",

  "hidden_size": 4096,

  "initializer_range": 0.02,

  "intermediate_size": 14336,

  "max_position_embeddings": 8192,

  "mlp_bias": false,

  "model_type": "llama",

  "num_attention_heads": 32,

  "num_hidden_layers": 32,

  "num_key_value_heads": 8,

  "pretraining_tp": 1,

  "rms_norm_eps": 1e-05,

  "rope_scaling": null,

  "rope_theta": 500000.0,

  "tie_word_embeddings": false,

  "torch_dtype": "float16",

  "transformers_version": "4.

In [20]:
trainer.train()

trainer.save_model(f"{training_args.output_dir}/trainer_model")
model.save_pretrained(f"{training_args.output_dir}/pretrained")

***** Running training *****

  Num examples = 900

  Num Epochs = 4

  Instantaneous batch size per device = 1

  Total train batch size (w. parallel, distributed & accumulation) = 32

  Gradient Accumulation steps = 32

  Total optimization steps = 112

  Number of trainable parameters = 41,943,040

Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"

[34m[1mwandb[0m: Currently logged in as: [33maskador[0m ([33maskador-Kharkiv National University of Radio Electronics[0m). Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss,Validation Loss
10,7.7031,6.98365
20,2.001,1.631318
30,0.4008,0.348252
40,0.163,0.160674




***** Running Evaluation *****

  Num examples = 240

  Batch size = 1

Saving model checkpoint to ./results_20240813-03-33/checkpoint-10

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--instruction-pretrain--finance-Llama3-8B/snapshots/b573f2ae317bca8e137321c8d8dca2919dfc37ef/config.json

Model config LlamaConfig {

  "architectures": [

    "LlamaForCausalLM"

  ],

  "attention_bias": false,

  "attention_dropout": 0.0,

  "bos_token_id": 128000,

  "eos_token_id": 128001,

  "hidden_act": "silu",

  "hidden_size": 4096,

  "initializer_range": 0.02,

  "intermediate_size": 14336,

  "max_position_embeddings": 8192,

  "mlp_bias": false,

  "model_type": "llama",

  "num_attention_heads": 32,

  "num_hidden_layers": 32,

  "num_key_value_heads": 8,

  "pretraining_tp": 1,

  "rms_norm_eps": 1e-05,

  "rope_scaling": null,

  "rope_theta": 500000.0,

  "tie_word_embeddings": false,

  "torch_dtype": "float16",

  "transformers_version": "4.

OutOfMemoryError: CUDA out of memory. Tried to allocate 1.36 GiB. GPU 1 has a total capacty of 14.74 GiB of which 1.28 GiB is free. Process 2392 has 13.46 GiB memory in use. Of the allocated memory 10.13 GiB is allocated by PyTorch, and 3.20 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

## Evaluation

### Dataset

In [10]:
dataset_name = "./fin-prediction-2023-09-01-2024-06-01-llama3"
dataset = datasets.load_from_disk(dataset_name)
eval_dataset = dataset['test'].shuffle(seed=333).select(range(20))

In [11]:
def change_prompt_instruct_to_prcnt(feature):
    prompt = feature['prompt']
    feature['prompt'] = prompt.replace("Prediction: ...", "Prediction: Up|Down by X-Y%")
    return feature

better_prompt_dataset = eval_dataset.map(change_prompt_instruct_to_prcnt)

### Models

In [12]:
finance_llama3_8b = "instruction-pretrain/finance-Llama3-8B"
# finance_peft_adapter_path = "/kaggle/input/fin-prediction-llama3/transformers/peft-adapter/3"
finance_peft_adapter_path = "/kaggle/input/fine-tuned-fin-pred-llama3-model/transformers/peft-adapter/1"
# finance_peft_adapter_path2 = "/kaggle/input/fine-tuned-fin-pred-llama3-model/transformers/peft-adapter/2"
base_llama3_8b = "meta-llama/Meta-Llama-3-8B-Instruct"

In [23]:
finance_tokenizer = AutoTokenizer.from_pretrained(finance_llama3_8b)
base_tokenizer = AutoTokenizer.from_pretrained(base_llama3_8b)

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


tokenizer_config.json:   0%|          | 0.00/51.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [13]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

### Base

In [13]:
base_model = AutoModelForCausalLM.from_pretrained(base_llama3_8b, quantization_config=bnb_config, device_map='cuda:1', low_cpu_mem_usage=True)
base_model = base_model.eval()

config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]

In [12]:
base_model_pipeline = pipeline(
    "text-generation",
    model=base_model,
    tokenizer=base_tokenizer,
    eos_token_id=base_tokenizer.eos_token_id,
    do_sample=True,
    return_full_text=False
)

### Fine-tuned

In [14]:
finance_model = AutoModelForCausalLM.from_pretrained(
    finance_llama3_8b, 
    return_dict=True, 
    quantization_config=bnb_config, 
    low_cpu_mem_usage=True,
    device_map="cuda:0"
)
PeftModel.from_pretrained(finance_model, finance_peft_adapter_path)
finetuned_model = finance_model.eval()

config.json:   0%|          | 0.00/705 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/7 [00:00<?, ?it/s]

model-00001-of-00007.safetensors:   0%|          | 0.00/4.89G [00:00<?, ?B/s]

model-00002-of-00007.safetensors:   0%|          | 0.00/4.83G [00:00<?, ?B/s]

model-00003-of-00007.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00004-of-00007.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00005-of-00007.safetensors:   0%|          | 0.00/4.83G [00:00<?, ?B/s]

model-00006-of-00007.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00007-of-00007.safetensors:   0%|          | 0.00/2.57G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/177 [00:00<?, ?B/s]

In [None]:
# finance_finetuned_model = PeftModel.from_pretrained(finance_llama3_8b_model, finance_peft_adapter_path)
# finance_finetuned_model = finance_finetuned_model.merge_and_unload()
# finance_finetuned_model = finance_finetuned_model.eval()

In [27]:
finetuned_pipeline = pipeline(
    "text-generation",
    model=finetuned_model,
    tokenizer=finance_tokenizer,
    eos_token_id=finance_tokenizer.eos_token_id,
    do_sample=True,
#     return_full_text=False
)

### Running completions for evaluation

In [15]:
def decode(tokenizer, output):
    return tokenizer.decode(output[0]).split("|>assistant\n", 1)[1].split('<|e')[0]

In [16]:
from tqdm.notebook import tqdm
completions = {'base': [], 'finetuned': []}

for i, row in tqdm(enumerate(better_prompt_dataset)):
    base_inputs = base_tokenizer(
        row['prompt'], return_tensors='pt',
        add_special_tokens=True
    ).to(base_model.device)
    
    base_res = base_model.generate(
        **base_inputs, 
        do_sample=True,
        eos_token_id=base_tokenizer.eos_token_id
    )
    completions['base'].append(decode(base_tokenizer, base_res))
    
    
    finetuned_inputs = finance_tokenizer(
        row['prompt'], return_tensors='pt',
        add_special_tokens=True
    ).to(finetuned_model.device)
    
    finetuned_res = finetuned_model.generate(
        **finetuned_inputs, 
        do_sample=True,
        eos_token_id=finance_tokenizer.eos_token_id
    )
    
    completions['finetuned'].append(decode(finance_tokenizer, finetuned_res))

0it [00:00, ?it/s]

Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for

In [34]:
with open("./completions.json", 'w') as f:
    json.dump(completions, f, ensure_ascii=False, indent=4)

In [30]:
completions = []
with open("/kaggle/working/completions.json", 'r') as f:
    completions = json.load(f)

In [27]:
answers = []
for row in better_prompt_dataset:
    answers.append(row['answer'])

### Evaluation

In [25]:
from collections import defaultdict
from sklearn.metrics import accuracy_score, mean_squared_error

def parse_answer(answer):
    
    match_res = re.search(r"(?:\[|\*\*)?Positive Developments(?:\]:|:\*\*)?\s*(.*?)(?:\n+|\s+)*(?:\[|\*\*)?Potential Concerns(?:\]:|:\*\*)?\s*(.*?)(?:\n+|\s+)*(?:\[|\*\*)?Prediction\s*(?:&|and)\s*Analysis(?:\]:|:\*\*)?\s*(.*)\s*", answer, flags=re.DOTALL)
    if not match_res:
        return None
    
    positive, concerns, predict = match_res.group(1), match_res.group(2), match_res.group(3)
        

    match_predictions = re.match(r'^Prediction:\s*(.*)\s*Analysis:\s*(.*)\s*$', predict, flags=re.DOTALL)
    if not match_predictions:
        match_predictions = re.search(r'((?:increase|decrease|up|down|decline)\s+by\s+(?:\d(?:-\d)?%))', predict, flags=re.DOTALL)
        if not match_predictions:
            return None
        prediction = match_predictions.group(1)
        analysis = prediction
    else:
        prediction, analysis = match_predictions.group(1), match_predictions.group(2)

    
    if re.search(r'up|increase', prediction.lower()):
        prediction_bin = 1
    elif re.search(r'down|decrease|decline', prediction.lower()):
        prediction_bin = -1
    else:
        prediction_bin = 0
        

    match_res = re.search(r'(\d)-(\d)%', prediction)
    if not match_res:
        match_res = re.search(r'(?:more than )?(\d)+?%', prediction)    
        
    prediction_margin = prediction_bin * (int(match_res.group(1)) + 0.5) if match_res else 0.
        
    return {
        "positive_developments": positive,
        "potential_concerns": concerns,
        "prediction": prediction_margin,
        "prediction_binary": prediction_bin,
        "analysis": analysis
    }


def evaluate(completions, answers):
    completions_dict = defaultdict(list)
    answers_dict = defaultdict(list)
    
    for completion, answer in zip(completions, answers):
        completion_dict = parse_answer(completion)
        answer_dict = parse_answer(answer)
        
        if completion_dict and answer_dict:
            print(f"Completion:\t{completion_dict['prediction']}\t| Answer:\t{answer_dict['prediction']}")
            for k in completion_dict.keys():
                completions_dict[k].append(completion_dict[k])
                answers_dict[k].append(answer_dict[k])
                
    if not completions_dict['prediction']:
        return {}
    
    bin_acc = accuracy_score(completions_dict['prediction_binary'], answers_dict['prediction_binary'])
    mse = mean_squared_error(completions_dict['prediction'], answers_dict['prediction'])
    return {
        "valid_count": len(completions_dict['prediction']),
        "bin_acc": bin_acc,
        "mse": mse
    }

In [10]:
basemodel_result = evaluate(completions['base'], answers)
print("Base model evaluation", basemodel_result)

Completion:	5.5	| Answer:	-5.5
Completion:	2.5	| Answer:	1.5
Completion:	5.5	| Answer:	-0.5
Completion:	1.5	| Answer:	-0.5
Completion:	1.5	| Answer:	-1.5
Completion:	2.5	| Answer:	-0.5
Completion:	2.5	| Answer:	-3.5
Completion:	5.5	| Answer:	1.5
Completion:	5.5	| Answer:	1.5
Completion:	2.5	| Answer:	1.5
Completion:	-5.5	| Answer:	-5.5
Completion:	2.5	| Answer:	4.5
Completion:	-5.5	| Answer:	3.5
Completion:	1.5	| Answer:	-3.5
Completion:	1.5	| Answer:	0.5
Completion:	1.5	| Answer:	0.5
Completion:	2.5	| Answer:	-5.5
Completion:	2.5	| Answer:	-0.5
Completion:	2.5	| Answer:	-4.5
Completion:	5.5	| Answer:	-0.5
Base model evaluation {'valid_count': 20, 'bin_acc': 0.4, 'mse': 25.95}


In [33]:
finetuned_result = evaluate(completions['finetuned'], answers)
print("Finetuned model evaluation", finetuned_result)

Completion:	-5.5	| Answer:	-5.5
Completion:	1.5	| Answer:	1.5
Completion:	2.5	| Answer:	-0.5
Completion:	-1.5	| Answer:	-0.5
Completion:	-0.5	| Answer:	-1.5
Completion:	-0.5	| Answer:	-0.5
Completion:	-2.5	| Answer:	-3.5
Completion:	2.5	| Answer:	1.5
Completion:	1.5	| Answer:	1.5
Completion:	2.5	| Answer:	1.5
Completion:	-3.5	| Answer:	-5.5
Completion:	-1.5	| Answer:	4.5
Completion:	-2.5	| Answer:	3.5
Completion:	1.5	| Answer:	-3.5
Completion:	1.5	| Answer:	0.5
Completion:	1.5	| Answer:	0.5
Completion:	1.5	| Answer:	-5.5
Completion:	1.5	| Answer:	-0.5
Completion:	-1.5	| Answer:	-4.5
Completion:	2.5	| Answer:	-0.5
Finetuned model evaluation {'valid_count': 20, 'bin_acc': 0.65, 'mse': 9.4}


In [35]:
completions_base_parsed = [parse_answer(c) for c in completions['base']]
completions_finetuned_parsed = [parse_answer(c) for c in completions['finetuned']]
answers_parsed = [parse_answer(c) for c in answers]

In [36]:
for base, answer in zip(completions_base_parsed, answers_parsed):
    if not base:
        print("No Base")
        continue
    space_base = " " if base['prediction'] > 0 else ""
    space_answer = " " if answer['prediction'] > 0 else ""
    is_eq = answer['prediction'] == base['prediction']
    print(f"Predictions: {space_base}{base['prediction']} {space_answer}{answer['prediction']} | Base == Answer {is_eq}")

Predictions:  5.5 -5.5 | Base == Answer False
Predictions:  2.5  1.5 | Base == Answer False
Predictions:  5.5 -0.5 | Base == Answer False
Predictions:  1.5 -0.5 | Base == Answer False
Predictions:  1.5 -1.5 | Base == Answer False
Predictions:  2.5 -0.5 | Base == Answer False
Predictions:  2.5 -3.5 | Base == Answer False
Predictions:  5.5  1.5 | Base == Answer False
Predictions:  5.5  1.5 | Base == Answer False
Predictions:  2.5  1.5 | Base == Answer False
Predictions: -5.5 -5.5 | Base == Answer True
Predictions:  2.5  4.5 | Base == Answer False
Predictions: -5.5  3.5 | Base == Answer False
Predictions:  1.5 -3.5 | Base == Answer False
Predictions:  1.5  0.5 | Base == Answer False
Predictions:  1.5  0.5 | Base == Answer False
Predictions:  2.5 -5.5 | Base == Answer False
Predictions:  2.5 -0.5 | Base == Answer False
Predictions:  2.5 -4.5 | Base == Answer False
Predictions:  5.5 -0.5 | Base == Answer False


In [37]:
for finetuned, answer in zip(completions_finetuned_parsed, answers_parsed):
    if not finetuned:
        print("No Finetuned")
        continue
    space_answer = " " if answer['prediction'] > 0 else ""
    space_finetuned = " " if finetuned['prediction'] > 0 else ""
    is_eq = finetuned['prediction'] == answer['prediction']
    print(f"Predictions: {space_answer}{answer['prediction']} {space_finetuned}{finetuned['prediction']} | Finetuned == Answer {is_eq}")

Predictions: -5.5 -5.5 | Finetuned == Answer True
Predictions:  1.5  1.5 | Finetuned == Answer True
Predictions: -0.5  2.5 | Finetuned == Answer False
Predictions: -0.5 -1.5 | Finetuned == Answer False
Predictions: -1.5 -0.5 | Finetuned == Answer False
Predictions: -0.5 -0.5 | Finetuned == Answer True
Predictions: -3.5 -2.5 | Finetuned == Answer False
Predictions:  1.5  2.5 | Finetuned == Answer False
Predictions:  1.5  1.5 | Finetuned == Answer True
Predictions:  1.5  2.5 | Finetuned == Answer False
Predictions: -5.5 -3.5 | Finetuned == Answer False
Predictions:  4.5 -1.5 | Finetuned == Answer False
Predictions:  3.5 -2.5 | Finetuned == Answer False
Predictions: -3.5  1.5 | Finetuned == Answer False
Predictions:  0.5  1.5 | Finetuned == Answer False
Predictions:  0.5  1.5 | Finetuned == Answer False
Predictions: -5.5  1.5 | Finetuned == Answer False
Predictions: -0.5  1.5 | Finetuned == Answer False
Predictions: -4.5 -1.5 | Finetuned == Answer False
Predictions: -0.5  2.5 | Finetuned 

In [38]:
for base, finetuned in zip(completions_base_parsed, completions_finetuned_parsed):
    if not base:
        print("No Base")
        continue
    if not finetuned:
        print("No Finetuned")
        continue
    space_base = " " if base['prediction'] > 0 else ""
    space_finetuned = " " if finetuned['prediction'] > 0 else ""
    is_eq = finetuned['prediction'] == base['prediction']
    print(f"Predictions: {space_base}{base['prediction']} {space_finetuned}{finetuned['prediction']} | Base == Finetuned {is_eq}")

Predictions:  5.5 -5.5 | Base == Finetuned False
Predictions:  2.5  1.5 | Base == Finetuned False
Predictions:  5.5  2.5 | Base == Finetuned False
Predictions:  1.5 -1.5 | Base == Finetuned False
Predictions:  1.5 -0.5 | Base == Finetuned False
Predictions:  2.5 -0.5 | Base == Finetuned False
Predictions:  2.5 -2.5 | Base == Finetuned False
Predictions:  5.5  2.5 | Base == Finetuned False
Predictions:  5.5  1.5 | Base == Finetuned False
Predictions:  2.5  2.5 | Base == Finetuned True
Predictions: -5.5 -3.5 | Base == Finetuned False
Predictions:  2.5 -1.5 | Base == Finetuned False
Predictions: -5.5 -2.5 | Base == Finetuned False
Predictions:  1.5  1.5 | Base == Finetuned True
Predictions:  1.5  1.5 | Base == Finetuned True
Predictions:  1.5  1.5 | Base == Finetuned True
Predictions:  2.5  1.5 | Base == Finetuned False
Predictions:  2.5  1.5 | Base == Finetuned False
Predictions:  2.5 -1.5 | Base == Finetuned False
Predictions:  5.5  2.5 | Base == Finetuned False
