# Implementing "FX sentiment analysis with large language models" (Ballinari et al.)
This paper can be found at 

## Imports

In [1]:
import numpy as np
import pandas as pd
import torch
import re
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments, EarlyStoppingCallback
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from sklearn.model_selection import train_test_split
from dotenv import load_dotenv
import os
from huggingface_hub import login
from trl import SFTTrainer
from datasets import Dataset
from sklearn.metrics import accuracy_score, f1_score, precision_recall_fscore_support, classification_report
# import bitsandbytes as bnb
# import accelerate
# import trl

  from .autonotebook import tqdm as notebook_tqdm


## 1. Dataset Preprocessing

### 1.1. Filtering
- Load the datasets
- Drop articles with <20 words
- Remove duplicate articles 
- Convert time to datetime

In [None]:
# Loading the dailyfx news articles dataset (Title,Author,Date,Full Text,URL)
df_news = pd.read_csv('datasets/news_articles/dailyfx_articles_012011-062024.csv')
df_news = df_news[['Title', 'Date', 'Full Text', 'URL']]

# Load the fxstreet news articles dataset into same df (Title,Date,Full Text,URL)
temp = pd.read_csv('datasets/news_articles/fxstreet_articles_062024-072024.csv')
temp = temp[['Title', 'Date', 'Full Text', 'URL']]
df_news = pd.concat([df_news, temp], ignore_index=True)

# Load investing.com news articles dataset into same df (Title,Full Text,URL,Date,Author)
temp = pd.read_csv('datasets/news_articles/investing_articles_062024-072024.csv')
temp = temp[['Title', 'Date', 'Full Text', 'URL']]
df_news = pd.concat([df_news, temp], ignore_index=True)

In [None]:
# Drop articles with <20 words
df_news = df_news[df_news['Full Text'].str.split().str.len() >= 20]

# Remove duplicate articles
df_news = df_news.drop_duplicates(subset=['Full Text'])
df_news = df_news.drop_duplicates(subset=['Title'])
df_news = df_news.drop_duplicates(subset=['URL'])

# Convert time to datetime
df_news['Date'] = pd.to_datetime(df_news['Date'], utc=True)

# Only keep articles before 2020
df_news = df_news[df_news['Date'] < pd.to_datetime('2020-01-01', utc=True)]

# Truncate articles to max 32,767 characters
df_news['Full Text'] = df_news['Full Text'].str[:32767]

# Randomly sample 30000 articles
df_news = df_news.sample(n=30000, random_state=42)

In [None]:
# Convert to New York timezone
df_news['Date'] = df_news['Date'].dt.tz_convert('America/New_York')

# Remove time info - paper says use date
df_news['Date'] = df_news['Date'].dt.normalize()

# Sort by date
df_news = df_news.sort_values(by='Date')

### 1.2. Creating mentioned_currency column
- Use regex to capture all the currencies used in an article
- Make use of common synomyms
- Filter articles that don't mention any of the G10 currencies 

In [3]:
# Dictionary mapping ISO codes to the regex patterns (synonyms) from Figure A.1
currency_synonyms = {
    "EUR": [r"EUR", r"Euro"],
    "USD": [r"USD", r"Dollar", r"Dollars", r"US Dollar", r"US-Dollar", r"U\.S\. Dollar", 
            r"US Dollars", r"US-Dollars", r"U\.S\. Dollars", r"Greenback"],
    "JPY": [r"JPY", r"Yen", r"Japanese Yen"],
    "GBP": [r"GBP", r"Pound", r"Pounds", r"Sterling", r"British Pound", r"British Pounds"],
    "AUD": [r"AUD", r"Australian Dollar", r"Australian Dollars", r"Aussie"],
    "CAD": [r"CAD", r"Canadian Dollar", r"Canadian Dollars"],
    "CHF": [r"CHF", r"Swiss Franc", r"Swiss Francs", r"Swissie"],
    "NZD": [r"NZD", r"New Zealand Dollar", r"New Zealand Dollars", r"Kiwi"],
    "NOK": [r"NOK", r"Norwegian Krone", r"Norwegian Kroner"],
    "SEK": [r"SEK", r"Swedish Krona", r"Swedish Kronor"]
}

# Get list of mentioned currencies from text
def get_mentioned_currencies(text):
    mentioned_currencies = list()

    for currency, patterns in currency_synonyms.items():
        for pattern in patterns:
            if re.search(pattern, text, re.IGNORECASE):
                mentioned_currencies.append(currency)
                break

    return mentioned_currencies

df_news['mentioned_currencies'] = df_news['Full Text'].apply(get_mentioned_currencies)

# Filter articles to keep only those where 'mentioned_currencies' is non empty
df_news = df_news[df_news['mentioned_currencies'].apply(lambda x: len(x) > 0)]

# So that it is incrimenting by 1 properly due to dropped values from before
df_news = df_news.reset_index(drop=True)

### 1.3. Getting historical prices
Using nominal narrow effective exchange rate (daily) for each country.

Narrow effective exchange rate is a good proxy for the tradable currency index that the authors used.

Allows us to put a number to the currency rather than using a pair as then the currency can be effected by the other in the pair. 

It is done by taking the geometric mean from the exchange rate of various other currencies (narrow means only a small number of industrialised countries so that the average isn't skewed by some other non industrialised country going down).

In [4]:
# All links to get data from for effective exchage rate
urls = {
    "USD": "https://stats.bis.org/api/v2/data/dataflow/BIS/WS_EER/1.0/D.N.N.US?startPeriod=2011-01-01&endPeriod=2024-06-01&format=csv",
    "EUR": "https://stats.bis.org/api/v2/data/dataflow/BIS/WS_EER/1.0/D.N.N.XM?startPeriod=2011-01-01&endPeriod=2024-06-01&format=csv",
    "JPY": "https://stats.bis.org/api/v2/data/dataflow/BIS/WS_EER/1.0/D.N.N.JP?startPeriod=2011-01-01&endPeriod=2024-06-01&format=csv",
    "GBP": "https://stats.bis.org/api/v2/data/dataflow/BIS/WS_EER/1.0/D.N.N.GB?startPeriod=2011-01-01&endPeriod=2024-06-01&format=csv",
    "CAD": "https://stats.bis.org/api/v2/data/dataflow/BIS/WS_EER/1.0/D.N.N.CA?startPeriod=2011-01-01&endPeriod=2024-06-01&format=csv",
    "AUD": "https://stats.bis.org/api/v2/data/dataflow/BIS/WS_EER/1.0/D.N.N.AU?startPeriod=2011-01-01&endPeriod=2024-06-01&format=csv",
    "CHF": "https://stats.bis.org/api/v2/data/dataflow/BIS/WS_EER/1.0/D.N.N.CH?startPeriod=2011-01-01&endPeriod=2024-06-01&format=csv", 
    "SEK": "https://stats.bis.org/api/v2/data/dataflow/BIS/WS_EER/1.0/D.N.N.SE?startPeriod=2011-01-01&endPeriod=2024-06-01&format=csv",
    "NOK": "https://stats.bis.org/api/v2/data/dataflow/BIS/WS_EER/1.0/D.N.N.NO?startPeriod=2011-01-01&endPeriod=2024-06-01&format=csv",
    "NZD": "https://stats.bis.org/api/v2/data/dataflow/BIS/WS_EER/1.0/D.N.N.NZ?startPeriod=2011-01-01&endPeriod=2024-06-01&format=csv"
}

# Initialise an empty DataFrame (EER = effective exchange rate)
df_EER = pd.DataFrame()

for code, url in urls.items():
    # Read only the required columns from the CSV
    df_temp = pd.read_csv(url, usecols=lambda c: c in ["TIME_PERIOD", "OBS_VALUE"])
    
    # Convert OBS_VALUE to float for log calculations later
    df_temp["OBS_VALUE"] = pd.to_numeric(df_temp["OBS_VALUE"], errors="coerce")
    
    # Rename "OBS_VALUE" to currency code
    df_temp = df_temp.rename(columns={
        "OBS_VALUE": code,
        "TIME_PERIOD": "date"
    })
    
    # If the main df is empty, set it to this df
    if df_EER.empty:
        df_EER = df_temp
    else:
        # Join on "date", keep all records (outer join)
        df_EER = pd.merge(df_EER, df_temp, on=["date"], how='outer')


df_EER['date'] = pd.to_datetime(df_EER['date'])
df_EER = df_EER.set_index('date')

# drop all NaNs in the data
df_EER.dropna(inplace=True)

df_EER.head()

Unnamed: 0_level_0,USD,EUR,JPY,GBP,CAD,AUD,CHF,SEK,NOK,NZD
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2011-01-03,82.07,103.41,120.99,105.23,129.89,130.38,87.3,116.49,136.0,98.33
2011-01-04,82.05,103.98,119.81,105.84,129.59,128.97,86.05,116.47,135.74,97.42
2011-01-05,82.77,102.87,120.35,106.7,128.98,128.61,86.01,116.37,135.51,97.75
2011-01-06,83.09,102.38,119.32,106.95,130.06,128.83,85.4,116.25,135.96,97.37
2011-01-07,83.46,101.62,119.31,107.42,130.42,128.88,86.29,115.74,136.06,97.72


### 1.4 Calculate log returns

In [5]:
# Calculate daily log returns
df_log_returns = np.log(df_EER / df_EER.shift(1))

df_log_returns.dropna(inplace=True)  # created in the shifting

### 1.5 Calculate cumulative 5 day windows

In [6]:
# Future returns
# At index t, we want the sum of t+1, t+2, t+3, t+4, t+5 returns
df_future_returns = df_log_returns.rolling(window=5, min_periods=5).sum().shift(-5)
df_future_returns.dropna(inplace=True)

# Past returns
# At index t, we want the sum of t-1, t-2, t-3, t-4, t-5 returns
df_past_returns = df_log_returns.rolling(window=5, min_periods=5).sum().shift(1)
df_past_returns.dropna(inplace=True)


# Merge future and past returns DataFrames into df_log_returns, aligning on date index.
df_log_returns = df_log_returns.join(df_future_returns.add_suffix('_future'), how='inner')
df_log_returns = df_log_returns.join(df_past_returns.add_suffix('_past'), how='inner')

### 1.6 Get sentiment labels

Based of future returns:

For each timestep:
- Top 3 (30%) -> "appreciation"
- Middle 4 (40%) -> "unchanged"
- Bottom 3 (30%) -> "depreciation"

In [7]:
# Get list of currency codes (G10 currencies)
currency_codes = ['USD', 'EUR', 'JPY', 'GBP', 'CAD', 'AUD', 'CHF', 'SEK', 'NOK', 'NZD']

# Initialize label columns for each currency
for currency in currency_codes:
    df_log_returns[f'{currency}_label'] = None

# For each date (row), rank currencies by their future returns and assign labels
for date in df_log_returns.index:
    # Get future returns for this date
    future_returns = {}
    for currency in currency_codes:
        value = df_log_returns.loc[date, f'{currency}_future']
        if pd.notna(value):
            future_returns[currency] = value

    # Get past returns for this date
    past_returns = {}
    for currency in currency_codes:
        value = df_log_returns.loc[date, f'{currency}_past']
        if pd.notna(value):
            past_returns[currency] = value
    
    # Rank currencies by future returns (highest to lowest)
    sorted_currencies_future = sorted(future_returns.items(), key=lambda x: x[1], reverse=True)

    # Rank currencies by past returns (highest to lowest)
    sorted_currencies_past = sorted(past_returns.items(), key=lambda x: x[1], reverse=True)
    
    # Assign labels based on ranking
    # Top 3 (30%) -> "appreciation"
    # Middle 4 (40%) -> "unchanged"
    # Bottom 3 (30%) -> "depreciation"
    for i, (currency, _) in enumerate(sorted_currencies_future):
        if i < 3:  # Top 3 (0, 1, 2)
            df_log_returns.loc[date, f'{currency}_future_label'] = 'appreciation'
        elif i >= 7:  # Bottom 3 (7, 8, 9)
            df_log_returns.loc[date, f'{currency}_future_label'] = 'depreciation'
        else:  # Middle 4 (3, 4, 5, 6)
            df_log_returns.loc[date, f'{currency}_future_label'] = 'unchanged'
    
    for i, (currency, _) in enumerate(sorted_currencies_past):
        if i < 3:  # Top 3 (0, 1, 2)
            df_log_returns.loc[date, f'{currency}_past_label'] = 'depreciation'
        elif i >= 7:  # Bottom 3 (7, 8, 9)
            df_log_returns.loc[date, f'{currency}_past_label'] = 'appreciation'
        else:  # Middle 4 (3, 4, 5, 6)
            df_log_returns.loc[date, f'{currency}_past_label'] = 'unchanged'

# Only keep labels
df_labels = df_log_returns[
    [f'{currency}_future_label' for currency in currency_codes] + 
    [f'{currency}_past_label' for currency in currency_codes]
]

# Convert to New York timezone
df_labels.index = df_labels.index.tz_localize('UTC').tz_convert('America/New_York')

# Get rid of time info
df_labels.index = df_labels.index.normalize()

# Have date as a column
df_labels = df_labels.reset_index()

df_labels.columns

Index(['date', 'USD_future_label', 'EUR_future_label', 'JPY_future_label',
       'GBP_future_label', 'CAD_future_label', 'AUD_future_label',
       'CHF_future_label', 'SEK_future_label', 'NOK_future_label',
       'NZD_future_label', 'USD_past_label', 'EUR_past_label',
       'JPY_past_label', 'GBP_past_label', 'CAD_past_label', 'AUD_past_label',
       'CHF_past_label', 'SEK_past_label', 'NOK_past_label', 'NZD_past_label'],
      dtype='object')

### 1.7 Assign labels to news articles
- df_labels - labels for sentiment for each valid day
- df_news_future and past - mapping between all real dates and valid dates based on the way we backfill
- df_news - now will contain these new sentiment labels joined using the dates from the news_future/past

In [8]:
# maps news date to future trading date
df_news_future = pd.merge_asof(
    df_news[['Date']],
    df_labels[['date']],
    left_on='Date',
    right_on='date',
    direction='forward'
).rename(columns={'Date': 'news_date', 'date': 'trading_date_future'})


# maps news date to past trading date
df_news_past = pd.merge_asof(
    df_news[['Date']],
    df_labels[['date']],
    left_on='Date',
    right_on='date',
    direction='backward'
).rename(columns={'Date': 'news_date', 'date': 'trading_date_past'})

# Perform a concat of df_news and df_news_future and df_news_past
# Example: Only concat 'news_date' from df_news_future, and 'trading_date_past' from df_news_past
df_news = pd.concat([
    df_news,
    df_news_future[['trading_date_future']],
    df_news_past[['trading_date_past']]
], axis=1)


# Merge the future labels into the news dataframe
future_cols = ['date'] + [col for col in df_labels.columns if col.endswith('_future_label')]
df_labels_future = df_labels[future_cols]

df_news = df_news.merge(df_labels_future, left_on='trading_date_future', right_on='date', how='left')
df_news = df_news.drop(columns=['date'])  # Drop the date column as we don't need it

# Merge the past labels into the news dataframe
past_cols = ['date'] + [col for col in df_labels.columns if col.endswith('_past_label')]
df_labels_past = df_labels[past_cols]

df_news = df_news.merge(df_labels_past, left_on='trading_date_past', right_on='date', how='left')
df_news = df_news.drop(columns=['date'])  # Drop the date column as we don't need it


### 1.8 Cleaning final DataFrame
- drop rows with nulls
- Removing unnecessary columns

In [9]:
# Only keep title, full text and all labels
df_news = df_news[['Title', 'Full Text', 'mentioned_currencies', *future_cols[1:], *past_cols[1:]]]

df_news = df_news.dropna()

df_news.to_pickle("df_news.pkl")

df_news

Unnamed: 0,Title,Full Text,mentioned_currencies,USD_future_label,EUR_future_label,JPY_future_label,GBP_future_label,CAD_future_label,AUD_future_label,CHF_future_label,...,USD_past_label,EUR_past_label,JPY_past_label,GBP_past_label,CAD_past_label,AUD_past_label,CHF_past_label,SEK_past_label,NOK_past_label,NZD_past_label
3,EUR/CHF Trades Near All-Time Low,Sha...,"[EUR, CHF]",depreciation,appreciation,depreciation,appreciation,unchanged,unchanged,depreciation,...,depreciation,appreciation,unchanged,depreciation,depreciation,appreciation,appreciation,unchanged,unchanged,unchanged
4,Higher Probability Trading,Sha...,"[USD, CHF]",depreciation,appreciation,depreciation,appreciation,unchanged,unchanged,depreciation,...,depreciation,appreciation,unchanged,depreciation,depreciation,appreciation,appreciation,unchanged,unchanged,unchanged
5,Higher Probability Entries,Sha...,"[USD, CAD]",depreciation,appreciation,depreciation,appreciation,unchanged,unchanged,depreciation,...,depreciation,appreciation,unchanged,depreciation,depreciation,appreciation,appreciation,unchanged,unchanged,unchanged
6,EUR/AUD Bounces Up Off of the Lows,Sha...,"[EUR, AUD]",depreciation,appreciation,appreciation,unchanged,depreciation,unchanged,depreciation,...,depreciation,appreciation,unchanged,depreciation,depreciation,appreciation,appreciation,unchanged,unchanged,unchanged
7,EUR/CHF Finds Support,Sha...,"[EUR, CHF]",unchanged,appreciation,appreciation,unchanged,depreciation,depreciation,appreciation,...,depreciation,appreciation,appreciation,depreciation,depreciation,unchanged,appreciation,unchanged,unchanged,unchanged
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47622,Japanese Yen Sentiment Analysis & Outlook – US...,Sha...,"[EUR, USD, JPY, GBP]",depreciation,depreciation,unchanged,depreciation,unchanged,unchanged,appreciation,...,unchanged,unchanged,appreciation,depreciation,appreciation,unchanged,appreciation,unchanged,depreciation,depreciation
47623,"US Dollar Technical Outlook: EUR/USD, GBP/USD,...",Sha...,"[EUR, USD, GBP, NZD]",depreciation,depreciation,unchanged,depreciation,unchanged,unchanged,appreciation,...,unchanged,unchanged,appreciation,depreciation,appreciation,unchanged,appreciation,unchanged,depreciation,depreciation
47624,"Gold Prices Sink, Support Breakdown Heralds Mo...",Sha...,"[EUR, USD, JPY, GBP]",depreciation,depreciation,unchanged,depreciation,unchanged,unchanged,appreciation,...,unchanged,unchanged,appreciation,depreciation,appreciation,unchanged,appreciation,unchanged,depreciation,depreciation
47625,Gold Price Outlook: XAU/USD Turns on Hawkish F...,Sha...,[USD],depreciation,depreciation,unchanged,depreciation,unchanged,unchanged,appreciation,...,unchanged,unchanged,appreciation,depreciation,appreciation,unchanged,appreciation,unchanged,depreciation,depreciation


### 1.9 Train Test Split
- 200 examples for final eval
- Otherwise 80/20 train test split

In [None]:
df_news = pd.read_pickle("df_news.pkl")

df_news_before_2020 = df_news[df_news['Date'] < pd.to_datetime('2020-01-01', utc=True)]     # we train the model on this for now
# df_news_after_2020 = df_news[df_news['Date'] >= pd.to_datetime('2020-01-01', utc=True)]   # we use this for the trading strategy

df_rest, df_eval = train_test_split(df_news_before_2020, test_size=200, random_state=42)
df_train, df_test = train_test_split(df_rest, test_size=0.2, random_state=42)



print("Size of train set: ", len(df_train))
print("Size of test set: ", len(df_test))
print("Size of eval set: ", len(df_eval))

Size of train set:  2
Size of test set:  2
Size of eval set:  200


## 2 Generate Prompt

In [11]:
def generate_prompt(row):
    title = row.get('Title', '')
    text = row.get('Full Text', '')
    currencies = row.get('mentioned_currencies')

    target_currencies = ''
    for c in currencies:
        target_currencies += f'{c}_past: "appreciation, depreciation, or unchanged",\n'
        target_currencies += f'{c}_future: "appreciation, depreciation, or unchanged",\n'
    target_currencies = target_currencies.strip().rstrip(",") # Remove last comma

    # Same structure as per paper
    return (
        f"Title: {title}\n"
        f"Text: {text}\n\n"
        "Instructions:\n"
        "Objective: For each mentioned currency, answer the following questions:\n"
        "- What has been the current/past movement of the currency (appreciation, depreciation, or unchanged)?\n"
        "- What is the future expectation for the currency (appreciation, depreciation, or unchanged)?\n\n"
        "You must answer these two questions for each of the following currencies mentioned in the article:\n"
        f"{target_currencies}\n\n"
        "Output Format:\n"
        "- Important: Provide your answer in separate rows for each currency as shown above.\n"
        "- Do not combine multiple currencies in the same row.\n"
        '- Each currency should have its own line with "_past" or "_future" specified.\n\n'
        "Example:\n"
        '- If the article states, "The EUR is expected to appreciate," the output should be:\n'
        '    EUR_past: "unchanged",\n'
        '    EUR_future: "appreciation"\n'
        '- If the article states, "EUR/USD depreciated last week," the output should be:\n'
        '    EUR_past: "depreciation",\n'
        '    USD_past: "appreciation"\n'
        '- If only future movements are mentioned for a currency, the past movement should be labelled as "unchanged" and vice versa.\n\n'
        "Currency Pair Interpretation:\n"
        "- If currencies are discussed in pairs, interpret as follows:\n"
        '    - If "EUR/USD appreciated," label EUR_past as "appreciation" and USD_past as "depreciation".\n'
        '    - If "EUR/USD depreciated," label EUR_past as "depreciation" and USD_past as "appreciation".\n\n'
        "Synonyms:\n"
        "- Recognize the following synonyms for each currency:\n"
        "- **EUR**: EUR, Euro\n"
        "- **USD**: USD, Dollar, Dollars, US Dollar, US-Dollar, U.S. Dollar, US Dollars, US-Dollars, U.S. Dollars, Greenback\n"
        "- **JPY**: JPY, Yen, Japanese Yen\n"
        "- **GBP**: GBP, Pound, Pounds, Sterling, British Pound, British Pounds\n"
        "- **AUD**: AUD, Australian Dollar, Australian Dollars, Aussie\n"
        "- **CAD**: CAD, Canadian Dollar, Canadian Dollars\n"
        "- **CHF**: CHF, Swiss Franc, Swiss Francs, Swissie\n"
        "- **NZD**: NZD, New Zealand Dollar, New Zealand Dollars, Kiwi\n"
        "- **NOK**: NOK, Norwegian Krone, Norwegian Kroner\n"
        "- **SEK**: SEK, Swedish Krona, Swedish Kronor\n"
        "Answer below in the given format:"
    )

## 3 Model setup

In [12]:
load_dotenv()
login(token=os.getenv("HF_TOKEN"))

# 'meta-llama/Llama-3.1-8B-Instruct' - real
# "meta-llama/Llama-3.2-1B-Instruct" - for local testing as smallest possible model
model_id =  "meta-llama/Llama-3.2-1B-Instruct"

# quntisation config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,      # "Double Quantization"
    bnb_4bit_quant_type="nf4",           # 4-bit NormalFloat data type
    bnb_4bit_compute_dtype=torch.bfloat16 # Compute in bfloat16 for stability
)

# load tokeniser
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.model_max_length = 8192
tokenizer.pad_token = tokenizer.eos_token # Llama has no default pad token
tokenizer.padding_side = "right"  # TODO Check this

# load model 
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto", # Automatically puts model on GPU
    dtype=torch.bfloat16
)

# Move model to GPU if available
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    model = model.to(device)
    print(f"Model explicitly loaded onto: {device}")
else:
    device = torch.device("cpu")
    model = model.to(device)
    print("CUDA not available. Model loaded onto CPU.")

model.config.use_cache = False
model.config.pretraining_tp = 1

# Prepare for training 
model = prepare_model_for_kbit_training(model)

# LoRA config 
# Params from Table A.1
peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",           # TODO Check this
    task_type="CAUSAL_LM", # TODO Check this
    
    # inject low-rank adaptation matrices into all linear layers TODO check this
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj", # Attention projections
        "gate_proj", "up_proj", "down_proj"     # MLP projections
    ]
)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


Model explicitly loaded onto: cuda:0


## 4 LLM Fine Tuning
- Stopping criterion is used
    - Optimisises for least loss in the validation stage rather than most traning epochs
    - So if the model with best validation loss is in epoch 1 or 2, then the weights in epoch 3 will be discarded
    - Used to prevent overfitting due to this being a small dataset
    - Stops traning if the validation loss stagnates due to overfitting

In [13]:
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=1,      # TODO Check this
    per_device_eval_batch_size=1,       # TODO Check this
    gradient_accumulation_steps=32, 
    optim="paged_adamw_32bit",          # 
    save_steps=50,                      # TODO get better number
    learning_rate=1e-5,                 #  Note: significantly lower than standard
    weight_decay=0.1,                   #  High weight decay
    fp16=False,
    bf16=True,
    max_grad_norm=0.3,                  # TODO apparenly this is the best for lora??? - not said in the paper
    warmup_ratio=0.0,                   # 
    lr_scheduler_type="cosine",         #                  
    save_strategy="steps",              # for early stopping   (could be epoch)
    eval_strategy="steps",              # for early stopping   (could be epoch)
    load_best_model_at_end=True,         # for early stopping
    metric_for_best_model="eval_loss",   # for early stopping
    greater_is_better=False,     # less loss is better
    logging_steps=10,                   # TODO get a better number
    group_by_length=True,
    report_to="none"                    # Disable wandb unless needed
)


df_train = Dataset.from_pandas(df_train)
df_test = Dataset.from_pandas(df_test)

trainer = SFTTrainer(
    model=model,
    train_dataset=df_train, # Ensure this is loaded
    eval_dataset=df_test,
    peft_config=peft_config,
    formatting_func=generate_prompt,
    processing_class=tokenizer,
    args=training_args,
    # packing=False,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=1)] # to stop after epoch 1 if validaiton loss gets worse
)


trainer.train()

trainer.model.save_pretrained('llama_small_finetuned')
print("Model saved.")

Applying formatting function to train dataset: 100%|██████████| 2/2 [00:00<00:00, 26.50 examples/s]
Adding EOS to train dataset: 100%|██████████| 2/2 [00:00<00:00, 119.03 examples/s]
Tokenizing train dataset: 100%|██████████| 2/2 [00:00<00:00, 11.37 examples/s]
Truncating train dataset: 100%|██████████| 2/2 [00:00<00:00, 92.35 examples/s]
Applying formatting function to eval dataset: 100%|██████████| 2/2 [00:00<00:00, 130.05 examples/s]
Adding EOS to eval dataset: 100%|██████████| 2/2 [00:00<00:00, 155.58 examples/s]
Tokenizing eval dataset: 100%|██████████| 2/2 [00:00<00:00, 58.27 examples/s]
Truncating eval dataset: 100%|██████████| 2/2 [00:00<00:00, 243.13 examples/s]
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 128009, 'pad_token_id': 128009}.
  return fn(*args, **kwargs)


Step,Training Loss,Validation Loss


Model saved.


## 5 Evaluate

### 5.1 Predict sentiment
- Gets the sentiment for a single article
- Used for evaulation

In [13]:
def get_sentiment(row):
    prompt = generate_prompt(row)

    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,  # to avoid crashing model due to very large article
        max_length=8192
    )
    inputs = {k: v.to(model.device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model.generate(
            **inputs, 
            max_new_tokens=512,     # only needs to generate enough for sentiment
            temperature=0.1,        # incase there was sampling
            do_sample=False,        # no sampling - so no randomness
            pad_token_id=tokenizer.eos_token_id
        )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(response)
    response = response[len(prompt):].strip()    # skips over prompt

    # Parse response to get labels into a dict
    sentiment = {}
    for line in response.split('\n'):
        if line.strip():
            currency, label = line.split(':')
            currency = currency.strip()
            label = label.strip()
            sentiment[currency] = label

    return sentiment

### 5.2 Get evaulation statistics

In [None]:
currency_codes = ['EUR', 'USD', 'GBP', 'JPY', 'AUD', 'CAD', 'CHF', 'NZD', 'NOK', 'SEK']

all_actual = []
all_predictions = []

for i, row in df_eval.iterrows():
    sentiment = get_sentiment(row)
    for c in currency_codes:
        for t in ['past', 'future']:
            all_actual.append(row[f'{c}_{t}_label'])
            all_predictions.append(sentiment.get(f'{c}_{t}', 'unchanged'))

    
    
accuracy = accuracy_score(all_actual, all_predictions)
f1 = f1_score(all_actual, all_predictions, average='macro')
precision_per_class, recall_per_class, f1_per_class, support_per_class = precision_recall_fscore_support(all_actual, all_predictions, labels=['appreciation', 'depreciation', 'unchanged'])

stats = {
    'accuracy': accuracy,
    'f1': f1,
    'precision_per_class': dict(zip(['appreciation', 'depreciation', 'unchanged'], precision_per_class)),
    'recall_per_class': dict(zip(['appreciation', 'depreciation', 'unchanged'], recall_per_class)),
    'f1_per_class': dict(zip(['appreciation', 'depreciation', 'unchanged'], f1_per_class)),
    'support_per_class': dict(zip(['appreciation', 'depreciation', 'unchanged'], support_per_class))
}

report = classification_report(all_actual, all_predictions)

print(stats)

print()
print()

print(report)

{'accuracy': 1.0, 'f1': 1.0, 'precision_per_class': {'appreciation': np.float64(1.0), 'depreciation': np.float64(1.0), 'unchanged': np.float64(1.0)}, 'recall_per_class': {'appreciation': np.float64(1.0), 'depreciation': np.float64(1.0), 'unchanged': np.float64(1.0)}, 'f1_per_class': {'appreciation': np.float64(1.0), 'depreciation': np.float64(1.0), 'unchanged': np.float64(1.0)}, 'support_per_class': {'appreciation': np.int64(60), 'depreciation': np.int64(60), 'unchanged': np.int64(80)}}


              precision    recall  f1-score   support

appreciation       1.00      1.00      1.00        60
depreciation       1.00      1.00      1.00        60
   unchanged       1.00      1.00      1.00        80

    accuracy                           1.00       200
   macro avg       1.00      1.00      1.00       200
weighted avg       1.00      1.00      1.00       200

