**Important: Before running this notebook, set the Colab runtime type to an A100 GPU.**

Clone the repository and install libraries

In [None]:
!pip install -U torch

In [None]:
!git clone -b working https://github.com/GhostOf0days/PIXIU.git --recursive
%cd PIXIU
!pip install -r requirements.txt
%cd /content/PIXIU/src/financial-evaluation
!pip install -e .[multilingual]
!pip install bert_score
!pip install gdown
!pip install vllm==0.5.4
!pip install torch==2.4.0 torchvision==0.19
!pip install peft
!pip install lm-eval google-generativeai

Download BART checkpoint to src/metrics/BARTScore/

In [None]:
import os
import shutil

import gdown
from google.colab import drive

drive.mount('/content/drive')

# Preferred source: a copy of the checkpoint kept in the user's Google Drive.
source_path = "/content/drive/My Drive/bart_score.pth"
# Where the checkpoint must end up inside the cloned repository.
destination_path = "/content/PIXIU/src/metrics/BARTScore/bart_score.pth"

if os.path.exists(destination_path):
    # Re-running the cell must not download the checkpoint a second time.
    print("Checkpoint already present. Nothing to do.")
elif os.path.exists(source_path):
    shutil.copy(source_path, destination_path)
    print("File found in Google Drive and copied.")
else:
    # Fall back to the publicly shared copy, addressed by its Drive file id.
    file_id = '19Fpob1RhQHyvMlOqxPO89z1W58PvkOm-'
    url = f'https://drive.google.com/uc?id={file_id}'
    downloaded = gdown.download(url, destination_path, quiet=False)
    if downloaded is None:
        # Some gdown versions report failure by returning None instead of raising.
        raise RuntimeError(f"Could not download the BART checkpoint from {url}")
    print("File not found in Google Drive. Downloaded instead.")


In [None]:
# Change into the PIXIU source tree (the following cell switches back to /content).
%cd /content/PIXIU/src

In [None]:
# Work from /content: later cells call the evaluation script through the
# relative path PIXIU/src/eval.py.
%cd /content

In [None]:
import os

# Directory to expose on PYTHONPATH; child processes started from this kernel
# inherit the updated environment.
bartscore_dir = "/content/PIXIU/src/metrics/BARTScore/"

# PYTHONPATH may be unset (a plain `+=` would raise KeyError), and the cell may
# be re-run (a plain `+=` would append a duplicate entry every time).
current_pythonpath = os.environ.get("PYTHONPATH", "")
if bartscore_dir not in current_pythonpath.split(":"):
    if current_pythonpath:
        os.environ["PYTHONPATH"] = f"{current_pythonpath}:{bartscore_dir}"
    else:
        os.environ["PYTHONPATH"] = bartscore_dir

print(os.environ["PYTHONPATH"])

# Please log in with your Hugging Face token. Make sure to request access to all models on Hugging Face.

In [None]:
import os
from getpass import getpass

from huggingface_hub import login

# Never store the token in the notebook itself: read it from the HF_TOKEN
# environment variable, or ask for it interactively without echoing it.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face token: ")
login(token=hf_token)

# Task Names Defined Below (see src/tasks for how tasks are defined)

1. **NER**: `flare_ner`
2. **FINER-ORD**: `flare_finer_ord`
3. **FinRED**: `flare_finred`
4. **SC**: `flare_causal20_sc`
5. **CD**: `flare_cd`
6. **FNXL**: `flare_fnxl`
7. **FSRL**: `flare_fsrl`
8. **FPB**: `flare_fpb`
9. **FiQA-SA**: `flare_fiqasa`
10. **TSA**: `flare_tsa`
11. **Headlines**: `flare_headlines`
12. **FOMC**: `flare_fomc`
13. **FinArg-ACC**: `flare_finarg_ecc_auc`
14. **FinArg-ARC**: `flare_finarg_ecc_arc`
15. **MultiFin**: `flare_multifin_en`
16. **MA**: `flare_ma`
17. **MLESG**: `flare_mlesg`
18. **FinQA**: `flare_finqa`
19. **TATQA**: `flare_tatqa`
20. **Regulations**: (No specific task name provided for this)
21. **ConvFinQA**: `flare_convfinqa`
22. **EDTSUM**: `flare_edtsum`
23. **ECTSUM**: `flare_ectsum`
24. **BigData22**: `flare_sm_bigdata`
25. **ACL18**: `flare_sm_acl`
26. **CIKM18**: `flare_sm_cikm`
27. **German**: `flare_german`
28. **Australian**: `flare_australian`
29. **LendingClub**: `flare_cra_lendingclub`
30. **ccf**: `flare_cra_ccf`
31. **ccfraud**: `flare_cra_ccfraud`
32. **polish**: `flare_cra_polish`
33. **taiwan**: `flare_cra_taiwan`
34. **portoseguro**: `flare_cra_portoseguro`
35. **travelinsurance**: `flare_cra_travelinsurace`
36. **ES_FinanceES**: `flare_es_financees`
37. **ES_Multifin**: `flare_es_multifin`
38. **ES_EFP**: `flare_es_efp`
39. **ES_EFPA**: `flare_es_efpa`
40. **ES_FNS**: `flare_es_fns`
41. **ES_TSA**: `flare_es_tsa`


In [None]:
# All task names to evaluate, in run order. "Regulations" has no task name and
# is therefore not listed (its slot would be between flare_tatqa and
# flare_convfinqa).
tasks_list = """
    flare_ner
    flare_finer_ord
    flare_finred
    flare_causal20_sc
    flare_cd
    flare_fnxl
    flare_fsrl
    flare_fpb
    flare_fiqasa
    flare_tsa
    flare_headlines
    flare_fomc
    flare_finarg_ecc_auc
    flare_finarg_ecc_arc
    flare_multifin_en
    flare_ma
    flare_mlesg
    flare_finqa
    flare_tatqa
    flare_convfinqa
    flare_edtsum
    flare_ectsum
    flare_sm_bigdata
    flare_sm_acl
    flare_sm_cikm
    flare_german
    flare_australian
    flare_cra_lendingclub
    flare_cra_ccf
    flare_cra_ccfraud
    flare_cra_polish
    flare_cra_taiwan
    flare_cra_portoseguro
    flare_cra_travelinsurace
    flare_es_financees
    flare_es_multifin
    flare_es_efp
    flare_es_efpa
    flare_es_fns
    flare_es_tsa
""".split()

In [None]:
# Model under evaluation; the tokenizer is taken from the same identifier.
pretrained = tokenizer = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
# Short name (identifier without the organisation prefix), used for the
# per-model results directory.
model_name = pretrained.rsplit("/", 1)[-1]
model_type = "hf-causal-vllm"
results_dir = "/content/results"
# Values forwarded to eval.py: max_gen_toks, --batch_size, --num_fewshot.
max_gen_toks, batch_size, num_fewshot = 512, 20000, 0

In [None]:
# Create the per-model results directory; an existing one is not an error.
os.makedirs(os.path.join(results_dir, model_name), exist_ok=True)

In [None]:
for task in tasks_list:
    output_file_path = f"{results_dir}/{model_name}/{task}_results.txt"
    print(f"Running task: {task}\nSaving output to: {output_file_path}\n")

    !python PIXIU/src/eval.py \
        --model $model_type \
        --model_args "pretrained=$pretrained,tokenizer=$tokenizer,trust_remote_code=True,use_fast=False,max_gen_toks=$max_gen_toks" \
        --tasks $task \
        --batch_size $batch_size \
        --num_fewshot $num_fewshot \
        --output_base_path $results_dir \
        > $output_file_path

**Below are more examples of evaluating models**

In [None]:
# Model under evaluation; the tokenizer is taken from the same identifier.
pretrained = tokenizer = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
# Short name (identifier without the organisation prefix), used for the
# per-model results directory.
model_name = pretrained.rsplit("/", 1)[-1]
model_type = "hf-causal-vllm"
results_dir = "/content/results"
# Values forwarded to eval.py: max_gen_toks, --batch_size, --num_fewshot.
max_gen_toks, batch_size, num_fewshot = 512, 20000, 0

In [None]:
# Create the per-model results directory; an existing one is not an error.
os.makedirs(os.path.join(results_dir, model_name), exist_ok=True)

In [None]:
for task in tasks_list:
    output_file_path = f"{results_dir}/{model_name}/{task}_results.txt"
    print(f"Running task: {task}\nSaving output to: {output_file_path}\n")

    !python PIXIU/src/eval.py \
        --model $model_type \
        --model_args "pretrained=$pretrained,tokenizer=$tokenizer,trust_remote_code=True,use_fast=False,max_gen_toks=$max_gen_toks" \
        --tasks $task \
        --batch_size $batch_size \
        --num_fewshot $num_fewshot \
        --output_base_path $results_dir \
        > $output_file_path

In [None]:
# Recursively pack everything under /content/results into results.zip, which is
# written to the current working directory.
!zip -r results.zip /content/results

In [None]:
from google.colab import files
# Download the archive created by the previous cell; the path is relative to
# the current working directory.
files.download('results.zip')

# Using a Hugging Face Model

In [None]:
# One-off run: codellama/CodeLlama-7b-hf on the single task flare_es_multifin,
# with --num_fewshot 0 and max_gen_toks=25. Output is not redirected, so
# whatever eval.py prints appears in the cell output. The script path is
# relative, so the working directory must be the parent of PIXIU/.
!python PIXIU/src/eval.py \
    --model "hf-causal-vllm" \
    --model_args "pretrained=codellama/CodeLlama-7b-hf,tokenizer=codellama/CodeLlama-7b-hf,trust_remote_code=True,use_fast=False,max_gen_toks=25" \
    --tasks "flare_es_multifin" \
    --batch_size 20000 \
    --num_fewshot 0

In [None]:
# One-off run: Qwen/Qwen2-7B-Instruct on the single task flare_edtsum, with
# dtype=float16, max_gen_toks=128 and --num_fewshot 0. Unlike the CodeLlama
# example, trust_remote_code is not passed here. The script path is relative,
# so the working directory must be the parent of PIXIU/.
!python PIXIU/src/eval.py \
    --model "hf-causal-vllm" \
    --model_args "pretrained=Qwen/Qwen2-7B-Instruct,tokenizer=Qwen/Qwen2-7B-Instruct,dtype=float16,use_fast=False,max_gen_toks=128" \
    --tasks "flare_edtsum" \
    --batch_size 20000 \
    --num_fewshot 0

In [None]:
# One-off run: google/gemma-2-9b-it on the single task flare_fnxl, with
# dtype=float16, max_gen_toks=128 and --num_fewshot 0. The script path is
# relative, so the working directory must be the parent of PIXIU/.
!python PIXIU/src/eval.py \
    --model "hf-causal-vllm" \
    --model_args "pretrained=google/gemma-2-9b-it,tokenizer=google/gemma-2-9b-it,dtype=float16,use_fast=False,max_gen_toks=128" \
    --tasks "flare_fnxl" \
    --batch_size 20000 \
    --num_fewshot 0

# Using a Model from the Transformers Library with a PEFT (LoRA) Adapter

In [None]:
# Run meta-llama/Llama-2-7b-chat-hf on flare_es_efpa with the extra model
# argument peft=xiangr/fingpt-forecaster_dow30_llama2-7b_lora.
# NOTE(review): going by the repository name this is presumably a LoRA adapter
# applied on top of the base model - confirm how eval.py handles `peft=`.
!python PIXIU/src/eval.py \
    --model "hf-causal-vllm" \
    --model_args "pretrained=meta-llama/Llama-2-7b-chat-hf,peft=xiangr/fingpt-forecaster_dow30_llama2-7b_lora,tokenizer=meta-llama/Llama-2-7b-chat-hf,dtype=float16,use_fast=False,max_gen_toks=25" \
    --tasks "flare_es_efpa" \
    --batch_size 20000 \
    --num_fewshot 0

In [None]:
# Same base model, peft= argument and settings as the other Llama-2 examples
# in this section; only the task differs (flare_es_fns).
# NOTE(review): max_gen_toks=25 is used for this task as well - confirm it is
# long enough for the outputs this task expects.
!python PIXIU/src/eval.py \
    --model "hf-causal-vllm" \
    --model_args "pretrained=meta-llama/Llama-2-7b-chat-hf,peft=xiangr/fingpt-forecaster_dow30_llama2-7b_lora,tokenizer=meta-llama/Llama-2-7b-chat-hf,dtype=float16,use_fast=False,max_gen_toks=25" \
    --tasks "flare_es_fns" \
    --batch_size 20000 \
    --num_fewshot 0

In [None]:
# Same base model, peft= argument and settings as the other Llama-2 examples
# in this section; only the task differs (flare_es_financees).
!python PIXIU/src/eval.py \
    --model "hf-causal-vllm" \
    --model_args "pretrained=meta-llama/Llama-2-7b-chat-hf,peft=xiangr/fingpt-forecaster_dow30_llama2-7b_lora,tokenizer=meta-llama/Llama-2-7b-chat-hf,dtype=float16,use_fast=False,max_gen_toks=25" \
    --tasks "flare_es_financees" \
    --batch_size 20000 \
    --num_fewshot 0

In [None]:
# Same base model, peft= argument and settings as the other Llama-2 examples
# in this section; only the task differs (flare_es_tsa).
!python PIXIU/src/eval.py \
    --model "hf-causal-vllm" \
    --model_args "pretrained=meta-llama/Llama-2-7b-chat-hf,peft=xiangr/fingpt-forecaster_dow30_llama2-7b_lora,tokenizer=meta-llama/Llama-2-7b-chat-hf,dtype=float16,use_fast=False,max_gen_toks=25" \
    --tasks "flare_es_tsa" \
    --batch_size 20000 \
    --num_fewshot 0