In [None]:
!pip install -q "openai==1.40.2" "httpx==0.27.2" "httpcore==1.0.5" datasets pandas scikit-learn tqdm



In [None]:
import os, getpass

# Backend selector. Only "openai" is configured in this cell.
PROVIDER = "openai"

if PROVIDER == "openai":
    # Prompt interactively so the key is never written into the notebook source.
    # .strip() drops stray whitespace/newlines that commonly ride along with a paste.
    os.environ["API_KEY"] = getpass.getpass("Paste your OpenAI API key: ").strip()
    # None means "no override"; the client-setup cell substitutes the public endpoint.
    BASE_URL = None
    MODEL = "gpt-4o"
else:
    # Fail here with a clear message rather than with a NameError on MODEL below.
    raise ValueError(f"Unsupported PROVIDER {PROVIDER!r}; expected 'openai'")

print("Provider:", PROVIDER, "Model:", MODEL)


Paste your OpenAI API key: ··········
Provider: openai Model: gpt-4o


In [None]:
from datasets import load_dataset
# Fetch the "test" split of the ChanceFocus/en-fpb dataset from the Hugging Face Hub.
ds = load_dataset(path="ChanceFocus/en-fpb", split="test")

# Cell result: number of examples and (up to) the first eight column names.
(len(ds), ds.column_names[:8])


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


(970, ['id', 'query', 'answer', 'text', 'choices', 'gold'])

In [None]:
import openai, httpx
# Print the versions of the libraries that were actually imported, so they can be
# compared against the versions pinned in the install cell.
print("openai:", openai.__version__)
print("httpx:", httpx.__version__)


openai: 1.40.2
httpx: 0.27.2


In [None]:
from openai import OpenAI
import os, re, time
from tqdm import tqdm
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score, classification_report

# Use the public OpenAI endpoint when BASE_URL was never defined in this kernel
# or was explicitly left as None by the configuration cell.
if globals().get("BASE_URL") is None:
    BASE_URL = "https://api.openai.com/v1"

# Single shared client; the key comes from the API_KEY environment variable.
client = OpenAI(
    base_url=BASE_URL,
    api_key=os.environ["API_KEY"],
)

def normalize_to_choice(raw, choices):
    """Map a raw model reply onto exactly one of ``choices``.

    Only the first line of ``raw`` is considered. Surrounding whitespace,
    sentence punctuation, quotes, backticks and asterisks are stripped and the
    comparison is case-insensitive. Matching is attempted in this order:

    1. exact match against a choice,
    2. exact match after expanding a known alias (e.g. ``"bullish"``),
    3. *unambiguous* prefix of a choice (e.g. ``"posit"``).

    Args:
        raw: Text returned by the model; may be ``None`` or empty.
        choices: Iterable of allowed label strings.

    Returns:
        The matching element of ``choices`` (original casing), or ``None`` when
        the reply is empty or cannot be mapped to a single choice.
    """
    if not raw:
        return None

    first_line = re.split(r"[\n\r]", raw.strip())[0]
    s = first_line.strip(" \t.:;\"'`*").lower()
    # A reply made only of whitespace/punctuation must not match anything:
    # every string starts with "", so the prefix rule would pick choices[0].
    if not s:
        return None

    for c in choices:
        if s == c.lower():
            return c

    alias = {"pos": "positive", "neg": "negative", "neu": "neutral",
             "bullish": "positive", "bearish": "negative"}
    s = alias.get(s, s)
    for c in choices:
        if s == c.lower():
            return c

    # Accept a prefix only when it identifies a single choice; "ne" could be
    # either "neutral" or "negative", so guessing would corrupt the metrics.
    prefix_hits = [c for c in choices if c.lower().startswith(s)]
    if len(prefix_hits) == 1:
        return prefix_hits[0]
    return None

# System prompt sent with every request: fixes the model's role and asks for a
# bare label as the whole reply.
SYSTEM = " ".join((
    "You are a financial sentiment classifier.",
    "Choose exactly one label from the options.",
    "Output ONLY the label.",
))

def ask_model(sentence, choices, retries=3, sleep=1):
    """Ask the chat model to label ``sentence`` with one of ``choices``.

    Sends the module-level ``SYSTEM`` prompt plus a user message listing the
    options to ``MODEL`` via the module-level ``client`` (temperature 0,
    at most 8 completion tokens).

    Args:
        sentence: Text to classify.
        choices: Sequence of label strings offered to the model.
        retries: Total number of attempts; must be at least 1.
        sleep: Base delay in seconds between attempts. The delay doubles after
            each failure (``sleep``, ``2*sleep``, ...).

    Returns:
        The message content of the first completion choice, unprocessed.

    Raises:
        ValueError: If ``retries`` is less than 1.
        Exception: Whatever the final failed attempt raised.
    """
    # Previously retries=0 fell through to `raise None`, an unrelated TypeError.
    if retries < 1:
        raise ValueError("retries must be >= 1")

    user = f"Sentence: {sentence}\nOptions: {', '.join(choices)}\nAnswer with ONE of the options only."
    messages = [{"role": "system", "content": SYSTEM},
                {"role": "user", "content": user}]

    for attempt in range(retries):
        try:
            resp = client.chat.completions.create(
                model=MODEL, temperature=0, max_tokens=8,
                messages=messages,
            )
            return resp.choices[0].message.content
        except Exception:
            # Out of attempts: re-raise immediately instead of sleeping first.
            if attempt == retries - 1:
                raise
            # Exponential backoff gives rate limits time to clear.
            time.sleep(sleep * (2 ** attempt))


In [None]:
# Smoke test on the first example before paying for the full run.
ex = ds[0]
pred_raw = ask_model(sentence=ex["text"], choices=list(ex["choices"]))
pred = normalize_to_choice(raw=pred_raw, choices=list(ex["choices"]))

# Cell result: input text, offered labels, raw reply, normalized label, gold answer.
(ex["text"], ex["choices"], pred_raw, pred, ex["answer"])


('The new agreement , which expands a long-established cooperation between the companies , involves the transfer of certain engineering and documentation functions from Larox to Etteplan .',
 ['positive', 'neutral', 'negative'],
 'neutral',
 'neutral',
 'positive')

In [None]:
# Where predictions are written. A relative path works in any environment; on
# Colab the working directory is normally /content, so the file should land in
# the same place as before (verify if your runtime uses a different cwd).
OUT_PATH = "fpb_predictions.csv"

N = len(ds)
rows, y_true, y_pred = [], [], []

try:
    for i in tqdm(range(N)):
        x = ds[i]
        choices = list(x["choices"])
        gold = x["answer"]
        raw = ask_model(x["text"], choices)
        # Replies that cannot be mapped to a label are kept, tagged "UNKNOWN".
        pred = normalize_to_choice(raw, choices) or "UNKNOWN"
        rows.append({"id": x.get("id", i), "text": x["text"], "choices": "|".join(choices),
                     "pred_raw": raw, "pred": pred, "label": gold})
        y_true.append(gold); y_pred.append(pred)
finally:
    # Always persist what was collected: if a request fails after its retries
    # (or the run is interrupted) the predictions gathered so far are not lost.
    # The original exception, if any, still propagates after this block.
    df = pd.DataFrame(rows)
    df.to_csv(OUT_PATH, index=False)
    print("Saved to", os.path.abspath(OUT_PATH), f"({len(df)}/{N} rows)")


100%|██████████| 970/970 [12:42<00:00,  1.27it/s]

Saved to /content/fpb_predictions.csv





In [None]:
# Rows whose reply could be mapped to a label (kept for inspection / reporting).
ok = df[df["pred"]!="UNKNOWN"]
print("Parsed predictions:", len(ok), "/", len(df))

# Score over ALL rows. Unparseable replies ("UNKNOWN") are model errors; dropping
# them before scoring would overstate accuracy and F1.
# `labels` restricts the per-class averages to the gold label set, so "UNKNOWN"
# counts as a miss for the true class without becoming a class of its own.
gold_labels = sorted(df["label"].unique())
print("Accuracy:", round(accuracy_score(df["label"], df["pred"]), 4))
print("Macro-F1:", round(f1_score(df["label"], df["pred"], labels=gold_labels,
                                  average="macro", zero_division=0), 4))
print("\nReport:\n", classification_report(df["label"], df["pred"],
                                           labels=gold_labels, zero_division=0))


Used for scoring: 970 / 970
Accuracy: 0.8402
Macro-F1: 0.8358

Report:
               precision    recall  f1-score   support

    negative       0.83      0.97      0.90       116
     neutral       0.85      0.89      0.87       577
    positive       0.82      0.67      0.74       277

    accuracy                           0.84       970
   macro avg       0.83      0.85      0.84       970
weighted avg       0.84      0.84      0.84       970

