In [3]:
# Remove RAPIDS bits that force old pyarrow
!pip -q uninstall -y cudf-cu12 pylibcudf-cu12

# Install compatible, up-to-date stack
!pip -q install -U "pyarrow>=21,<23" datasets openai scikit-learn tqdm huggingface_hub

# Sanity check
import pyarrow, datasets, sklearn, openai, huggingface_hub
print("pyarrow:", pyarrow.__version__)
print("datasets:", datasets.__version__)


pyarrow: 19.0.1
datasets: 4.3.0


In [15]:
import os, getpass

# Endpoint and model id used for every DeepSeek request in this notebook
DEEPSEEK_BASE_URL = "https://api.deepseek.com"
DEEPSEEK_MODEL = "deepseek-chat"

# Ask for the key only when the environment has none or still holds the placeholder
if os.environ.get("DEEPSEEK_API_KEY", "") in ("", "YOUR_DEEPSEEK_API_KEY"):
    os.environ["DEEPSEEK_API_KEY"] = getpass.getpass(
        "Paste DEEPSEEK_API_KEY (input hidden): "
    ).strip()

# Smoke test: a one-token completion through the OpenAI-compatible client
from openai import OpenAI

client = OpenAI(
    base_url=DEEPSEEK_BASE_URL,
    api_key=os.environ["DEEPSEEK_API_KEY"],
)
resp = client.chat.completions.create(
    model=DEEPSEEK_MODEL,
    temperature=0,
    max_tokens=1,
    messages=[
        {"role": "system", "content": "Say OK."},
        {"role": "user", "content": "OK?"},
    ],
)
print("DeepSeek ready:", resp.choices[0].message.content)


DeepSeek ready: 好的


In [16]:
from huggingface_hub import login
# login()  # <- uncomment, run once if you need gated dataset access


In [9]:
import os, getpass
from huggingface_hub import login

# Prompt once if not set, so re-running the cell (or Run All) does not ask again
# when a token is already present in the environment.
if not os.getenv("HF_TOKEN"):
    os.environ["HF_TOKEN"] = getpass.getpass("Paste HF token (hidden): ").strip()
# set both common env names so libraries can find it
os.environ["HUGGINGFACE_HUB_TOKEN"] = os.environ["HF_TOKEN"]

# Register the same token with huggingface_hub for this session
login(token=os.environ["HF_TOKEN"])


Paste HF token (hidden): ··········


Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [11]:
# Dataset loader (self-contained: defines its own SEED / N_SAMPLES defaults)
from datasets import load_dataset

# Which Hugging Face dataset / split to evaluate on
DATASET_NAME = "TheFinAI/en-fpb"
SPLIT = "train"

# Sampling defaults live in this cell so it runs even when no earlier
# configuration cell has been executed.
SEED = 42
N_SAMPLES = 100

ds = load_dataset(DATASET_NAME, split=SPLIT)

# Only None disables sub-sampling; the explicit check keeps 0 from being
# treated as "skip sampling".
if N_SAMPLES is not None:
    ds = ds.shuffle(seed=SEED)
    ds = ds.select(range(min(N_SAMPLES, len(ds))))

print("Rows:", len(ds))
ds[0]


Rows: 100


{'id': 'fpb2204',
 'query': 'Analyze the sentiment of this statement extracted from a financial news article. Provide your answer as either negative, positive, or neutral.\nText: In August , Latvijas Finieris ordered all production lines for a new green veneer mill to be built in Ukmerge , Lithuania .\nAnswer:',
 'answer': 'neutral',
 'text': 'In August , Latvijas Finieris ordered all production lines for a new green veneer mill to be built in Ukmerge , Lithuania .',
 'choices': ['positive', 'neutral', 'negative'],
 'gold': 1}

In [12]:
# Labels the model may answer with; normalisation and the prompts rely on these.
ALLOWED = [
    "positive",
    "neutral",
    "negative",
]

# System message sent with every classification request: one-word answer,
# no explanation, fall back to "neutral" when unsure.
SYSTEM_PROMPT = " ".join(
    (
        "You are a finance sentiment rater.",
        "Return exactly one word from this set: positive, neutral, negative.",
        "No explanation. If uncertain, choose neutral.",
    )
)

def build_user_prompt(row):
    """Build the user message for one dataset row.

    Only ``row["text"]`` is read; the row's own ``query`` instruction is not
    used, so the prompt wording is fully controlled here.

    Args:
        row: mapping with a ``"text"`` entry holding the sentence to classify.

    Returns:
        A three-paragraph prompt: the instruction, the statement, and the
        one-word-answer reminder, separated by blank lines.
    """
    statement = row["text"]
    paragraphs = [
        "Classify the sentiment of the following financial news statement as one of: positive, neutral, negative.",
        f"Statement: {statement}",
        "Answer with only one word.",
    ]
    return "\n\n".join(paragraphs)


In [13]:
import time, json, re, pathlib, random
from tqdm import tqdm
from openai import OpenAI, APIError, RateLimitError, APITimeoutError

# OpenAI-compatible client pointed at the DeepSeek endpoint; the key is read
# from the environment.
client = OpenAI(
    base_url=DEEPSEEK_BASE_URL,
    api_key=os.environ["DEEPSEEK_API_KEY"],
)

# JSONL file that stores one prediction per line so runs can be resumed
CACHE_PATH = pathlib.Path("deepseek_en_fpb_predictions.jsonl")

def normalize_label(s: str) -> str:
    """Map a raw model reply onto one of the labels in ``ALLOWED``.

    The reply is lower-cased and every character that is not ``a``-``z`` is
    treated as a separator, so answers such as ``"Positive."``,
    ``"**neutral**"`` or ``"label_negative"`` are all recognised. When several
    labels occur, the one that appears first in the reply wins, regardless of
    its position in ``ALLOWED``.

    Args:
        s: raw reply text; ``None`` or an empty string is accepted.

    Returns:
        The first allowed label mentioned in the reply, or ``"neutral"`` when
        the reply is empty or mentions none of them.
    """
    if not s:
        return "neutral"
    # Scan tokens in reading order so the earliest-mentioned label is returned
    for token in re.sub(r"[^a-z]", " ", s.strip().lower()).split():
        if token in ALLOWED:
            return token
    return "neutral"

def chat_once(prompt, temperature=0.0):
    """Send one classification request and return the reply text.

    The request uses ``SYSTEM_PROMPT`` as the system message and ``prompt`` as
    the user message. Failures are retried up to six attempts in total, with a
    sleep that starts at 1.5 s and grows by a factor of 1.8 after each failure.
    Errors that carry an HTTP status marking a client-side problem (4xx other
    than 408/409/429) are re-raised immediately, because repeating the same
    request cannot succeed.

    Args:
        prompt: user message content.
        temperature: sampling temperature forwarded to the API.

    Returns:
        The content of the first choice's message.

    Raises:
        RateLimitError, APITimeoutError, APIError: when the error is not
            retryable or the last attempt also fails.
    """
    max_tries = 6
    backoff = 1.5
    for attempt in range(1, max_tries + 1):
        try:
            resp = client.chat.completions.create(
                model=DEEPSEEK_MODEL,
                temperature=temperature,
                messages=[
                    {"role": "system", "content": SYSTEM_PROMPT},
                    {"role": "user",   "content": prompt},
                ],
            )
            return resp.choices[0].message.content
        except (RateLimitError, APITimeoutError, APIError) as err:
            # Errors without a status code (e.g. timeouts) are treated as transient
            status = getattr(err, "status_code", None)
            transient = status is None or status in (408, 409, 429) or status >= 500
            if not transient or attempt == max_tries:
                raise
            time.sleep(backoff)
            backoff *= 1.8

# Resume support: reload predictions written by an earlier run so rows that
# already have an answer are not sent to the API again.
seen_ids = set()
pred_rows = []
if CACHE_PATH.exists():
    with open(CACHE_PATH, "r", encoding="utf-8") as f:
        for line_no, line in enumerate(f, start=1):
            if not line.strip():
                continue  # tolerate blank lines
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                # e.g. a line cut short when a previous run was interrupted;
                # the row is simply predicted again below
                print(f"Skipping unreadable cache line {line_no} in {CACHE_PATH}")
                continue
            seen_ids.add(obj["id"])
            pred_rows.append(obj)

# Run inference
for row in tqdm(ds, total=len(ds)):
    rid = row["id"]
    if rid not in seen_ids:
        # Only rows without a cached prediction are sent to the model
        prompt = build_user_prompt(row)
        raw = chat_once(prompt, temperature=0.0)
        pred = normalize_label(raw)
        out = dict(
            id=rid,
            text=row["text"],
            gold=row["answer"],  # gold labels are strings in this dataset
            pred=pred,
            raw=raw,
        )
        pred_rows.append(out)
        # Append immediately so an interrupted run keeps what it has so far
        with CACHE_PATH.open("a", encoding="utf-8") as f:
            f.write(json.dumps(out, ensure_ascii=False) + "\n")

len(pred_rows)


100%|██████████| 100/100 [01:50<00:00,  1.10s/it]


100

In [14]:
# Compute accuracy and macro-F1 from saved predictions or in-memory df
import pandas as pd, pathlib, json
from sklearn.metrics import accuracy_score, f1_score

def load_gold_pred():
    """Return a two-column (gold, pred) frame of predictions.

    Sources are tried in this order:
      1. a global ``df`` that already has both columns,
      2. ``deepseek_en_fpb_predictions.csv`` in the working directory,
      3. ``deepseek_en_fpb_predictions.jsonl`` in the working directory.

    Returns:
        A copy containing only the ``gold`` and ``pred`` columns.

    Raises:
        FileNotFoundError: when no in-memory frame and neither file exists.
    """
    columns = ['gold', 'pred']

    # An in-memory frame takes precedence over anything on disk
    if 'df' in globals() and set(columns).issubset(df.columns):
        return df[columns].copy()

    # (path, reader) pairs in priority order: CSV first, then JSONL
    sources = (
        (pathlib.Path("deepseek_en_fpb_predictions.csv"), pd.read_csv),
        (pathlib.Path("deepseek_en_fpb_predictions.jsonl"),
         lambda path: pd.read_json(path, lines=True)),
    )
    for path, reader in sources:
        if path.exists():
            return reader(path)[columns].copy()

    raise FileNotFoundError("No predictions found. Run the evaluation cell first to create predictions.")

d = load_gold_pred()
# Lower-case both label columns so the comparison ignores case
y_true, y_pred = (d[column].str.lower() for column in ('gold', 'pred'))

# Overall accuracy plus the unweighted mean of per-class F1
acc = accuracy_score(y_true, y_pred)
macro_f1 = f1_score(y_true, y_pred, average='macro')

print(f"Accuracy : {acc:.4f}")
print(f"Macro F1 : {macro_f1:.4f}")


Accuracy : 0.8700
Macro F1 : 0.8729
