In [None]:
%pip install -q "openai>=1.40.0" "httpx>=0.27.2" "httpcore>=1.0.5" "pandas>=2.2.2" "tqdm>=4.66.4"


In [None]:
import os, getpass, json, time, platform
from importlib.metadata import version, PackageNotFoundError

def ver(pkg):
    """Look up the installed version of a distribution.

    Args:
        pkg: Distribution name as passed to ``importlib.metadata.version``.

    Returns:
        The version string, or ``None`` when the distribution is not installed.
    """
    try:
        installed = version(pkg)
    except PackageNotFoundError:
        # Missing packages are reported as None instead of raising.
        installed = None
    return installed

# === Configure Grok 4 ===
# Model name and endpoint can be overridden through environment variables.
MODEL    = os.getenv("GROK_MODEL", "grok-4")
BASE_URL = os.getenv("GROK_BASE_URL", "https://api.x.ai/v1")

# Read the key from the environment, falling back to an interactive prompt.
# Surrounding whitespace (easy to pick up when copy/pasting) is stripped so it
# never ends up inside the "Authorization: Bearer ..." header.
api_key = (os.getenv("XAI_API_KEY") or "").strip()
if not api_key:
    api_key = getpass.getpass("Paste your Grok (xAI) API key: ").strip()
# Later cells read the key back from the environment.
os.environ["XAI_API_KEY"] = api_key

save_dir  = "./results_grok"
meta_path = os.path.join(save_dir, "meta.json")

# Run metadata: model/endpoint, interpreter version, UTC timestamp and the
# installed versions of the main dependencies (None when not installed).
meta = {
    "model": MODEL,
    "base_url": BASE_URL,
    "python": platform.python_version(),
    "time_utc": time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime()),
    "openai_pkg": ver("openai"),
    "httpx": ver("httpx"),
    "pandas": ver("pandas"),
    "tqdm": ver("tqdm"),
}

os.makedirs(save_dir, exist_ok=True)
# Explicit encoding keeps the file identical across platforms.
with open(meta_path, "w", encoding="utf-8") as f:
    json.dump(meta, f, indent=2)

print("Meta saved ->", meta_path)
print("MODEL:", MODEL, "| BASE_URL:", BASE_URL)
# Only report whether a key is present; never echo the key itself.
print("XAI_API_KEY set:", bool(os.environ.get("XAI_API_KEY")))

In [None]:
from openai import OpenAI

# Build an OpenAI-SDK client that talks to the endpoint in BASE_URL, using the
# key stored in the XAI_API_KEY environment variable.
client = OpenAI(
    api_key=os.environ["XAI_API_KEY"],
    base_url=BASE_URL,
)
print("Grok 4 client ready.")

In [None]:
# Smoke test: send one short chat request and print the reply.
system_msg = {"role": "system", "content": "You are Grok 4, a witty and intelligent assistant."}
user_msg = {"role": "user", "content": "Explain what backpropagation does in a neural network."}
messages = [system_msg, user_msg]

try:
    # Built inside the try so any failure is reported the same way as a failed call.
    request_kwargs = dict(
        model=MODEL,
        messages=messages,
        temperature=0.7,
        max_tokens=300,
    )
    resp = client.chat.completions.create(**request_kwargs)
    reply = resp.choices[0].message.content
    print("Grok 4 says:", reply)
except Exception as err:
    # Best-effort check: report the failure without stopping the notebook.
    print("API call failed:", repr(err))

In [None]:
%pip install -q "datasets>=2.19.0" "pandas>=2.2.2" "tqdm>=4.66.4" "scikit-learn>=1.5.0"

from datasets import load_dataset, Dataset
import pandas as pd, json, os, re, time
from tqdm import tqdm

# Canonical label names. The list position is also used as the integer label
# index by _norm_label, so the order matters.
LABELS = ["negative", "neutral", "positive"]

# Load the test split of the flare-fpb dataset and show its columns.
ds_raw = load_dataset("TheFinAI/flare-fpb", split="test")
print("Loaded flare-fpb test:", len(ds_raw), "columns:", ds_raw.column_names)

# Alternative spellings that are folded into the canonical label names.
_alias = dict(
    pos="positive",
    neg="negative",
    neu="neutral",
    bullish="positive",
    bearish="negative",
)

def _norm_label(v):
    """Normalize a raw label value to one of the names in ``LABELS``.

    Accepted inputs:
      * an int, an integral float, or a string of decimal digits: used as an
        index into ``LABELS``;
      * any other value: converted with ``str``, stripped, lower-cased, passed
        through ``_alias`` and accepted only if the result is in ``LABELS``.

    Args:
        v: Raw label value (may be ``None``).

    Returns:
        The canonical label string, or ``None`` when ``v`` is ``None``, the
        index is out of range, the float is NaN/inf/fractional, or the text is
        not a known label or alias.
    """
    if v is None:
        return None

    if isinstance(v, str):
        s = v.strip().lower()
        # isdecimal() only accepts characters int() can parse; isdigit() would
        # also accept e.g. superscript digits and make int() raise.
        if not s.isdecimal():
            s = _alias.get(s, s)
            return s if s in LABELS else None
        v = int(s)

    if isinstance(v, (int, float)):
        # NaN and +/-inf make int() raise, and a fractional value is not a
        # valid index; treat all of them as "no label" instead of crashing or
        # silently truncating.
        if isinstance(v, float) and not v.is_integer():
            return None
        i = int(v)
        return LABELS[i] if 0 <= i < len(LABELS) else None

    # Any other object: fall back to its string form.
    s = str(v).strip().lower()
    s = _alias.get(s, s)
    return s if s in LABELS else None

def _map_row(x):
    """Project a raw dataset row onto the ``text`` / ``choices`` / ``answer`` schema.

    Args:
        x: Mapping holding one raw row.

    Returns:
        A dict with the first non-empty value among the ``text``, ``sentence``,
        ``content`` and ``input`` fields (``""`` if none), the shared ``LABELS``
        list, and the normalized label taken from ``label``, else ``labels``,
        else ``answer``.
    """
    sentence = ""
    for field in ("text", "sentence", "content", "input"):
        candidate = x.get(field)
        if candidate:
            sentence = candidate
            break

    # Lowest-priority field first; each later lookup overrides it only when
    # its key is present in the row.
    raw_label = x.get("answer")
    raw_label = x.get("labels", raw_label)
    raw_label = x.get("label", raw_label)

    return {"text": sentence, "choices": LABELS, "answer": _norm_label(raw_label)}

# Merge the normalized fields into every raw row, then keep only rows that can
# actually be evaluated: non-empty text and a label that mapped onto LABELS.
# Rows with a None label would otherwise reach the metric computation.
_mapped_rows = [{**r, **_map_row(r)} for r in ds_raw]
_usable_rows = [row for row in _mapped_rows if row["text"] and row["answer"] is not None]
_n_dropped = len(_mapped_rows) - len(_usable_rows)
if _n_dropped:
    print("Dropped rows with empty text or unmapped label:", _n_dropped)

ds = Dataset.from_list(_usable_rows)
print("Final usable samples:", len(ds))

In [None]:
import requests, json
from tqdm import tqdm

def classify_with_grok(sentence, choices=LABELS, max_tokens=128):
    """Classify one sentence with a single chat-completions request.

    The model is asked to answer with a JSON object of the form
    ``{"label": "<one of choices>"}``. The reply is stripped of an optional
    Markdown code fence, parsed as JSON and validated against ``choices``.

    Args:
        sentence: Text to classify.
        choices: Allowed label names; used in the prompt and for validating
            the reply.
        max_tokens: ``max_tokens`` value sent with the request.

    Returns:
        The predicted label (lower-cased), or ``"UNKNOWN"`` when the reply is
        empty, is not valid JSON, is not an object with a string ``label``, or
        names a label outside ``choices``.

    Raises:
        requests.RequestException: on transport errors or a non-success HTTP
            status (via ``raise_for_status``).
        KeyError, IndexError: if the response JSON has no
            ``choices[0]["message"]`` entry.
    """
    url = f"{BASE_URL.rstrip('/')}/chat/completions"
    headers = {
        "Authorization": f"Bearer {os.environ['XAI_API_KEY']}",
        "Content-Type": "application/json",
    }

    # Build the answer template from `choices` so the prompt stays consistent
    # when a caller passes a custom label set.
    label_template = '{"label":"' + "|".join(choices) + '"}'
    user_prompt = (
        "Task: classify the following sentence into one of these labels: "
        f"{', '.join(choices)}.\n\n"
        f"Sentence: {sentence}\n\n"
        "Return ONLY a JSON object exactly like this:\n"
        f"{label_template}"
    )

    payload = {
        "model": MODEL,
        "messages": [
            {"role": "system", "content": "You are a precise text classifier."},
            {"role": "user", "content": user_prompt},
        ],
        "max_tokens": max_tokens,
        "temperature": 0.0,
    }

    r = requests.post(url, headers=headers, json=payload, timeout=90)
    r.raise_for_status()
    data = r.json()

    # The content field can be missing or null; treat that as an empty reply
    # instead of failing on None.strip().
    content = data["choices"][0]["message"].get("content")
    txt = (content or "").strip()
    # Drop a surrounding Markdown fence, with or without a "json" language tag.
    txt = re.sub(r"^```(?:json)?\s*|\s*```$", "", txt, flags=re.IGNORECASE).strip()

    try:
        obj = json.loads(txt)
    except ValueError:
        # json.JSONDecodeError is a ValueError: the reply was not valid JSON.
        return "UNKNOWN"

    lab = obj.get("label") if isinstance(obj, dict) else None
    if not isinstance(lab, str):
        return "UNKNOWN"
    lab = lab.strip().lower()
    return lab if lab in choices else "UNKNOWN"

# Run evaluation: one request per sample, collecting one record per row.
results = []
progress = tqdm(enumerate(ds), total=len(ds))
for row_idx, example in progress:
    pred = classify_with_grok(example["text"])
    record = dict(
        row_idx=row_idx,
        text=example["text"],
        label=example["answer"],
        pred=pred,
    )
    results.append(record)

# Build the frame once from the collected records and persist it.
out_csv = "grok4_flare_predictions.csv"
df = pd.DataFrame(results)
df.to_csv(out_csv, index=False)
print("Saved predictions ->", out_csv)

In [None]:
from sklearn.metrics import accuracy_score, f1_score

# Rows whose reply could not be parsed carry the "UNKNOWN" sentinel; they are
# excluded from F1/accuracy below, so the coverage is reported alongside the
# scores to keep the numbers interpretable.
ok = df[df["pred"] != "UNKNOWN"].copy()
n_total = len(df)
n_unknown = n_total - len(ok)

if ok.empty:
    # Nothing to score: avoid calling the metric functions on empty input.
    f1_macro = acc = float("nan")
    print("No parseable predictions; F1 and accuracy are undefined.")
else:
    f1_macro = f1_score(ok["label"], ok["pred"], labels=LABELS, average="macro", zero_division=0)
    acc      = accuracy_score(ok["label"], ok["pred"])
    print(f"F1: {f1_macro:.4f},  Accuracy: {acc:.4f}")

print(f"Parsed predictions: {len(ok)}/{n_total} ({n_unknown} UNKNOWN excluded from the scores above)")
if n_total:
    # "UNKNOWN" never equals a gold label, so this counts unparsed rows as errors.
    strict_acc = (df["pred"] == df["label"]).mean()
    print(f"Accuracy counting UNKNOWN as wrong: {strict_acc:.4f}")