In [26]:
import json
import os

import torch
from tqdm import tqdm
from transformers import pipeline

## Add new metric over existing completions

We don't want to recompute the expensive part of the grid, so we just
add metrics on top of existing completions.

In [7]:
# Sentiment classifier used to score the existing completions.
# Run on the GPU when one is available; otherwise fall back to the CPU
# instead of failing at construction time on machines without CUDA.
SENTIMENT_DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
sentiment_analysis = pipeline(
    "sentiment-analysis",
    model="siebert/sentiment-roberta-large-english",
    device=SENTIMENT_DEVICE,
)

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

In [11]:
# Smoke test: score one clearly positive and one clearly negative sentence
# and display the raw pipeline output (a label and a score per input).
sanity_inputs = ["I love you", "I hate you"]
sentiment_analysis(sanity_inputs)

[{'label': 'POSITIVE', 'score': 0.998561680316925},
 {'label': 'NEGATIVE', 'score': 0.9991401433944702}]

In [27]:
RESULTS_DIR = "grid_results"
END_OF_TEXT = "<|end_of_text|>"


def add_sentiment(path):
    """Add a "sentiment" entry to the JSON result file at `path`.

    The file is expected to hold a dict with parallel lists under
    "completions" and "prompt_batch". For each pair, the first
    len(prompt) characters of the completion and any end-of-text markers
    are removed, and the remaining text is scored with the module-level
    `sentiment_analysis` pipeline. The stored value is one number per
    completion: the score when the label is POSITIVE, otherwise 1 - score.

    The updated dict is written to a temporary sibling file and then moved
    over the original, so an interruption cannot leave a half-written
    result file behind.

    Raises:
        KeyError: if "completions" or "prompt_batch" is missing.
    """
    with open(path) as fp:
        d = json.load(fp)

    # NOTE(review): slicing by len(prompt) assumes every completion starts
    # with its own prompt - confirm against the code that wrote these files.
    generations = [
        c[len(p):].replace(END_OF_TEXT, "")
        for c, p in zip(d["completions"], d["prompt_batch"])
    ]

    # truncation=True keeps over-long generations from raising inside the
    # model; inputs that already fit are unaffected.
    results = sentiment_analysis(generations, truncation=True)
    d["sentiment"] = [
        r["score"] if r["label"] == "POSITIVE" else 1 - r["score"]
        for r in results
    ]

    # Write-then-rename: the original file (which holds the expensive
    # completions) is only replaced once the new content is fully on disk.
    tmp_path = f"{path}.tmp"
    with open(tmp_path, "w") as fp:
        json.dump(d, fp)
    os.replace(tmp_path, path)


for exp_name in sorted(os.listdir(RESULTS_DIR)):
    exp_dir = os.path.join(RESULTS_DIR, exp_name)
    if not os.path.isdir(exp_dir):
        # Stray files at the top level are not experiments.
        continue

    for f in tqdm(sorted(os.listdir(exp_dir)), desc=f"adding sentiment to {exp_name}"):
        path = os.path.join(exp_dir, f)
        # Skip the experiment metadata, leftovers from an interrupted
        # write, and anything that is not a regular file.
        if f == "meta.json" or f.endswith(".tmp") or not os.path.isfile(path):
            continue
        add_sentiment(path)

100%|██████████| 65/65 [00:34<00:00,  1.91it/s]
100%|██████████| 65/65 [00:32<00:00,  1.97it/s]
100%|██████████| 65/65 [00:33<00:00,  1.96it/s]
