In [2]:
import json
import os
import time

from tqdm import tqdm
from transformers import pipeline

## Add new metric over existing completions

We don't want to recompute the expensive part of the grid, so we only add
the new metric (a sentiment score) on top of the completions already saved under `grid_results/`.

In [3]:
# Build the sentiment classifier once on the GPU; later cells reuse this
# module-level pipeline instead of reloading the model.
sentiment_analysis = pipeline(
    "sentiment-analysis",
    model="siebert/sentiment-roberta-large-english",
    device="cuda",
)

  return self.fget.__get__(instance, owner)()


In [4]:
# Smoke test: one clearly positive and one clearly negative sentence, to check
# the label/score format the pipeline returns.
sentiment_analysis(
    [
        "I love you",
        "I hate you",
    ]
)

[{'label': 'POSITIVE', 'score': 0.998561680316925},
 {'label': 'NEGATIVE', 'score': 0.9991401433944702}]

In [1]:
def add_sentiment(exp_name):
    """Add a ``"sentiment"`` entry to every result file of one experiment.

    Every file in ``grid_results/{exp_name}`` except ``meta.json`` is loaded as
    JSON. Files that already have a ``"sentiment"`` key are skipped, so
    repeated calls only process files that have not been scored yet. For the
    remaining files, the first ``len(prompt)`` characters and any
    ``<|end_of_text|>`` marker are removed from each completion, the remainder
    is scored with the module-level ``sentiment_analysis`` pipeline, and the
    list of positive-class probabilities is stored under ``"sentiment"``.

    The updated record is written to a temporary sibling file and then moved
    over the original with ``os.replace``. Opening the original with ``"w"``
    would truncate it immediately, so an interrupt or serialization error
    during the dump would destroy the stored completions.

    Args:
        exp_name: Name of a sub-directory of ``grid_results``.

    Raises:
        Whatever ``os.listdir``, ``open``, ``json.load``/``json.dump`` or the
        pipeline raise; in that case the file being processed is left as it
        was before the call.
    """
    tmp_suffix = ".sentiment.tmp"
    exp_dir = f"grid_results/{exp_name}"

    for f in tqdm(os.listdir(exp_dir), desc=f"adding sentiment to {exp_name}"):
        # meta.json is not a result file; *.sentiment.tmp is a leftover of a
        # previous write that was killed before it could be cleaned up.
        if f == "meta.json" or f.endswith(tmp_suffix):
            continue

        path = f"{exp_dir}/{f}"
        with open(path) as fp:
            d = json.load(fp)

        if "sentiment" in d:
            continue # already computed

        # Assumes each completion starts with its prompt, so slicing off
        # len(prompt) characters leaves only the generated text -- TODO confirm.
        generations = [c[len(p):].replace("<|end_of_text|>", "") for c, p in zip(d["completions"], d["prompt_batch"])]
        results = sentiment_analysis(generations)
        # The pipeline reports the score of the predicted label; convert it to
        # the probability of the POSITIVE label in both cases.
        prob_positive = [r["score"] if r["label"] == "POSITIVE" else 1 - r["score"] for r in results]
        d["sentiment"] = prob_positive

        # Write-then-rename: the original file is only replaced once the new
        # content has been written completely.
        tmp_path = path + tmp_suffix
        try:
            with open(tmp_path, "w") as fp:
                json.dump(d, fp)
            os.replace(tmp_path, path)
        except BaseException:
            # Also covers KeyboardInterrupt (kernel interrupt): drop the
            # partial temp file and leave the original untouched.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
            raise

# Score every experiment directory currently under grid_results. Entries that
# are not directories (stray files next to the experiments) are skipped,
# because add_sentiment calls os.listdir on the entry and would raise on a file.
for exp_name in os.listdir("grid_results"):
    if os.path.isdir(os.path.join("grid_results", exp_name)):
        add_sentiment(exp_name)

adding sentiment to toxicity_n64: 100%|██████████| 65/65 [00:00<00:00, 1778.37it/s]
adding sentiment to negative_n64: 100%|██████████| 65/65 [00:00<00:00, 3272.00it/s]
adding sentiment to positive_n64: 100%|██████████| 65/65 [00:00<00:00, 3852.23it/s]
adding sentiment to positive_n1024:   3%|▎         | 10/312 [00:11<05:53,  1.17s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
adding sentiment to positive_n1024:   5%|▌         | 17/312 [00:19<05:55,  1.20s/it]

In [None]:
# Poll forever: once a minute, run add_sentiment over every experiment
# directory. add_sentiment skips files that already have a "sentiment" key, so
# each pass only scores files that are new since the previous one (presumably
# written by a grid run that is still in progress -- confirm). Stop the loop by
# interrupting the kernel.
while True:
    time.sleep(60)
    for exp_name in os.listdir("grid_results"):
        # Skip stray non-directory entries; add_sentiment lists the entry as a
        # directory and would raise on a plain file.
        if os.path.isdir(os.path.join("grid_results", exp_name)):
            add_sentiment(exp_name)