In [21]:
!pip install -q sentence-transformers evaluate rouge_score bert_score

  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone


In [12]:
import os
import math
from typing import List, Dict, Tuple

import numpy as np
import pandas as pd
from tqdm import tqdm

from sentence_transformers import SentenceTransformer, util
import evaluate

In [13]:
class Config:
    TRAIN_PATH = "/content/trac2_CONVT_train.csv"
    DEV_PATH = "/content/trac2_CONVT_dev.csv"
    TEST_PATH = "/content/goldstandard_CONVT_test.csv"
    CORPUS_GEN_OUT = "/content/generations_corpus.csv"
    ID_COL = "id"
    CONV_ID_COL = "conversation_id"
    TURN_COL = "turn_id"
    TEXT_COL = "text"

    EMO_INT_COL = "Emotion"
    EMPATHY_COL = "Empathy"
    POLARITY_COL = "EmotionalPolarity"
    POLARITY_IS_INT = False
    ST_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"

    W_TEXT = 0.6
    W_EMOTION = 0.15
    W_EMPATHY = 0.15
    W_POLARITY = 0.10
cfg = Config()


In [18]:
def load_split(path: str) -> pd.DataFrame:
    if not os.path.exists(path):
        raise FileNotFoundError(f"File not found: {path}")
    df = pd.read_csv(path)
    print(f"[load_split] {path} -> shape {df.shape}")
    return df

def normalize_series_to_0_1(s: pd.Series) -> pd.Series:
    s = s.astype(float)
    min_v, max_v = s.min(), s.max()
    if math.isclose(max_v, min_v):
        return pd.Series(0.5, index=s.index)
    return (s - min_v) / (max_v - min_v)

def encode_polarity(df: pd.DataFrame, all_labels_for_mapping: List = None) -> Tuple[np.ndarray, Dict]:

    if all_labels_for_mapping is not None:
        if cfg.POLARITY_IS_INT:
            processed_labels = []
            for x in all_labels_for_mapping:
                if pd.notna(x) and str(x).replace('.', '', 1).isdigit():
                    processed_labels.append(int(float(x)))
            classes = sorted(list(set(processed_labels)))
        else:
            classes = sorted(list(set(str(x) for x in all_labels_for_mapping if pd.notna(x))))
    else:
        if cfg.POLARITY_IS_INT:
            classes = sorted(df[cfg.POLARITY_COL].dropna().astype(int).unique())
        else:
            classes = sorted(df[cfg.POLARITY_COL].dropna().astype(str).unique())

    label2id = {lab: i for i, lab in enumerate(classes)}
    if cfg.POLARITY_IS_INT:
        current_labels_series = df[cfg.POLARITY_COL].dropna().astype(int)
    else:
        current_labels_series = df[cfg.POLARITY_COL].dropna().astype(str)


    ids = np.array([label2id[v] for v in current_labels_series])
    one_hot = np.eye(len(classes))[ids]
    return one_hot, label2id


def build_context_text(df: pd.DataFrame,
                       conv_id,
                       up_to_turn: int) -> str:

    subset = df[(df[cfg.CONV_ID_COL] == conv_id) &
                (df[cfg.TURN_COL] < up_to_turn)].sort_values(cfg.TURN_COL)
    utts = subset[cfg.TEXT_COL].astype(str).tolist()
    return " ".join(utts)


In [19]:
class CorpusIndex:
    def __init__(self, train_df: pd.DataFrame, model_name: str, all_polarity_labels: List = None):
        self.model_name = model_name
        self.model = SentenceTransformer(model_name)
        self.texts = train_df[cfg.TEXT_COL].astype(str).tolist()
        print(f"[CorpusIndex] Encoding {len(self.texts)} training utterances...")
        self.embeddings = self.model.encode(
            self.texts,
            batch_size=32,
            convert_to_tensor=True,
            show_progress_bar=True,
        )
        self.emotion = normalize_series_to_0_1(train_df[cfg.EMO_INT_COL]).to_numpy().reshape(-1, 1)
        self.empathy = normalize_series_to_0_1(train_df[cfg.EMPATHY_COL]).to_numpy().reshape(-1, 1)
        self.polarity_oh, self.polarity_map = encode_polarity(train_df, all_labels_for_mapping=all_polarity_labels)

    def encode_query_text(self, text: str):
        return self.model.encode(text, convert_to_tensor=True)

    def compute_similarity(self,
                           q_emb,
                           q_emotion: float,
                           q_empathy: float,
                           q_polarity_oh: np.ndarray) -> np.ndarray:
        s_text = util.cos_sim(q_emb, self.embeddings).cpu().numpy().flatten()
        q_emo = np.array([[q_emotion]], dtype=float)
        s_emotion = 1.0 - np.abs(self.emotion - q_emo).flatten()
        q_emp = np.array([[q_empathy]], dtype=float)
        s_empathy = 1.0 - np.abs(self.empathy - q_emp).flatten()
        s_polarity = (self.polarity_oh * q_polarity_oh).sum(axis=1)

        S_total = (
            cfg.W_TEXT * s_text +
            cfg.W_EMOTION * s_emotion +
            cfg.W_EMPATHY * s_empathy +
            cfg.W_POLARITY * s_polarity
        )
        return S_total

    def get_best_match(self,
                       query_text: str,
                       q_emotion: float,
                       q_empathy: float,
                       q_polarity_label) -> str:

        if cfg.POLARITY_IS_INT:
            label2idx = self.polarity_map
            idx = label2idx[int(q_polarity_label)]
        else:
            label2idx = self.polarity_map
            idx = label2idx[str(q_polarity_label)]

        num_classes = len(self.polarity_map)
        q_polarity_oh = np.eye(num_classes)[idx]

        q_emb = self.encode_query_text(query_text)
        scores = self.compute_similarity(q_emb, q_emotion, q_empathy, q_polarity_oh)
        best_idx = int(scores.argmax())
        return self.texts[best_idx]


def generate_corpus_responses(train_df: pd.DataFrame,
                              target_df: pd.DataFrame,
                              all_polarity_labels: List = None) -> pd.DataFrame:

    idx = CorpusIndex(train_df, cfg.ST_MODEL_NAME, all_polarity_labels=all_polarity_labels)

    emo_norm = normalize_series_to_0_1(target_df[cfg.EMO_INT_COL]).to_numpy()
    emp_norm = normalize_series_to_0_1(target_df[cfg.EMPATHY_COL]).to_numpy()

    generations = []
    for i, row in tqdm(target_df.iterrows(), total=len(target_df),
                       desc="Generating (corpus-based)"):
        conv_id = row[cfg.CONV_ID_COL]
        turn = row[cfg.TURN_COL]

        context_text = build_context_text(target_df, conv_id, turn)
        if not context_text.strip():
            context_text = str(row[cfg.TEXT_COL])

        q_emo = emo_norm[i]
        q_emp = emp_norm[i]
        q_pol = row[cfg.POLARITY_COL]

        gen = idx.get_best_match(context_text, q_emo, q_emp, q_pol)
        generations.append(gen)

    out_df = target_df.copy()
    out_df["generated"] = generations
    return out_df


In [16]:
def compute_text_metrics(refs: List[str], preds: List[str]) -> Dict[str, float]:
    rouge = evaluate.load("rouge")
    bleu = evaluate.load("bleu")
    bertscore = evaluate.load("bertscore")

    rouge_res = rouge.compute(predictions=preds, references=refs)
    bleu_res = bleu.compute(predictions=preds, references=refs)
    bert_res = bertscore.compute(predictions=preds, references=refs, lang="en")

    metrics = {
        "rouge1": rouge_res["rouge1"],
        "rouge2": rouge_res["rouge2"],
        "rougeL": rouge_res["rougeL"],
        "bleu": bleu_res["bleu"],
        "bertscore_precision": float(np.mean(bert_res["precision"])),
        "bertscore_recall": float(np.mean(bert_res["recall"])),
        "bertscore_f1": float(np.mean(bert_res["f1"])),
    }
    return metrics



In [23]:
def load_split(path: str) -> pd.DataFrame:
    if not os.path.exists(path):
        raise FileNotFoundError(f"File not found: {path}")
    try:
        df = pd.read_csv(path)
    except pd.errors.ParserError:
        print(f"Warning: ParserError encountered while loading {path}. Attempting to load with 'engine=python' and 'on_bad_lines=skip'.")
        df = pd.read_csv(path, engine='python', on_bad_lines='skip')
    print(f"[load_split] {path} -> shape {df.shape}")
    return df

def main():

    train_df = load_split(cfg.TRAIN_PATH)
    dev_df = load_split(cfg.DEV_PATH)
    test_df = load_split(cfg.TEST_PATH)
    print("\nTrain DataFrame columns:", train_df.columns.tolist())
    print("Dev DataFrame columns:", dev_df.columns.tolist())
    print("Test DataFrame columns:", test_df.columns.tolist())

    all_polarity_values_raw = pd.concat([
        train_df[cfg.POLARITY_COL].astype(str),
        dev_df[cfg.POLARITY_COL].astype(str),
        test_df[cfg.POLARITY_COL].astype(str)
    ]).unique()

    if cfg.POLARITY_IS_INT:
        processed_values = []
        for x in all_polarity_values_raw:
            if pd.notna(x) and str(x).replace('.', '', 1).isdigit():
                processed_values.append(int(float(x)))
        all_polarity_values = sorted(list(set(processed_values)))
    else:
        all_polarity_values = sorted([str(x) for x in all_polarity_values_raw if pd.notna(x)])

    print("\n[Q1] Generating for DEV...")
    dev_gen = generate_corpus_responses(train_df, dev_df, all_polarity_labels=all_polarity_values)

    metrics = compute_text_metrics(
        refs=dev_gen[cfg.TEXT_COL].astype(str).tolist(),
        preds=dev_gen["generated"].astype(str).tolist()
    )
    print("\n[Q1] DEV metrics:")
    for k, v in metrics.items():
        print(f"  {k}: {v:.4f}")

    print("\n[Q1] Generating for TEST...")
    test_gen = generate_corpus_responses(train_df, test_df, all_polarity_labels=all_polarity_values)
    out_cols = [cfg.CONV_ID_COL, cfg.TURN_COL, "generated"]
    if cfg.ID_COL in test_gen.columns:
        out_cols.insert(0, cfg.ID_COL)

    test_gen[out_cols].to_csv(cfg.CORPUS_GEN_OUT, index=False)
    print(f"\n[Q1] Saved corpus generations to {cfg.CORPUS_GEN_OUT}")


if __name__ == "__main__":
    main()


[load_split] /content/trac2_CONVT_train.csv -> shape (11090, 12)
[load_split] /content/trac2_CONVT_dev.csv -> shape (965, 13)
[load_split] /content/goldstandard_CONVT_test.csv -> shape (2316, 10)

Train DataFrame columns: ['id', 'article_id', 'conversation_id', 'turn_id', 'speaker', 'text', 'person_id_1', 'person_id_2', 'Emotion', 'EmotionalPolarity', 'Empathy', 'SelfDisclosure']
Dev DataFrame columns: ['id', 'article_id', 'conversation_id', 'turn_id', 'speaker_id', 'text', 'person_id', 'person_id_1', 'person_id_2', 'Emotion', 'EmotionalPolarity', 'Empathy', 'SelfDisclosure']
Test DataFrame columns: ['article_id', 'conversation_id', 'turn_id', 'speaker_id', 'text', 'Emotion', 'EmotionalPolarity', 'Empathy', 'Unnamed: 8', 'Unnamed: 9']

[Q1] Generating for DEV...
[CorpusIndex] Encoding 11090 training utterances...


Batches:   0%|          | 0/347 [00:00<?, ?it/s]

Generating (corpus-based): 100%|██████████| 965/965 [00:15<00:00, 62.87it/s]


Downloading builder script: 0.00B [00:00, ?B/s]

Downloading extra modules: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



[Q1] DEV metrics:
  rouge1: 0.1442
  rouge2: 0.0167
  rougeL: 0.1170
  bleu: 0.0063
  bertscore_precision: 0.8513
  bertscore_recall: 0.8513
  bertscore_f1: 0.8511

[Q1] Generating for TEST...
[CorpusIndex] Encoding 11090 training utterances...


Batches:   0%|          | 0/347 [00:00<?, ?it/s]

Generating (corpus-based): 100%|██████████| 2316/2316 [00:39<00:00, 58.40it/s]


[Q1] Saved corpus generations to /content/generations_corpus.csv





In [24]:
!pip install -q transformers accelerate sentencepiece


In [25]:
import os
import math
import numpy as np
import pandas as pd
from tqdm import tqdm
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

In [26]:
class Config:
    TRAIN_PATH = "/content/trac2_CONVT_train.csv"
    DEV_PATH   = "/content/trac2_CONVT_dev.csv"
    TEST_PATH  = "/content/goldstandard_CONVT_test.csv"
    ICL_GEN_OUT = "/content/generations_icl.csv"
    CONV_ID_COL = "conversation_id"
    TURN_COL    = "turn_id"
    TEXT_COL    = "text"
    EMO_INT_COL   = "Emotion"
    EMPATHY_COL   = "Empathy"
    POLARITY_COL  = "EmotionalPolarity"
    MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
    MAX_NEW_TOKENS = 256
    TEMPERATURE    = 0.7
    TOP_P          = 0.9
    FEW_SHOT_K = 3
    MAX_DEV_CONVS_FOR_EVAL = 40
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
cfg = Config()

In [27]:
def load_split(path: str) -> pd.DataFrame:

    if not os.path.exists(path):
        raise FileNotFoundError(path)
    df = pd.read_csv(path, engine="python", on_bad_lines="skip")
    print(f"[load_split] {path} -> {df.shape}")
    return df


def group_by_conversation(df: pd.DataFrame):

    groups = {}
    for cid, g in df.groupby(cfg.CONV_ID_COL):
        g_sorted = g.sort_values(cfg.TURN_COL).reset_index(drop=True)
        groups[cid] = g_sorted
    return groups

In [28]:
def load_icl_model():
    print(f"[model] Loading {cfg.MODEL_NAME} on {cfg.DEVICE} ...")
    tok = AutoTokenizer.from_pretrained(cfg.MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        cfg.MODEL_NAME,
        device_map="auto" if cfg.DEVICE == "cuda" else None,
        torch_dtype=torch.bfloat16 if cfg.DEVICE == "cuda" else torch.float32,
    )
    if tok.pad_token is None:
        tok.pad_token = tok.eos_token
    return tok, model

In [29]:
def format_turns_for_prompt(conv_df: pd.DataFrame, upto_turn: int):

    rows = conv_df[conv_df[cfg.TURN_COL] <= upto_turn]
    lines = []
    for _, r in rows.iterrows():
        t = int(r[cfg.TURN_COL])
        txt = str(r[cfg.TEXT_COL]).strip()
        if txt:
            lines.append(f"Turn {t}: {txt}")
    return lines


def format_future_turns(conv_df: pd.DataFrame, start_turn: int, end_turn: int):

    rows = conv_df[(conv_df[cfg.TURN_COL] >= start_turn) &
                   (conv_df[cfg.TURN_COL] <= end_turn)]
    lines = []
    for _, r in rows.sort_values(cfg.TURN_COL).iterrows():
        t = int(r[cfg.TURN_COL])
        txt = str(r[cfg.TEXT_COL]).strip()
        if txt:
            lines.append(f"Turn {t}: {txt}")
    return lines




In [30]:
def build_example_block(train_conv_df: pd.DataFrame,
                        max_hist_turn: int = 5,
                        future_start: int = 6,
                        future_end: int = 15) -> str:

    hist_lines = format_turns_for_prompt(train_conv_df, upto_turn=max_hist_turn)
    fut_lines  = format_future_turns(train_conv_df, future_start, future_end)

    if len(hist_lines) == 0 or len(fut_lines) == 0:
        return ""
    fut6 = train_conv_df[train_conv_df[cfg.TURN_COL] == future_start]
    if fut6.empty:
        return ""

    fut6 = fut6.iloc[0]
    emo = fut6[cfg.EMO_INT_COL]
    emp = fut6[cfg.EMPATHY_COL]
    pol = fut6[cfg.POLARITY_COL]

    block = []
    block.append("Example:")
    block.append("Context:")
    block.extend(hist_lines)
    block.append(
        f"Target emotion intensity: {emo}, "
        f"Target empathy: {emp}, "
        f"Target polarity: {pol}."
    )
    block.append("Expected future responses:")
    block.extend(fut_lines)
    block.append("")
    return "\n".join(block)


def build_prompt_for_conv(
    train_convs,
    target_conv_df: pd.DataFrame,
    few_shot_k: int = 3,
    max_hist_turn: int = 5,
    future_start: int = 6,
    future_end: int = 15,
) -> str:

    train_conv_ids = list(train_convs.keys())
    np.random.seed(42)
    chosen_ids = np.random.choice(
        train_conv_ids,
        size=min(few_shot_k, len(train_conv_ids)),
        replace=False,
    )

    example_blocks = []
    for cid in chosen_ids:
        block = build_example_block(
            train_convs[cid],
            max_hist_turn=max_hist_turn,
            future_start=future_start,
            future_end=future_end,
        )
        if block.strip():
            example_blocks.append(block)
    hist_lines = format_turns_for_prompt(target_conv_df, upto_turn=max_hist_turn)
    if len(hist_lines) == 0:
        return ""
    fut6 = target_conv_df[target_conv_df[cfg.TURN_COL] == future_start]
    if fut6.empty or cfg.EMO_INT_COL not in target_conv_df.columns:
        emo = "unknown"
        emp = "unknown"
        pol = "unknown"
    else:
        fut6 = fut6.iloc[0]
        emo = fut6[cfg.EMO_INT_COL]
        emp = fut6[cfg.EMPATHY_COL]
        pol = fut6[cfg.POLARITY_COL]

    query_block = []
    query_block.append("Now continue the following new conversation.")
    query_block.append("Context:")
    query_block.extend(hist_lines)
    query_block.append(
        f"Target emotion intensity: {emo}, "
        f"Target empathy: {emp}, "
        f"Target polarity: {pol}."
    )
    query_block.append(
        "Generate the next 10 utterances, from Turn 6 to Turn 15, "
        "one complete, natural sentence per line."
    )
    query_block.append("Only output the utterances, each on a new line.")

    full_prompt = "\n\n".join(example_blocks + ["\n".join(query_block)])
    return full_prompt

In [31]:
def generate_continuation(tokenizer, model, prompt: str) -> str:
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=cfg.MAX_NEW_TOKENS,
            temperature=cfg.TEMPERATURE,
            top_p=cfg.TOP_P,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )
    full = tokenizer.decode(out[0], skip_special_tokens=True)
    if prompt in full:
        return full.split(prompt, 1)[1].strip()
    return full.strip()

In [32]:
def parse_utterances_from_continuation(text: str, n_utterances: int = 10):
    lines = [l.strip() for l in text.split("\n") if l.strip()]
    cleaned = []
    for l in lines:
        l = l.lstrip("-•*")
        if l.lower().startswith("turn "):
            parts = l.split(":", 1)
            if len(parts) == 2:
                l = parts[1].strip()
        if l:
            cleaned.append(l)
    return cleaned[:n_utterances]

In [33]:
def collect_gold_future(conv_df: pd.DataFrame,
                        start_turn: int = 6,
                        end_turn: int = 15):
    rows = conv_df[(conv_df[cfg.TURN_COL] >= start_turn) &
                   (conv_df[cfg.TURN_COL] <= end_turn)]
    return rows.sort_values(cfg.TURN_COL)[cfg.TEXT_COL].astype(str).tolist()


def simple_bleu(refs, hyps) -> float:
    total_ref_tokens = 0
    total_hyp_tokens = 0
    matched_tokens   = 0

    for ref_seq, hyp_seq in zip(refs, hyps):
        for r, h in zip(ref_seq, hyp_seq):
            r_toks = r.split()
            h_toks = h.split()
            total_ref_tokens += len(r_toks)
            total_hyp_tokens += len(h_toks)
            matched_tokens   += sum(1 for t in h_toks if t in r_toks)

    if total_hyp_tokens == 0:
        return 0.0

    p1 = matched_tokens / total_hyp_tokens
    if total_hyp_tokens < total_ref_tokens:
        bp = math.exp(1 - total_ref_tokens / total_hyp_tokens)
    else:
        bp = 1.0
    return bp * p1

In [34]:
def main():
    train_df = load_split(cfg.TRAIN_PATH)
    dev_df   = load_split(cfg.DEV_PATH)
    test_df  = load_split(cfg.TEST_PATH)
    train_convs = group_by_conversation(train_df)
    dev_convs   = group_by_conversation(dev_df)
    test_convs  = group_by_conversation(test_df)
    tokenizer, model = load_icl_model()
    dev_ids = list(dev_convs.keys())
    if cfg.MAX_DEV_CONVS_FOR_EVAL is not None:
        dev_ids = dev_ids[:cfg.MAX_DEV_CONVS_FOR_EVAL]

    all_refs = []
    all_hyps = []

    print(f"\n[DEV] Generating for {len(dev_ids)} conversations...")
    for cid in tqdm(dev_ids):
        conv_df = dev_convs[cid]
        prompt = build_prompt_for_conv(
            train_convs,
            conv_df,
            few_shot_k=cfg.FEW_SHOT_K,
            max_hist_turn=5,
            future_start=6,
            future_end=15,
        )
        if not prompt.strip():
            continue

        cont    = generate_continuation(tokenizer, model, prompt)
        gen_utts = parse_utterances_from_continuation(cont, n_utterances=10)
        gold_utts = collect_gold_future(conv_df, 6, 15)

        m = min(len(gen_utts), len(gold_utts))
        if m == 0:
            continue

        all_hyps.append(gen_utts[:m])
        all_refs.append(gold_utts[:m])

    if all_refs:
        bleu_like = simple_bleu(all_refs, all_hyps)
        print(f"\n[DEV] Simple BLEU-like score (unigram): {bleu_like:.4f}")
    else:
        print("\n[DEV] No usable dev conversations for eval – check config.")


    print(f"\n[TEST] Generating for {len(test_convs)} conversations...")
    rows_out = []
    for cid, conv_df in tqdm(test_convs.items()):
        prompt = build_prompt_for_conv(
            train_convs,
            conv_df,
            few_shot_k=cfg.FEW_SHOT_K,
            max_hist_turn=5,
            future_start=6,
            future_end=15,
        )
        if not prompt.strip():
            continue

        cont     = generate_continuation(tokenizer, model, prompt)
        gen_utts = parse_utterances_from_continuation(cont, n_utterances=10)

        start_turn = 6
        for i, utt in enumerate(gen_utts):
            rows_out.append({
                "id": cid,
                "turn": start_turn + i,
                "generated_response": utt,
            })

    gen_df = pd.DataFrame(rows_out)
    gen_df.to_csv(cfg.ICL_GEN_OUT, index=False)
    print(f"\n[TEST] Saved ICL generations to {cfg.ICL_GEN_OUT}")


if __name__ == "__main__":
    main()

[load_split] /content/trac2_CONVT_train.csv -> (11090, 12)
[load_split] /content/trac2_CONVT_dev.csv -> (965, 13)
[load_split] /content/goldstandard_CONVT_test.csv -> (2316, 10)
[model] Loading microsoft/Phi-3-mini-4k-instruct on cuda ...


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/306 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/599 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/967 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]


[DEV] Generating for 33 conversations...


100%|██████████| 33/33 [05:36<00:00, 10.19s/it]



[DEV] Simple BLEU-like score (unigram): 0.0590

[TEST] Generating for 67 conversations...


100%|██████████| 67/67 [11:19<00:00, 10.14s/it]


[TEST] Saved ICL generations to /content/generations_icl.csv



