In [1]:
import pandas as pd
import re
from collections import Counter

# --- Files ---
# Maps a display name for each model to the CSV file holding its generations.
# The name is what ends up in the "model" column of the summary; each CSV is
# required to have an "answer" column (the loop below skips files without one).
paths = {
    "Gemma-3-4B-it": "/workspace/gemma_generations.csv",
    "Qwen2.5-3B-Instruct": "/workspace/qwen_generations.csv",
    "Llama-3.2-3B-Instruct": "/workspace/llama_generations.csv",
}

# --- Text stats helper ---
# A "word" is a run of word characters, optionally followed by one apostrophe
# and more word characters (so "don't" counts as a single token).
_WORD_RE = re.compile(r"\w+(?:'\w+)?")


def text_stats(df):
    """Compute simple text-quality metrics over the ``answer`` column of *df*.

    Missing answers are treated as empty strings before any metric is
    computed, so they count towards the empty-answer percentage.

    Args:
        df: DataFrame with an ``answer`` column.

    Returns:
        A dict with the keys:

        * ``avg_words`` -- mean number of word tokens per answer.
        * ``avg_type_token_ratio`` -- mean over answers of
          (unique lowercased tokens / total tokens); an answer with no
          tokens contributes 0.
        * ``answer_empties_%`` -- percentage of answers that are blank after
          stripping whitespace.
        * ``exact_duplicate_answers_%`` -- ``100 * (1 - unique / total)``,
          i.e. the share of rows that repeat an earlier answer verbatim.
        * ``n_rows`` -- number of answers.

        For an empty DataFrame every metric is 0 (rather than NaN) so the
        result stays consistent with the duplicate-percentage guard.

    Raises:
        KeyError: if *df* has no ``answer`` column.
    """
    answers = df["answer"].fillna("").astype(str)
    n_rows = len(answers)

    # Means over an empty Series are NaN; report zeros instead so that all
    # metrics agree on what "no data" looks like.
    if n_rows == 0:
        return {
            "avg_words": 0.0,
            "avg_type_token_ratio": 0.0,
            "answer_empties_%": 0.0,
            "exact_duplicate_answers_%": 0.0,
            "n_rows": 0,
        }

    # total words
    word_counts = answers.apply(lambda t: len(_WORD_RE.findall(t)))
    avg_words = word_counts.mean()

    # type-token ratio (unique words / total words per answer, then mean)
    def ttr(text):
        tokens = _WORD_RE.findall(text.lower())
        return len(set(tokens)) / len(tokens) if tokens else 0

    avg_ttr = answers.apply(ttr).mean()

    # empty answers %
    empties_pct = 100 * (answers.str.strip() == "").mean()

    # duplicate exact answers %
    duplicate_pct = 100 * (1 - answers.nunique() / n_rows)

    return {
        "avg_words": avg_words,
        "avg_type_token_ratio": avg_ttr,
        "answer_empties_%": empties_pct,
        "exact_duplicate_answers_%": duplicate_pct,
        "n_rows": n_rows,
    }

# --- Run for each model ---
# Column order of the summary table. Passing it to the DataFrame constructor
# (instead of indexing afterwards) keeps the script working when every file
# was skipped: an empty `rows` then yields an empty table with these columns
# rather than a KeyError on column selection.
summary_columns = [
    "model",
    "n_rows",
    "avg_words",
    "avg_type_token_ratio",
    "answer_empties_%",
    "exact_duplicate_answers_%",
]

rows = []
for name, path in paths.items():
    # Best-effort: a missing or malformed file is reported and skipped so the
    # remaining models are still summarised.
    try:
        df = pd.read_csv(path)
        if "answer" not in df.columns:
            raise ValueError(f"'answer' column not found in {path}")
        metrics = text_stats(df)
        metrics["model"] = name
        rows.append(metrics)
    except Exception as e:
        print(f"[skip] {name}: {e}")

summary = pd.DataFrame(rows, columns=summary_columns)

print(summary.round(3))

# Persist the (unrounded) summary next to the input files
summary.to_csv("/workspace/model_text_metrics_summary.csv", index=False)
print("\nSaved /workspace/model_text_metrics_summary.csv")


                   model  n_rows  avg_words  avg_type_token_ratio  \
0          Gemma-3-4B-it      96    485.333                 0.429   
1    Qwen2.5-3B-Instruct      96    338.812                 0.545   
2  Llama-3.2-3B-Instruct      96    242.875                 0.441   

   answer_empties_%  exact_duplicate_answers_%  
0               0.0                      0.000  
1               0.0                      0.000  
2               0.0                      1.042  

Saved /workspace/model_text_metrics_summary.csv
