In [1]:
# Mount Google Drive at /content/drive; every CSV path used later in this
# notebook lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install transformers




In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)

# Checkpoint used as the scoring language model (alternative: "gpt2-medium").
model_name = "distilgpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the weights first, then move them onto the selected device.
model = AutoModelForCausalLM.from_pretrained(model_name)
model = model.to(device)

# Switch to evaluation mode; as the cell's last expression this also
# displays the module structure.
model.eval()


Using device: cuda


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/762 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/353M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-5): 6 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [None]:
# Reuse the end-of-sequence token for padding, on both the model config and
# the tokenizer, so batches tokenized with padding=True can be built
# (presumably the checkpoint defines no pad token of its own -- confirm).
model.config.pad_token_id = tokenizer.eos_token_id
tokenizer.pad_token = tokenizer.eos_token


In [None]:
import torch.nn as nn

def compute_fluency_scores(texts, max_length=256, batch_size=8,
                           lm=None, lm_tokenizer=None, lm_device=None):
    """
    Score how predictable each text is under a causal language model.

    Parameters
    ----------
    texts : sequence of str
        Texts to score. An empty sequence yields an empty list.
    max_length : int
        Texts are truncated to this many tokens before scoring.
    batch_size : int
        Number of texts scored per forward pass (must be >= 1).
    lm, lm_tokenizer, lm_device : optional
        Model, tokenizer and device to use. Each defaults to the
        notebook-level `model`, `tokenizer` and `device` respectively, so
        existing calls keep working unchanged.

    Returns
    -------
    list of dict, one per input text and in the same order, with:
      - 'nll': average negative log-likelihood per predicted token
      - 'ppl': perplexity, exp(nll)
      - 'fluency': 1 / (1 + ppl)  (higher = more fluent)

    Notes
    -----
    The first token of a text has no preceding context and is never scored.
    A text that tokenizes to fewer than two tokens therefore has no scored
    token and gets NaN for all three values.

    Raises
    ------
    ValueError
        If `batch_size` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    # Fall back to the notebook globals only when no override was supplied.
    lm = model if lm is None else lm
    lm_tokenizer = tokenizer if lm_tokenizer is None else lm_tokenizer
    lm_device = device if lm_device is None else lm_device

    texts = list(texts)
    scores = []
    # reduction="none" keeps one loss value per token so padding can be
    # masked out before averaging.
    loss_fct = nn.CrossEntropyLoss(reduction="none")

    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        enc = lm_tokenizer(
            batch,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=max_length,
        ).to(lm_device)

        input_ids = enc["input_ids"]
        attn_mask = enc["attention_mask"]

        with torch.no_grad():
            # Pass the attention mask so padded positions are never attended
            # to, independent of which side the tokenizer pads on.
            logits = lm(input_ids, attention_mask=attn_mask).logits  # (B, T, V)

        # Shift for next-token prediction: position t predicts token t+1.
        shift_logits = logits[:, :-1, :]       # (B, T-1, V)
        shift_labels = input_ids[:, 1:]        # (B, T-1)

        vocab_size = shift_logits.size(-1)
        loss = loss_fct(
            shift_logits.reshape(-1, vocab_size),
            shift_labels.reshape(-1),
        ).view(shift_labels.size())            # (B, T-1)

        # Zero out the loss wherever the target token is padding.
        shift_mask = attn_mask[:, 1:].to(loss.dtype)   # (B, T-1)
        loss = loss * shift_mask

        # Average NLL over the scored tokens of each sequence. A sequence
        # with no scored token gives 0 / 0 = NaN, marking it as undefined.
        token_counts = shift_mask.sum(dim=1)           # (B,)
        seq_nll = loss.sum(dim=1) / token_counts       # (B,)
        seq_ppl = torch.exp(seq_nll)

        for nll_val, ppl_val in zip(seq_nll.cpu().tolist(), seq_ppl.cpu().tolist()):
            scores.append({
                "nll": nll_val,
                "ppl": ppl_val,
                "fluency": 1.0 / (1.0 + ppl_val),  # squashed to (0,1)
            })

    return scores


In [None]:
import pandas as pd

def add_fluency_column(csv_path, text_col, new_col="fluency_gpt2",
                       out_path=None, score_fn=None):
    """
    Add a fluency score column to a CSV file and save the result.

    Parameters
    ----------
    csv_path : str
        CSV file to read.
    text_col : str
        Column holding the texts to score.
    new_col : str
        Name of the column that receives the fluency values.
    out_path : str, optional
        Where to write the result. Defaults to `csv_path`, i.e. the input
        file is overwritten (the original behaviour).
    score_fn : callable, optional
        Takes a list of texts and returns one dict with a "fluency" key per
        text. Defaults to `compute_fluency_scores`.

    Returns
    -------
    pandas.DataFrame
        The frame that was written. Rows whose text is missing or
        whitespace-only have NaN in `new_col`.

    Raises
    ------
    ValueError
        If `score_fn` does not return exactly one result per scored text.
    """
    score_fn = compute_fluency_scores if score_fn is None else score_fn
    dest_path = csv_path if out_path is None else out_path

    df = pd.read_csv(csv_path)

    # Only score rows that actually contain text.
    mask = df[text_col].notna() & df[text_col].astype(str).str.strip().ne("")
    texts = df.loc[mask, text_col].astype(str).tolist()

    score_dicts = score_fn(texts)
    if len(score_dicts) != len(texts):
        raise ValueError(
            f"score_fn returned {len(score_dicts)} results for {len(texts)} texts"
        )

    # Start from an all-NaN column so a value left over from a previous run
    # cannot survive on a row that is not scored this time.
    df[new_col] = float("nan")
    if texts:
        df.loc[mask, new_col] = [d["fluency"] for d in score_dicts]

    df.to_csv(dest_path, index=False)
    print(f"Saved {new_col} to {dest_path}. Mean = {df[new_col].mean():.4f}")
    return df

# One (csv file, generated-text column) entry per evaluated system.
fluency_jobs = [
    # GPT-4
    ("/content/drive/MyDrive/eval_style_pairs_with_outputs_gpt4_with_sim.csv", "output_gpt4"),
    # LoRA
    ("/content/drive/MyDrive/eval_with_lora_with_sim.csv", "output"),
    # Qwen base
    ("/content/drive/MyDrive/external_genre_validation_400_with_tragets_qwen_base_with_sim.csv", "output_qwen_base"),
]

# Score the files in the listed order; each call rewrites its CSV in place.
scored_frames = [
    add_fluency_column(csv_file, text_col=column, new_col="fluency_gpt2")
    for csv_file, column in fluency_jobs
]

# Display the frame of the last file processed.
scored_frames[-1]


Saved fluency_gpt2 to /content/drive/MyDrive/eval_style_pairs_with_outputs_gpt4_with_sim.csv. Mean = 0.0099
Saved fluency_gpt2 to /content/drive/MyDrive/eval_with_lora_with_sim.csv. Mean = 0.0149
Saved fluency_gpt2 to /content/drive/MyDrive/external_genre_validation_400_with_tragets_qwen_base_with_sim.csv. Mean = 0.0146


Unnamed: 0,source_style,raw_content,target_style,output_qwen_base,content_similarity,rnn_pred_style,rnn_style_match,spacy_entity_retention,fluency_gpt2
0,goth,"She traced the cracks in the ceiling, noticing...",science_fiction,"A sealed trunk sits at the foot of the bed, it...",0.572070,romance,0,0.0,0.016541
1,comedy,The coffee machine hissed like an offended dra...,detective_mystery,As the coffee machine hissed like an angry dra...,0.862334,comedy,0,1.0,0.010867
2,romance,She began to imagine future events with the qu...,comedy,"She started to fantasize about the future, ass...",0.929555,comedy,1,1.0,0.014914
3,fantasy,"The mountain spoke only in storms, and lately ...",detective_mystery,"As the storm clouds gathered outside, the cast...",0.774109,fantasy,0,0.0,0.024728
4,science_fiction,The first true artificial intelligence announc...,goth,"She floated through the corridors of her home,...",0.389416,fantasy,0,0.0,0.036954
...,...,...,...,...,...,...,...,...,...
395,goth,Footsteps echoed in the corridor long after an...,fantasy,"In the dimly lit corridors, the echoes of foot...",0.954172,romance,0,1.0,0.012780
396,detective_mystery,Streetlights cast long shadows across the alle...,goth,The streetlights stretched their long shadows ...,0.917089,detective_mystery,0,0.0,0.024541
397,romance,He practiced what he wanted to say in the refl...,fantasy,"In the shadowed recesses of his mind, he rehea...",0.796026,fantasy,1,1.0,0.013843
398,science_fiction,The newest model of spacecraft navigated not b...,detective_mystery,The latest iteration of interstellar craft emp...,0.884496,science_fiction,0,1.0,0.008045


In [None]:
import pandas as pd

# NOTE: this name is also defined in an earlier cell of the notebook; whichever
# definition ran last is the one in effect, so keep the two in sync (or
# delete one of them).
def add_fluency_column(csv_path, text_col, new_col="fluency_gpt2",
                       out_path=None, score_fn=None):
    """
    Add a fluency score column to a CSV file and save the result.

    Parameters
    ----------
    csv_path : str
        CSV file to read.
    text_col : str
        Column holding the texts to score.
    new_col : str
        Name of the column that receives the fluency values.
    out_path : str, optional
        Where to write the result. Defaults to `csv_path`, i.e. the input
        file is overwritten (the original behaviour).
    score_fn : callable, optional
        Takes a list of texts and returns one dict with a "fluency" key per
        text. Defaults to `compute_fluency_scores`.

    Returns
    -------
    pandas.DataFrame
        The frame that was written. Rows whose text is missing or
        whitespace-only have NaN in `new_col`.

    Raises
    ------
    ValueError
        If `score_fn` does not return exactly one result per scored text.
    """
    score_fn = compute_fluency_scores if score_fn is None else score_fn
    dest_path = csv_path if out_path is None else out_path

    df = pd.read_csv(csv_path)

    # Only score rows that actually contain text.
    mask = df[text_col].notna() & df[text_col].astype(str).str.strip().ne("")
    texts = df.loc[mask, text_col].astype(str).tolist()

    score_dicts = score_fn(texts)
    if len(score_dicts) != len(texts):
        raise ValueError(
            f"score_fn returned {len(score_dicts)} results for {len(texts)} texts"
        )

    # Start from an all-NaN column so a value left over from a previous run
    # cannot survive on a row that is not scored this time.
    df[new_col] = float("nan")
    if texts:
        df.loc[mask, new_col] = [d["fluency"] for d in score_dicts]

    df.to_csv(dest_path, index=False)
    print(f"Saved {new_col} to {dest_path}. Mean = {df[new_col].mean():.4f}")
    return df

# One (csv file, generated-text column) entry per evaluated system.
fluency_jobs = [
    # GPT-4
    ("/content/drive/MyDrive/test_dataset_mode_transfer_cleaned_with_sim.csv", "output_gpt4"),
    # LoRA
    ("/content/drive/MyDrive/new_test_with_lora_with_sim.csv", "output"),
    # Qwen base
    ("/content/drive/MyDrive/test_eval_qwen_base_with_sim.csv", "output_qwen_base"),
]

# Score the files in the listed order; each call rewrites its CSV in place.
scored_frames = [
    add_fluency_column(csv_file, text_col=column, new_col="fluency_gpt2")
    for csv_file, column in fluency_jobs
]

# Display the frame of the last file processed.
scored_frames[-1]


Saved fluency_gpt2 to /content/drive/MyDrive/test_dataset_mode_transfer_cleaned_with_sim.csv. Mean = 0.0159
Saved fluency_gpt2 to /content/drive/MyDrive/new_test_with_lora_with_sim.csv. Mean = 0.0226
Saved fluency_gpt2 to /content/drive/MyDrive/test_eval_qwen_base_with_sim.csv. Mean = 0.0223


Unnamed: 0,source_style,target_style,raw_content,output_qwen_base,content_similarity,rnn_pred_style,rnn_style_match,spacy_entity_retention,fluency_gpt2
0,science_fiction,fantasy,"It is upon these banks, and on these waters, s...","In this enchanted land, where old tales whispe...",0.679350,science_fiction,0,0.250000,0.023193
1,comedy,fantasy,The room was large and gloomy. A checquered ma...,In an enchanted forest filled with towering tr...,0.548362,fantasy,1,0.000000,0.021232
2,romance,science_fiction,"These points had been raised, as usual, at Mrs...",In the quaint setting of Mrs. Archer’s Thanksg...,0.947154,romance,0,0.900000,0.029553
3,science_fiction,comedy,“Oh! I could manufacture the air necessary for...,Oh! I can make the air I need by manufacturing...,0.684691,science_fiction,0,0.000000,0.021246
4,fantasy,science_fiction,"‘Dear lady, do not refuse me what I have come ...","'Lady, if you decline what I am determined to ...",0.857637,detective_mystery,0,0.666667,0.011823
...,...,...,...,...,...,...,...,...,...
187,comedy,science_fiction,"""Well, mother, I am sure I don't know. I could...","As always, my dear, I found myself unable to r...",0.735336,comedy,0,0.000000,0.025580
188,science_fiction,comedy,This operation did not last more than four and...,Here's the paragraph rewritten in a comedic st...,0.626005,comedy,1,0.333333,0.037069
189,comedy,goth,There are times when one asks oneself: Why thi...,Why does man labor endlessly? Why do we constr...,0.837635,goth,1,0.500000,0.008038
190,detective_mystery,fantasy,"According to Wolf, the attraction of the plane...","In accordance with Wolf's observations, the gr...",0.899829,detective_mystery,0,1.000000,0.029600


In [3]:
import pandas as pd

def add_overall_score(
    in_path,
    out_path,
    style_col,
    content_col,
    ne_col,
    ppl_col,
    score_col="overall_score",
    weights=(0.3, 0.3, 0.3, 0.1),
):
    """
    Add a weighted-sum score column to a CSV file and save the result.

    Parameters
    ----------
    in_path : str
        CSV file to read.
    out_path : str
        CSV file to write (may be the same as `in_path`).
    style_col, content_col, ne_col, ppl_col : str
        Names of the four component columns. Despite its name, `ppl_col` is
        given a fluency column ("fluency_gpt2", higher = better) by the
        calls in this notebook, not a raw perplexity.
    score_col : str
        Name of the column that receives the combined score.
    weights : sequence of 4 numbers
        Weights for (style, content, ne, ppl), in that order. The default
        reproduces the original fixed 0.3 / 0.3 / 0.3 / 0.1 weighting.

    Returns
    -------
    pandas.DataFrame
        The frame that was written. A row with NaN in any component column
        gets NaN as its score.

    Raises
    ------
    ValueError
        If `weights` does not contain exactly four values.
    KeyError
        If any of the component columns is missing from the file.
    """
    if len(weights) != 4:
        raise ValueError(f"weights must have 4 entries, got {len(weights)}")
    w_style, w_content, w_ne, w_ppl = weights

    df = pd.read_csv(in_path)

    # Fail before computing anything, and name every column that is absent.
    missing = [
        col for col in (style_col, content_col, ne_col, ppl_col)
        if col not in df.columns
    ]
    if missing:
        raise KeyError(f"columns missing in {in_path}: {missing}")

    df[score_col] = (
        w_style * df[style_col]
        + w_content * df[content_col]
        + w_ne * df[ne_col]
        + w_ppl * df[ppl_col]
    )

    df.to_csv(out_path, index=False)
    print(f"Saved with {score_col} → {out_path}")
    return df


In [4]:
# Column roles are the same for every results file.
score_columns = dict(
    style_col="rnn_style_match",
    content_col="content_similarity",
    ne_col="spacy_entity_retention",
    ppl_col="fluency_gpt2",
)

# Each file is read, extended with the overall score, and written back to
# the same location.

# GPT-4 outputs
gpt4_csv = "/content/drive/MyDrive/eval_style_pairs_with_outputs_gpt4_with_sim.csv"
df_gpt4 = add_overall_score(in_path=gpt4_csv, out_path=gpt4_csv, **score_columns)

# LoRA outputs
lora_csv = "/content/drive/MyDrive/eval_with_lora_with_sim.csv"
df_lora = add_overall_score(in_path=lora_csv, out_path=lora_csv, **score_columns)

# External genre validation file
qwen_csv = "/content/drive/MyDrive/external_genre_validation_400_with_tragets_qwen_base_with_sim.csv"
df_qwen = add_overall_score(in_path=qwen_csv, out_path=qwen_csv, **score_columns)


Saved with overall_score → /content/drive/MyDrive/eval_style_pairs_with_outputs_gpt4_with_sim.csv
Saved with overall_score → /content/drive/MyDrive/eval_with_lora_with_sim.csv
Saved with overall_score → /content/drive/MyDrive/external_genre_validation_400_with_tragets_qwen_base_with_sim.csv


In [6]:
import pandas as pd

# Results file of each system.
paths = {
    "GPT-4": "/content/drive/MyDrive/eval_style_pairs_with_outputs_gpt4_with_sim.csv",
    "LoRA": "/content/drive/MyDrive/eval_with_lora_with_sim.csv",
    "Qwen Base": "/content/drive/MyDrive/external_genre_validation_400_with_tragets_qwen_base_with_sim.csv",
}

# Mean overall score per system, keyed by system name.
means = {}

for name, path in paths.items():
    df = pd.read_csv(path)
    if "overall_score" in df.columns:
        means[name] = df["overall_score"].mean()
    else:
        # The overall-score cell has not been run on this file yet.
        raise ValueError(f"'overall_score' column missing in {path}")

means


{'GPT-4': np.float64(0.5732362668430927),
 'LoRA': np.float64(0.4582439810003292),
 'Qwen Base': np.float64(0.4621973621052635)}

In [7]:
# Column roles are the same for every results file.
score_columns = dict(
    style_col="rnn_style_match",
    content_col="content_similarity",
    ne_col="spacy_entity_retention",
    ppl_col="fluency_gpt2",
)

# Each file is read, extended with the overall score, and written back to
# the same location.

# GPT-4 outputs
gpt4_csv = "/content/drive/MyDrive/test_dataset_mode_transfer_cleaned_with_sim.csv"
df_gpt4 = add_overall_score(in_path=gpt4_csv, out_path=gpt4_csv, **score_columns)

# LoRA outputs
lora_csv = "/content/drive/MyDrive/new_test_with_lora_with_sim.csv"
df_lora = add_overall_score(in_path=lora_csv, out_path=lora_csv, **score_columns)

# Qwen base outputs
qwen_csv = "/content/drive/MyDrive/test_eval_qwen_base_with_sim.csv"
df_qwen = add_overall_score(in_path=qwen_csv, out_path=qwen_csv, **score_columns)


Saved with overall_score → /content/drive/MyDrive/test_dataset_mode_transfer_cleaned_with_sim.csv
Saved with overall_score → /content/drive/MyDrive/new_test_with_lora_with_sim.csv
Saved with overall_score → /content/drive/MyDrive/test_eval_qwen_base_with_sim.csv


In [8]:
import pandas as pd

# Results file of each system.
paths = {
    "GPT-4": "/content/drive/MyDrive/test_dataset_mode_transfer_cleaned_with_sim.csv",
    "LoRA": "/content/drive/MyDrive/new_test_with_lora_with_sim.csv",
    "Qwen Base": "/content/drive/MyDrive/test_eval_qwen_base_with_sim.csv",
}

# Mean overall score per system, keyed by system name.
means = {}

for name, path in paths.items():
    df = pd.read_csv(path)
    if "overall_score" in df.columns:
        means[name] = df["overall_score"].mean()
    else:
        # The overall-score cell has not been run on this file yet.
        raise ValueError(f"'overall_score' column missing in {path}")

means


{'GPT-4': np.float64(0.5740624979632089),
 'LoRA': np.float64(0.4863153294939117),
 'Qwen Base': np.float64(0.4826184114983149)}