In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip uninstall -y bitsandbytes
!pip install -U bitsandbytes


Found existing installation: bitsandbytes 0.48.2
Uninstalling bitsandbytes-0.48.2:
  Successfully uninstalled bitsandbytes-0.48.2
Collecting bitsandbytes
  Using cached bitsandbytes-0.48.2-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Using cached bitsandbytes-0.48.2-py3-none-manylinux_2_24_x86_64.whl (59.4 MB)
Installing collected packages: bitsandbytes
Successfully installed bitsandbytes-0.48.2


In [3]:
!pip install -U "transformers" "accelerate"




In [4]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_DIR = "/content/drive/MyDrive/qwen25_sft_merged"

tok = AutoTokenizer.from_pretrained(MODEL_DIR, use_fast=True)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_DIR,
    torch_dtype=torch.float16,   # or torch.bfloat16 if supported
    device_map="auto",
)

model.eval()


`torch_dtype` is deprecated! Use `dtype` instead!


Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(151936, 1536, padding_idx=151643)
    (layers): ModuleList(
      (0-27): 28 x Qwen2DecoderLayer(
        (self_attn): Qwen2Attention(
          (q_proj): Linear4bit(in_features=1536, out_features=1536, bias=True)
          (k_proj): Linear4bit(in_features=1536, out_features=256, bias=True)
          (v_proj): Linear4bit(in_features=1536, out_features=256, bias=True)
          (o_proj): Linear4bit(in_features=1536, out_features=1536, bias=False)
        )
        (mlp): Qwen2MLP(
          (gate_proj): Linear4bit(in_features=1536, out_features=8960, bias=False)
          (up_proj): Linear4bit(in_features=1536, out_features=8960, bias=False)
          (down_proj): Linear4bit(in_features=8960, out_features=1536, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): Qwen2RMSNorm((1536,), eps=1e-06)
        (post_attention_layernorm): Qwen2RMSNorm((1536,), eps=1e-06)
      )
    )
    (no

In [5]:
def generate_style_transfer(
    src_paragraph,
    source_style,
    target_style,
    temperature=0.7,
    top_p=0.9,
    max_new_tokens=220,
):
    # Build chat messages
    messages = [
        {
            "role": "system",
            "content": "You are a careful literary rewriting assistant that follows instructions exactly."
        },
        {
            "role": "user",
            "content": (
                f"Rewrite the paragraph from **{source_style}** into **{target_style}** style.\n"
                f"Rules: preserve character names and all events; "
                f"change only tone, narration, and imagery.\n\n"
                f"Paragraph:\n{src_paragraph.strip()}"
            ),
        },
    ]

    # Convert to Qwen chat template
    prompt = tok.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    inputs = tok(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            eos_token_id=tok.eos_token_id,
            pad_token_id=tok.eos_token_id,
        )

    decoded = tok.decode(output[0], skip_special_tokens=True)
    return decoded


In [None]:
src = """
Elena waited beneath the lantern light, her heart racing as she spotted Adrian
walking down the quiet street. The air was cool and smelled faintly of roses.
When he smiled at her, everything else in the world seemed to fade away.
"""

print("=== Romance → Gothic Horror ===")
print(generate_style_transfer(src, "romance", "gothic horror"))


=== Romance → Gothic Horror ===
system
You are a careful literary rewriting assistant that follows instructions exactly.
user
Rewrite the paragraph from **romance** into **gothic horror** style.
Rules: preserve character names and all events; change only tone, narration, and imagery.

Paragraph:
Elena waited beneath the lantern light, her heart racing as she spotted Adrian
walking down the quiet street. The air was cool and smelled faintly of roses.
When he smiled at her, everything else in the world seemed to fade away.
assistant
The night held its breath under the lantern's soft glow, each shadow elongated by the flickering flame. Elena's pulse quickened with anticipation, as if every breath counted towards the moment when her eyes would finally meet those of Adrian. His smile brought life back to the world around them, a warmth that enveloped her like a shroud, washing over her with an ethereal mist.


In [None]:
from google.colab import output
output.no_vertical_scroll()

src = """
The elevator refused to open, so Max slammed the button fifteen more times.
Behind him someone coughed in irritation, and Max flashed an apologetic grin.
It had been one of those days—lost keys, spilled coffee, and now an elevator
that hated him personally.
"""

print("=== Comedy → Detective Mystery ===")
print(generate_style_transfer(src, "comedy", "detective mystery"))


=== Comedy → Detective Mystery ===
system
You are a careful literary rewriting assistant that follows instructions exactly.
user
Rewrite the paragraph from **comedy** into **detective mystery** style.
Rules: preserve character names and all events; change only tone, narration, and imagery.

Paragraph:
The elevator refused to open, so Max slammed the button fifteen more times.
Behind him someone coughed in irritation, and Max flashed an apologetic grin.
It had been one of those days—lost keys, spilled coffee, and now an elevator
that hated him personally.
assistant
Intriguing! The elevator remained stubbornly closed, refusing its rightful occupant with each frustrated push of the button. Behind him, a voice rose in exasperation as he continued his futile attempts. "Max, let me guess... you're experiencing a particularly challenging day? Lost keys, spilled coffee, and now this infuriating elevator that seems determined to give you a run for your money." Max's face softened slightly as he

In [None]:
from google.colab import output
output.no_vertical_scroll()

src = """
The crystal tower shimmered under the twin moons, humming softly with ancient magic.
Aelin tightened her grip on the sword at her hip and glanced at the glowing runes
carved along the path. Tonight, the gate would open, changing the kingdom forever.
"""

print("=== Fantasy → Science Fiction ===")
print(generate_style_transfer(src, "fantasy", "science fiction"))


=== Fantasy → Science Fiction ===
system
You are a careful literary rewriting assistant that follows instructions exactly.
user
Rewrite the paragraph from **fantasy** into **science fiction** style.
Rules: preserve character names and all events; change only tone, narration, and imagery.

Paragraph:
The crystal tower shimmered under the twin moons, humming softly with ancient magic.
Aelin tightened her grip on the sword at her hip and glanced at the glowing runes
carved along the path. Tonight, the gate would open, changing the kingdom forever.
assistant
In the twilight of the universe, the crystalline spire glowed dimly beneath twin moons, its surface resonating with arcane energy.
Aelin's fingers gripped the hilt of her sword tightly as she surveyed the luminous carvings etched upon the pathway. Tonight, the gateway would be unveiled, ushering profound transformation across the realm.


In [None]:
import os
import pandas as pd
from tqdm.auto import tqdm
import numpy as np

INPUT_PATH = "/content/drive/MyDrive/external_genre_validation_400_with_targets.csv"  # has source_style, raw_content, target_style
OUTPUT_PATH = "/content/drive/MyDrive/eval_with_lora.csv"              # where we will save with 'output' column
SAVE_EVERY = 10  # save every N generations

# 1. Load data (resume if OUTPUT_PATH already exists)
if os.path.exists(OUTPUT_PATH):
    print(f"Loading existing file to resume: {OUTPUT_PATH}")
    df = pd.read_csv(OUTPUT_PATH)
else:
    print(f"Loading original eval file: {INPUT_PATH}")
    df = pd.read_csv(INPUT_PATH)
    # initialize empty output column
    if "output" not in df.columns:
        df["output"] = ""

print("Total rows:", len(df))

# 2. Helper to decide if a row is already done
def is_done(val):
    if val is None:
        return False
    if isinstance(val, float) and np.isnan(val):
        return False
    return str(val).strip() != ""

# 3. Loop over rows and generate only for missing outputs
num_generated = 0

for idx, row in tqdm(df.iterrows(), total=len(df)):
    if is_done(row.get("output", "")):
        # already generated - skip
        continue

    src_paragraph = str(row["raw_content"])
    source_style = str(row["source_style"])
    target_style = str(row["target_style"])

    try:
        rewritten = generate_style_transfer(
            src_paragraph=src_paragraph,
            source_style=source_style,
            target_style=target_style,
            temperature=0.7,
            top_p=0.9,
            max_new_tokens=220,
        )
    except Exception as e:
        print(f"Error at index {idx}: {e}")
        df.at[idx, "output"] = f"[ERROR: {e}]"
        continue

    # ✅ actually store the output in the dataframe
    df.at[idx, "output"] = rewritten
    num_generated += 1  # ✅ increment counter

    # 4. Periodic saving for safety
    if num_generated > 0 and num_generated % SAVE_EVERY == 0:
        df.to_csv(OUTPUT_PATH, index=False)
        print(f"Saved progress after {num_generated} new generations.")

# 5. Final save
df.to_csv(OUTPUT_PATH, index=False)
print(f"Done! Total new generations this run: {num_generated}")
print(f"Saved to: {OUTPUT_PATH}")


Loading original eval file: /content/drive/MyDrive/external_genre_validation_400_with_targets.csv
Total rows: 400


  0%|          | 0/400 [00:00<?, ?it/s]

Saved progress after 10 new generations.
Saved progress after 20 new generations.
Saved progress after 30 new generations.
Saved progress after 40 new generations.
Saved progress after 50 new generations.
Saved progress after 60 new generations.
Saved progress after 70 new generations.
Saved progress after 80 new generations.
Saved progress after 90 new generations.
Saved progress after 100 new generations.
Saved progress after 110 new generations.
Saved progress after 120 new generations.
Saved progress after 130 new generations.
Saved progress after 140 new generations.
Saved progress after 150 new generations.
Saved progress after 160 new generations.
Saved progress after 170 new generations.
Saved progress after 180 new generations.
Saved progress after 190 new generations.
Saved progress after 200 new generations.
Saved progress after 210 new generations.
Saved progress after 220 new generations.
Saved progress after 230 new generations.
Saved progress after 240 new generations.
S

In [8]:
INPUT_PATH = "/content/drive/MyDrive/test_df_transfer_clean.csv"
df = pd.read_csv(INPUT_PATH)
#add column output to df
df["output"] = ""
df.to_csv(INPUT_PATH, index=False)

GENERATING FOR TEST SET

In [10]:
import os
import pandas as pd
from tqdm.auto import tqdm
import numpy as np

INPUT_PATH = "/content/drive/MyDrive/test_df_transfer_clean.csv"  # has source_style, raw_content, target_style
OUTPUT_PATH = "/content/drive/MyDrive/new_test_with_lora.csv"              # where we will save with 'output' column
SAVE_EVERY = 10  # save every N generations

# 1. Load data (resume if OUTPUT_PATH already exists)
if os.path.exists(OUTPUT_PATH):
    print(f"Loading existing file to resume: {OUTPUT_PATH}")
    df = pd.read_csv(OUTPUT_PATH)
else:
    print(f"Loading original eval file: {INPUT_PATH}")
    df = pd.read_csv(INPUT_PATH)
    # initialize empty output column
    if "output" not in df.columns:
        df["output"] = ""

print("Total rows:", len(df))

# 2. Helper to decide if a row is already done
def is_done(val):
    if val is None:
        return False
    if isinstance(val, float) and np.isnan(val):
        return False
    return str(val).strip() != ""

# 3. Loop over rows and generate only for missing outputs
num_generated = 0

for idx, row in tqdm(df.iterrows(), total=len(df)):
    if is_done(row.get("output", "")):
        # already generated - skip
        continue

    src_paragraph = str(row["raw_content"])
    source_style = str(row["source_style"])
    target_style = str(row["target_style"])

    try:
        rewritten = generate_style_transfer(
            src_paragraph=src_paragraph,
            source_style=source_style,
            target_style=target_style,
            temperature=0.7,
            top_p=0.9,
            max_new_tokens=220,
        )
    except Exception as e:
        print(f"Error at index {idx}: {e}")
        df.at[idx, "output"] = f"[ERROR: {e}]"
        continue

    # ✅ actually store the output in the dataframe
    df.at[idx, "output"] = rewritten
    num_generated += 1  # ✅ increment counter

    # 4. Periodic saving for safety
    if num_generated > 0 and num_generated % SAVE_EVERY == 0:
        df.to_csv(OUTPUT_PATH, index=False)
        print(f"Saved progress after {num_generated} new generations.")

# 5. Final save
df.to_csv(OUTPUT_PATH, index=False)
print(f"Done! Total new generations this run: {num_generated}")
print(f"Saved to: {OUTPUT_PATH}")


Loading original eval file: /content/drive/MyDrive/test_df_transfer_clean.csv
Total rows: 192


  0%|          | 0/192 [00:00<?, ?it/s]

You are a careful literary rewriting assistant that follows instructions exactly.
user
Rewrite the paragraph from **science_fiction** into **fantasy** style.
Rules: preserve character names and all events; change only tone, narration, and imagery.

Paragraph:
It is upon these banks, and on these waters, says Michelet, that man is renewed in one of the most powerful climates of the globe. But, beautiful as it was, I could only take a rapid glance at the basin whose superficial area is two million of square yards. Even Captain Nemo’s knowledge was lost to me, for this puzzling person did not appear once during our passage at full speed. I estimated the course which the _Nautilus_ took under the waves of the sea at about six hundred leagues, and it was accomplished in forty-eight hours. Starting on the morning of the 16th of February from the shores of Greece, we had crossed the Straits of Gibraltar by sunrise on the 18th.
assistant
As per the ancient lore set forth by Michelet, these ter

Saved progress after 10 new generations.
Saved progress after 20 new generations.
Saved progress after 30 new generations.
Saved progress after 40 new generations.
Saved progress after 50 new generations.
Saved progress after 60 new generations.
Saved progress after 70 new generations.
Saved progress after 80 new generations.
Saved progress after 90 new generations.
Saved progress after 100 new generations.
Saved progress after 110 new generations.
Saved progress after 120 new generations.
Saved progress after 130 new generations.
Saved progress after 140 new generations.
Saved progress after 150 new generations.
Saved progress after 160 new generations.
Saved progress after 170 new generations.
Saved progress after 180 new generations.
Saved progress after 190 new generations.
Done! Total new generations this run: 192
Saved to: /content/drive/MyDrive/new_test_with_lora.csv
