In [None]:
# Cell 1: Setup and Installations

!pip install transformers datasets torch huggingface_hub



In [None]:

# Cell 2: Log In to Hugging Face

from huggingface_hub import notebook_login

# Interactive Hugging Face login.
# This will display a widget to enter your Hugging Face access token.
# You can get a token from your HF profile -> Settings -> Access Tokens.
# Enter the token in the widget only; do not paste it into a code cell, since
# notebook source and outputs are easy to share by accident.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

# **Importing the Fine-Tuned Model**

In [None]:


import zipfile
import os
from google.colab import drive

# Make the user's Google Drive visible to this Colab runtime.
print("Mounting Google Drive...")
drive.mount('/content/drive')

# --- IMPORTANT: UPDATE THIS PATH ---
# Location of the zipped fine-tuned model inside Google Drive.
drive_zip_path = '/content/drive/MyDrive/gpt2-poetry-finetuned_v2.zip'
# Local (Colab-side) folder that receives the archive contents.
# NOTE(review): PoetryGenerator's default model_name is "./my_finetuned_model",
# which is a different folder from this one -- confirm which path is intended.
extract_dir = './my_finetuned_model_v2'

print(f"\nLooking for your model at: {drive_zip_path}")
if not os.path.exists(drive_zip_path):
    # Report the problem and leave the runtime untouched; later cells will not
    # find the model until the archive is in place and this cell is re-run.
    print(f"❌ Error: The file '{os.path.basename(drive_zip_path)}' was not found in your Google Drive.")
    print("Please make sure the file is in your 'My Drive' folder and the name matches exactly.")
else:
    print("Model found in Google Drive. Unzipping...")
    with zipfile.ZipFile(drive_zip_path) as zip_ref:
        zip_ref.extractall(extract_dir)
    print(f"✅ Model unzipped successfully into the '{extract_dir}' folder.")

Mounting Google Drive...
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

Looking for your model at: /content/drive/MyDrive/gpt2-poetry-finetuned_v2.zip
Model found in Google Drive. Unzipping...
✅ Model unzipped successfully into the './my_finetuned_model_v2' folder.


# **The Poetry Generator Class**

In [None]:


import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel

class PoetryGenerator:
    """
    A wrapper for your fine-tuned GPT-2 model to generate poetic lines.

    Attributes:
        tokenizer: GPT-2 tokenizer loaded from ``model_name``; its pad token
            falls back to the EOS token when none is configured.
        model: GPT-2 language-model head loaded from ``model_name``.
        device: ``cuda`` when available, otherwise ``cpu``.
    """

    # NOTE(review): the default folder below is "./my_finetuned_model", while the
    # import cell of this notebook unzips into "./my_finetuned_model_v2".
    # Pass model_name explicitly if the default folder does not exist.
    def __init__(self, model_name="./my_finetuned_model"):
        """Load tokenizer and model from ``model_name`` and move the model to the device."""
        print(f"Loading your custom fine-tuned model from '{model_name}'...")
        self.tokenizer = GPT2Tokenizer.from_pretrained(model_name)
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        self.model = GPT2LMHeadModel.from_pretrained(model_name)

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)
        print(f"✅ Custom model loaded successfully on device: {self.device}")

    def generate_candidates(self, prompt, num_candidates=5, max_length=20, **kwargs):
        """Sample continuations of ``prompt`` and return the non-empty ones.

        Args:
            prompt: Text the model should continue.
            num_candidates: Number of sequences to sample.
            max_length: Maximum number of *new* tokens per candidate (it is
                forwarded as ``max_new_tokens``, so the prompt is not counted).
            **kwargs: Extra generation options forwarded to ``model.generate``.
                They take precedence over the defaults used here (sampling
                with ``top_k=50`` and ``top_p=0.95``).

        Returns:
            A list of stripped continuation strings; candidates that are empty
            after stripping are dropped, so the list may be shorter than
            ``num_candidates``.
        """
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
        # Number of prompt tokens; every generated row starts with them.
        prompt_token_count = inputs["input_ids"].shape[1]

        generation_args = {
            "max_new_tokens": max_length,
            "num_return_sequences": num_candidates,
            "do_sample": True,
            "top_k": 50,
            "top_p": 0.95,
            "pad_token_id": self.tokenizer.pad_token_id,
        }
        # Caller options override the defaults instead of colliding with them
        # (passing e.g. top_k twice to generate() raises TypeError).
        generation_args.update(kwargs)

        outputs = self.model.generate(**inputs, **generation_args)

        # Cut the prompt off at the token level. Slicing the decoded string by
        # len(prompt) is unreliable because decoding may not reproduce the
        # prompt character-for-character (e.g. whitespace clean-up).
        generated_texts = [
            self.tokenizer.decode(output[prompt_token_count:], skip_special_tokens=True).strip()
            for output in outputs
        ]
        return [text for text in generated_texts if text]

print("✅ PoetryGenerator class defined.")

✅ PoetryGenerator class defined.


# **The RLHF Reward Model Class**

In [None]:


import torch.nn as nn
import torch.optim as optim
from transformers import DistilBertTokenizer, DistilBertModel

class RewardModel(nn.Module):
    """DistilBERT encoder followed by a one-unit linear layer that outputs a score.

    The attribute names ``bert_base`` and ``score_head`` are part of the saved
    ``state_dict`` keys, so they must stay stable for stored profiles to load.
    """

    def __init__(self, model_name="distilbert-base-uncased"):
        """Load the pretrained encoder and attach a fresh scoring layer."""
        super().__init__()
        encoder = DistilBertModel.from_pretrained(model_name)
        self.bert_base = encoder
        # The head maps the encoder's hidden width (config.dim) to one scalar.
        self.score_head = nn.Linear(encoder.config.dim, 1)

    def forward(self, input_ids, attention_mask):
        """Return the score computed from the hidden state at sequence position 0."""
        encoded = self.bert_base(input_ids=input_ids, attention_mask=attention_mask)
        # Position 0 along the sequence axis is used as the summary vector.
        first_position_state = encoded.last_hidden_state[:, 0]
        return self.score_head(first_position_state)

class RewardModelTrainer:
    """Owns a RewardModel plus the tokenizer, optimizer and loss used to fit it."""

    def __init__(self):
        """Build tokenizer, model, Adam optimizer (lr=1e-5) and an MSE loss."""
        self.tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
        self.model = RewardModel()
        use_gpu = torch.cuda.is_available()
        self.device = torch.device("cuda" if use_gpu else "cpu")
        self.model.to(self.device)
        self.optimizer = optim.Adam(self.model.parameters(), lr=1e-5)
        # Ratings are treated as a regression target, hence mean squared error.
        self.loss_function = nn.MSELoss()

    def train_on_ratings(self, rated_lines, epochs=5):
        """Fit the model on ``(text, score)`` pairs, one optimizer step per pair.

        Does nothing when ``rated_lines`` is empty. Prints the average loss
        after every epoch. The model is left in training mode afterwards.
        """
        if not rated_lines:
            return

        sample_count = len(rated_lines)
        print(f"\n--- Training Reward Model on {sample_count} ratings ---")
        self.model.train()

        for epoch_number in range(1, epochs + 1):
            loss_sum = 0
            for line_text, rating in rated_lines:
                encoded = self.tokenizer(
                    line_text, return_tensors="pt", padding=True, truncation=True
                ).to(self.device)
                # Shape (1, 1) so it lines up with the model's single score.
                target = torch.tensor([[float(rating)]], device=self.device)

                self.optimizer.zero_grad()
                prediction = self.model(**encoded)
                step_loss = self.loss_function(prediction, target)
                step_loss.backward()
                self.optimizer.step()

                loss_sum += step_loss.item()
            print(f"Epoch {epoch_number}/{epochs}, Average Loss: {loss_sum / sample_count:.4f}")

print("✅ RewardModel and RewardModelTrainer classes defined.")

✅ RewardModel and RewardModelTrainer classes defined.


# **Main interactive application**

In [None]:
# The final cell for your interactive application

import os
import torch

# --- Helper functions for saving/loading the reward model profile ---
def save_user_profile(user_id, trainer):
    """Write the trainer's reward-model weights to ``profiles/<user_id>/reward_model.pt``.

    The profile folder is created when missing. Only the model's
    ``state_dict`` is stored.
    """
    weights_file = f"profiles/{user_id}/reward_model.pt"
    os.makedirs(os.path.dirname(weights_file), exist_ok=True)
    model_state = trainer.model.state_dict()
    torch.save(model_state, weights_file)
    print(f"\n[+] Profile for '{user_id}' saved successfully.")

def load_user_profile(user_id, trainer):
    """Restore reward-model weights for ``user_id`` when a saved profile exists.

    Looks for ``profiles/<user_id>/reward_model.pt``. When the file is present
    its weights are loaded into ``trainer.model``; otherwise the trainer is
    left as it is. The same ``trainer`` object is returned in both cases.
    """
    weights_file = f"profiles/{user_id}/reward_model.pt"

    if not os.path.exists(weights_file):
        print(f"[+] Welcome, {user_id}! Creating a new profile for you.")
        return trainer

    # NOTE: torch.load unpickles the file; only load profiles you created yourself.
    saved_state = torch.load(weights_file, map_location=trainer.device)
    trainer.model.load_state_dict(saved_state)
    print(f"[+] Welcome back, {user_id}! Your profile has been loaded.")
    return trainer

# --- Main Application Logic ---
def _prompt_int(prompt, low, high, range_error, parse_error):
    """Ask until the user enters an integer in [low, high]; return that integer."""
    while True:
        try:
            value = int(input(prompt))
        except ValueError:
            print(parse_error)
            continue
        if low <= value <= high:
            return value
        print(range_error)


def _prompt_user_line(prompt):
    """Ask until the user enters a non-blank line; return it stripped."""
    while True:
        text = input(prompt).strip()
        if text:
            return text
        print("Please enter a non-empty line.")
        # After the first attempt fall back to a short prompt marker.
        prompt = "> "


def main_session(model_dir=None):
    """Run one interactive poem-writing session and update the user's reward model.

    Flow: ask for name, theme and first line; then repeatedly generate five
    candidate lines, collect a 0-5 rating for each, and offer the top-rated
    one (the user breaks ties). The user may keep it, inject their own line,
    or quit. All ratings gathered during the session are used to train the
    reward model, which is then saved under the user's name.

    Args:
        model_dir: Optional folder of the fine-tuned generator. When ``None``
            the ``PoetryGenerator`` default is used.
            NOTE(review): that default ("./my_finetuned_model") differs from
            the folder the import cell unzips into ("./my_finetuned_model_v2");
            pass the folder explicitly if the default does not exist.

    Returns:
        None.
    """
    print("--- Reciprocal Poetry Generation ---")
    # Strip so that "bob " and "bob" resolve to the same profile folder.
    user_id = input("Please enter your name to begin: ").strip()

    # Initialization
    if model_dir is None:
        generator = PoetryGenerator()
    else:
        generator = PoetryGenerator(model_name=model_dir)
    reward_trainer = RewardModelTrainer()
    reward_trainer = load_user_profile(user_id, reward_trainer)

    theme = input("\nEnter the theme for your poem:\n> ")
    # A blank first line would give the generator nothing to continue from.
    start_line = _prompt_user_line("\nEnter the first line of the poem:\n> ")
    poem_so_far = [start_line]
    session_feedback_buffer = []

    # The Creative Loop
    while True:
        print("\n" + "="*60)
        print("POEM SO FAR:")
        for line in poem_so_far:
            print(f"  {line}")
        print("="*60)

        # Only the two most recent lines are given to the generator as context.
        context_lines = poem_so_far[-2:]
        prompt = f"Theme: {theme}\nPoem: ...{' '.join(context_lines)}"

        print("\n[AI is generating 5 options based on the context]...")
        candidates = generator.generate_candidates(prompt, num_candidates=5, temperature=0.9)

        # --- SAFEGUARD: Filter out any blank lines ---
        candidates = [line for line in candidates if line.strip()]

        if not candidates:
            print("[AI had trouble generating. Please write your own line.]")
            chosen_line = _prompt_user_line("> ")
            # A line the user wrote themselves is recorded with the top score.
            session_feedback_buffer.append((chosen_line, 5))
            poem_so_far.append(chosen_line)
            continue

        print("\nPlease rate each of the following lines from 0 (bad) to 5 (excellent):")
        rated_candidates = []
        for number, line in enumerate(candidates, start=1):
            rating = _prompt_int(
                f"  {number}: {line}\n  Your rating (0-5): ",
                0,
                5,
                "Invalid rating. Please enter a number between 0 and 5.",
                "Invalid input. Please enter a number.",
            )
            rated_candidates.append({"text": line, "rating": rating})
            session_feedback_buffer.append((line, rating))

        # A rating of 0 prunes the candidate from selection (it is still kept
        # in the feedback buffer as a training example).
        valid_candidates = [c for c in rated_candidates if c["rating"] > 0]

        if not valid_candidates:
            print("\nAll lines were pruned. Please write your own line to continue.")
            chosen_line = _prompt_user_line("> ")
            session_feedback_buffer.append((chosen_line, 5))
            poem_so_far.append(chosen_line)
            continue

        # --- Handle ties and ask for confirmation ---
        highest_rating = max(c["rating"] for c in valid_candidates)
        tied_highest = [c for c in valid_candidates if c["rating"] == highest_rating]

        if len(tied_highest) > 1:
            print(f"\nThere's a tie for the highest rating ({highest_rating}). Please choose one:")
            for number, candidate in enumerate(tied_highest, start=1):
                print(f"  {number}: {candidate['text']}")
            choice = _prompt_int(
                "Your choice: ",
                1,
                len(tied_highest),
                "Invalid number.",
                "Please enter a number.",
            )
            highest_rated_line = tied_highest[choice - 1]['text']
        else:
            highest_rated_line = tied_highest[0]['text']

        print(f"\nThe highest-rated line is: '{highest_rated_line}'")

        # Ask for confirmation. Accept "y" as well as "yes" and ignore stray
        # whitespace, so a short answer is not silently treated as a rejection.
        keep_choice = input("Do you want to keep this line? (yes/no): ").strip().lower()
        if keep_choice in ("yes", "y"):
            print("\nAdding to the poem.")
            poem_so_far.append(highest_rated_line)
            continue

        print("\nSuggestion rejected.")
        inject_or_quit = input("Would you like to (i)nject your own line or (q)uit?: ").strip().lower()
        if inject_or_quit not in ("i", "inject"):
            break  # Anything other than "inject" ends the session.

        injected_line = _prompt_user_line("Your line: ")
        poem_so_far.append(injected_line)
        # This is strong feedback: your line was better than the AI's best
        session_feedback_buffer.append((injected_line, 5))
        # NOTE(review): the rejected line is already in the buffer with the
        # rating the user typed; this adds a second, lower-scored entry for the
        # same text. Confirm that the conflicting pair is intended.
        session_feedback_buffer.append((highest_rated_line, 1))

    # End of Session
    print("\n--- Session Over ---")
    if session_feedback_buffer:
        reward_trainer.train_on_ratings(session_feedback_buffer)
        save_user_profile(user_id, reward_trainer)
    else:
        print("No new feedback was provided. Profile not updated.")
    print("Thank you for creating poetry with me!")

# --- Start the application ---
# Runs the interactive session immediately; it blocks on input() prompts until
# the user quits, then trains and saves the reward model.
main_session()

--- Reciprocal Poetry Generation ---
Loading your custom fine-tuned model from './my_finetuned_model'...
✅ Custom model loaded successfully on device: cpu
[+] Welcome, qwerty! Creating a new profile for you.

POEM SO FAR:
  twinkle twinkle little star

[AI is generating 5 options based on the context]...

Please rate each of the following lines from 0 (bad) to 5 (excellent):

The highest-rated line is: 'Hints, whirls, glints, whirls, glow
Of tiny stars'

Suggestion rejected.

POEM SO FAR:
  twinkle twinkle little star
  how i wonder what you are

[AI is generating 5 options based on the context]...

Please rate each of the following lines from 0 (bad) to 5 (excellent):

All lines were pruned. Please write your own line to continue.

POEM SO FAR:
  twinkle twinkle little star
  how i wonder what you are
  up above the world so high

[AI is generating 5 options based on the context]...

Please rate each of the following lines from 0 (bad) to 5 (excellent):

All lines were pruned. Please 