In [1]:
import torch
import pandas as pd
import numpy as np
from transformers import GPT2Tokenizer, GPT2LMHeadModel, get_linear_schedule_with_warmup
from torch.optim import AdamW

from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
import nltk 
import joblib
import os
import re
from tqdm import tqdm

2025-10-07 14:11:52.196460: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1759846312.445253      37 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1759846312.509935      37 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
MODEL_NAME = 'gpt2'

USER_A_TOKEN = "<|user_a|>"
USER_B_TOKEN = "<|user_b|>"

In [3]:
DATA_PATH = '/Desktop/Dataset/conversationfile.xlsx'

In [4]:
CONTEXT_WINDOW = 12

EPOCHS = 100
LEARNING_RATE = 5e-5
BATCH_SIZE = 4
MAX_LEN = 256

In [5]:
OUTPUT_DIR = './submission_folder/'
MODEL_SAVE_PATH = os.path.join(OUTPUT_DIR, 'Model.joblib')

os.makedirs(OUTPUT_DIR, exist_ok=True)

In [6]:
def load_and_prepare_data(file_path):
    try:
        df = pd.read_excel(file_path)
        
        df.columns = [col.strip() for col in df.columns]
        
        required_cols = ['Conversation ID', 'Timestamp', 'Sender', 'Message']
        for col in required_cols:
            if col not in df.columns:
                raise ValueError(f"Missing required column: {col}")
        
        # Clean data: remove rows with empty messages and strip whitespace
        df.dropna(subset=['Message'], inplace=True)
        df['Message'] = df['Message'].astype(str).str.strip()
        df['Sender'] = df['Sender'].str.strip()
        
        # Sort values to ensure chronological order within each conversation
        df.sort_values(by=['Conversation ID', 'Timestamp'], inplace=True)
        
        print("Data loaded and prepared successfully.")
        return df.reset_index(drop=True)
        
    except FileNotFoundError:
        print(f"Error: The file was not found at {file_path}")
        print("Please ensure the DATA_PATH in Section 2 is correct.")
        return None
    except Exception as e:
        print(f"An error occurred while loading data: {e}")
        return None

In [7]:
def create_training_examples(df, tokenizer, context_window):
    training_texts = []
    for convo_id, group in df.groupby('Conversation ID'):
        messages = group['Message'].tolist()
        senders = group['Sender'].tolist()
        
        for i in range(1, len(messages)):
            if senders[i] == 'User A' and senders[i-1] == 'User B':
                # Context is the preceding messages in the *same conversation*.
                start_index = max(0, i - context_window)
                context_slice = list(zip(senders[start_index:i], messages[start_index:i]))
                
                prompt = ""
                for sender, message in context_slice:
                    sender_token = USER_A_TOKEN if sender == 'User A' else USER_B_TOKEN
                    prompt += f"{sender_token} {message} "
                
                # The target reply is the current message from User A.
                reply = f"{USER_A_TOKEN} {messages[i]}"
                
                # Combine into a single string for the language model.
                training_texts.append(f"{prompt.strip()} {reply.strip()} {tokenizer.eos_token}")

    print(f"Created {len(training_texts)} training examples.")
    return training_texts

In [8]:
print("--- Starting Data Processing ---")
conversation_df = load_and_prepare_data(DATA_PATH)

if conversation_df is None:
    raise SystemExit("Stopping execution due to data loading failure.")

--- Starting Data Processing ---
Data loaded and prepared successfully.


In [9]:
tokenizer = GPT2Tokenizer.from_pretrained(MODEL_NAME)
model = GPT2LMHeadModel.from_pretrained(MODEL_NAME)

In [10]:
special_tokens_dict = {'additional_special_tokens': [USER_A_TOKEN, USER_B_TOKEN], 'pad_token': '<|pad|>'}
tokenizer.add_special_tokens(special_tokens_dict)
model.resize_token_embeddings(len(tokenizer))
print("Tokenizer and Model loaded and configured.")

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


Tokenizer and Model loaded and configured.


In [11]:
training_data = create_training_examples(conversation_df, tokenizer, CONTEXT_WINDOW)
if not training_data:
    raise ValueError("No valid training examples could be created. Please check the dataset for conversations where User A replies to User B.")

Created 9 training examples.


In [12]:
train_texts, val_texts = train_test_split(training_data, test_size=0.1, random_state=42)

In [13]:
class ChatDataset(Dataset):
    def __init__(self, texts, tokenizer, max_length):
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.examples = []
        for text in tqdm(texts, desc="Tokenizing data"):
            tokenized_output = self.tokenizer(
                text,
                truncation=True,
                max_length=self.max_length,
                padding="max_length",
                return_tensors="pt"
            )
            self.examples.append(tokenized_output)

    def __len__(self):
        return len(self.examples)

    def __getitem__(self, idx):
        item = self.examples[idx]
        # For a Causal Language Model like GPT-2, labels are the same as input_ids.
        return {
            "input_ids": item['input_ids'].flatten(),
            "attention_mask": item['attention_mask'].flatten(),
            "labels": item['input_ids'].flatten() 
        }

print("\n--- Creating Datasets and Dataloaders ---")
train_dataset = ChatDataset(train_texts, tokenizer, MAX_LEN)
val_dataset = ChatDataset(val_texts, tokenizer, MAX_LEN)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)
print("DataLoaders are ready.")


--- Creating Datasets and Dataloaders ---


Tokenizing data: 100%|██████████| 8/8 [00:00<00:00, 761.41it/s]
Tokenizing data: 100%|██████████| 1/1 [00:00<00:00, 791.08it/s]

DataLoaders are ready.





In [14]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
print(f"\nModel moved to device: {device}")

optimizer = AdamW(model.parameters(), lr=LEARNING_RATE)
total_steps = len(train_loader) * EPOCHS
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)


Model moved to device: cuda


In [15]:
print("--- Starting Model Fine-Tuning ---")
for epoch in range(EPOCHS):
    print(f"\n--- Epoch {epoch + 1}/{EPOCHS} ---")
    model.train()
    total_train_loss = 0
    for batch in tqdm(train_loader, desc=f"Training Epoch {epoch+1}"):
        model.zero_grad()
        
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model(
            input_ids,
            attention_mask=attention_mask,
            labels=labels
        )
        
        loss = outputs.loss
        total_train_loss += loss.item()
        loss.backward()
        optimizer.step()
        scheduler.step()
        
    avg_train_loss = total_train_loss / len(train_loader)
    print(f"Average Training Loss for Epoch {epoch+1}: {avg_train_loss:.4f}")

print("\nFine-tuning complete.")

--- Starting Model Fine-Tuning ---

--- Epoch 1/100 ---


Training Epoch 1:   0%|          | 0/2 [00:00<?, ?it/s]`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.
Training Epoch 1: 100%|██████████| 2/2 [00:01<00:00,  1.88it/s]


Average Training Loss for Epoch 1: 10.7743

--- Epoch 2/100 ---


Training Epoch 2: 100%|██████████| 2/2 [00:00<00:00,  4.94it/s]


Average Training Loss for Epoch 2: 9.9142

--- Epoch 3/100 ---


Training Epoch 3: 100%|██████████| 2/2 [00:00<00:00,  4.96it/s]


Average Training Loss for Epoch 3: 9.5998

--- Epoch 4/100 ---


Training Epoch 4: 100%|██████████| 2/2 [00:00<00:00,  4.95it/s]


Average Training Loss for Epoch 4: 9.3946

--- Epoch 5/100 ---


Training Epoch 5: 100%|██████████| 2/2 [00:00<00:00,  4.94it/s]


Average Training Loss for Epoch 5: 9.1977

--- Epoch 6/100 ---


Training Epoch 6: 100%|██████████| 2/2 [00:00<00:00,  4.94it/s]


Average Training Loss for Epoch 6: 9.0104

--- Epoch 7/100 ---


Training Epoch 7: 100%|██████████| 2/2 [00:00<00:00,  4.97it/s]


Average Training Loss for Epoch 7: 8.8758

--- Epoch 8/100 ---


Training Epoch 8: 100%|██████████| 2/2 [00:00<00:00,  4.95it/s]


Average Training Loss for Epoch 8: 8.7036

--- Epoch 9/100 ---


Training Epoch 9: 100%|██████████| 2/2 [00:00<00:00,  4.94it/s]


Average Training Loss for Epoch 9: 8.5469

--- Epoch 10/100 ---


Training Epoch 10: 100%|██████████| 2/2 [00:00<00:00,  4.96it/s]


Average Training Loss for Epoch 10: 8.3707

--- Epoch 11/100 ---


Training Epoch 11: 100%|██████████| 2/2 [00:00<00:00,  4.95it/s]


Average Training Loss for Epoch 11: 8.2020

--- Epoch 12/100 ---


Training Epoch 12: 100%|██████████| 2/2 [00:00<00:00,  4.95it/s]


Average Training Loss for Epoch 12: 7.9901

--- Epoch 13/100 ---


Training Epoch 13: 100%|██████████| 2/2 [00:00<00:00,  4.93it/s]


Average Training Loss for Epoch 13: 7.7227

--- Epoch 14/100 ---


Training Epoch 14: 100%|██████████| 2/2 [00:00<00:00,  4.96it/s]


Average Training Loss for Epoch 14: 7.4464

--- Epoch 15/100 ---


Training Epoch 15: 100%|██████████| 2/2 [00:00<00:00,  4.93it/s]


Average Training Loss for Epoch 15: 7.2032

--- Epoch 16/100 ---


Training Epoch 16: 100%|██████████| 2/2 [00:00<00:00,  4.95it/s]


Average Training Loss for Epoch 16: 6.9916

--- Epoch 17/100 ---


Training Epoch 17: 100%|██████████| 2/2 [00:00<00:00,  4.95it/s]


Average Training Loss for Epoch 17: 6.7734

--- Epoch 18/100 ---


Training Epoch 18: 100%|██████████| 2/2 [00:00<00:00,  4.94it/s]


Average Training Loss for Epoch 18: 6.5679

--- Epoch 19/100 ---


Training Epoch 19: 100%|██████████| 2/2 [00:00<00:00,  4.96it/s]


Average Training Loss for Epoch 19: 6.3773

--- Epoch 20/100 ---


Training Epoch 20: 100%|██████████| 2/2 [00:00<00:00,  4.95it/s]


Average Training Loss for Epoch 20: 6.1777

--- Epoch 21/100 ---


Training Epoch 21: 100%|██████████| 2/2 [00:00<00:00,  4.97it/s]


Average Training Loss for Epoch 21: 5.9898

--- Epoch 22/100 ---


Training Epoch 22: 100%|██████████| 2/2 [00:00<00:00,  4.93it/s]


Average Training Loss for Epoch 22: 5.7886

--- Epoch 23/100 ---


Training Epoch 23: 100%|██████████| 2/2 [00:00<00:00,  4.93it/s]


Average Training Loss for Epoch 23: 5.6153

--- Epoch 24/100 ---


Training Epoch 24: 100%|██████████| 2/2 [00:00<00:00,  4.96it/s]


Average Training Loss for Epoch 24: 5.4268

--- Epoch 25/100 ---


Training Epoch 25: 100%|██████████| 2/2 [00:00<00:00,  4.94it/s]


Average Training Loss for Epoch 25: 5.2637

--- Epoch 26/100 ---


Training Epoch 26: 100%|██████████| 2/2 [00:00<00:00,  4.97it/s]


Average Training Loss for Epoch 26: 5.0533

--- Epoch 27/100 ---


Training Epoch 27: 100%|██████████| 2/2 [00:00<00:00,  4.92it/s]


Average Training Loss for Epoch 27: 4.8749

--- Epoch 28/100 ---


Training Epoch 28: 100%|██████████| 2/2 [00:00<00:00,  4.95it/s]


Average Training Loss for Epoch 28: 4.6960

--- Epoch 29/100 ---


Training Epoch 29: 100%|██████████| 2/2 [00:00<00:00,  4.96it/s]


Average Training Loss for Epoch 29: 4.5084

--- Epoch 30/100 ---


Training Epoch 30: 100%|██████████| 2/2 [00:00<00:00,  4.91it/s]


Average Training Loss for Epoch 30: 4.3277

--- Epoch 31/100 ---


Training Epoch 31: 100%|██████████| 2/2 [00:00<00:00,  5.01it/s]


Average Training Loss for Epoch 31: 4.1351

--- Epoch 32/100 ---


Training Epoch 32: 100%|██████████| 2/2 [00:00<00:00,  4.96it/s]


Average Training Loss for Epoch 32: 3.9588

--- Epoch 33/100 ---


Training Epoch 33: 100%|██████████| 2/2 [00:00<00:00,  4.96it/s]


Average Training Loss for Epoch 33: 3.7674

--- Epoch 34/100 ---


Training Epoch 34: 100%|██████████| 2/2 [00:00<00:00,  4.91it/s]


Average Training Loss for Epoch 34: 3.5912

--- Epoch 35/100 ---


Training Epoch 35: 100%|██████████| 2/2 [00:00<00:00,  4.96it/s]


Average Training Loss for Epoch 35: 3.4160

--- Epoch 36/100 ---


Training Epoch 36: 100%|██████████| 2/2 [00:00<00:00,  4.95it/s]


Average Training Loss for Epoch 36: 3.2384

--- Epoch 37/100 ---


Training Epoch 37: 100%|██████████| 2/2 [00:00<00:00,  4.94it/s]


Average Training Loss for Epoch 37: 3.0719

--- Epoch 38/100 ---


Training Epoch 38: 100%|██████████| 2/2 [00:00<00:00,  4.96it/s]


Average Training Loss for Epoch 38: 2.8788

--- Epoch 39/100 ---


Training Epoch 39: 100%|██████████| 2/2 [00:00<00:00,  4.97it/s]


Average Training Loss for Epoch 39: 2.7310

--- Epoch 40/100 ---


Training Epoch 40: 100%|██████████| 2/2 [00:00<00:00,  4.96it/s]


Average Training Loss for Epoch 40: 2.5564

--- Epoch 41/100 ---


Training Epoch 41: 100%|██████████| 2/2 [00:00<00:00,  4.97it/s]


Average Training Loss for Epoch 41: 2.3916

--- Epoch 42/100 ---


Training Epoch 42: 100%|██████████| 2/2 [00:00<00:00,  4.95it/s]


Average Training Loss for Epoch 42: 2.2247

--- Epoch 43/100 ---


Training Epoch 43: 100%|██████████| 2/2 [00:00<00:00,  4.94it/s]


Average Training Loss for Epoch 43: 2.0658

--- Epoch 44/100 ---


Training Epoch 44: 100%|██████████| 2/2 [00:00<00:00,  4.95it/s]


Average Training Loss for Epoch 44: 1.9170

--- Epoch 45/100 ---


Training Epoch 45: 100%|██████████| 2/2 [00:00<00:00,  4.93it/s]


Average Training Loss for Epoch 45: 1.7670

--- Epoch 46/100 ---


Training Epoch 46: 100%|██████████| 2/2 [00:00<00:00,  4.96it/s]


Average Training Loss for Epoch 46: 1.6223

--- Epoch 47/100 ---


Training Epoch 47: 100%|██████████| 2/2 [00:00<00:00,  4.95it/s]


Average Training Loss for Epoch 47: 1.5078

--- Epoch 48/100 ---


Training Epoch 48: 100%|██████████| 2/2 [00:00<00:00,  4.93it/s]


Average Training Loss for Epoch 48: 1.3748

--- Epoch 49/100 ---


Training Epoch 49: 100%|██████████| 2/2 [00:00<00:00,  4.96it/s]


Average Training Loss for Epoch 49: 1.2543

--- Epoch 50/100 ---


Training Epoch 50: 100%|██████████| 2/2 [00:00<00:00,  4.96it/s]


Average Training Loss for Epoch 50: 1.1463

--- Epoch 51/100 ---


Training Epoch 51: 100%|██████████| 2/2 [00:00<00:00,  4.94it/s]


Average Training Loss for Epoch 51: 1.0521

--- Epoch 52/100 ---


Training Epoch 52: 100%|██████████| 2/2 [00:00<00:00,  4.94it/s]


Average Training Loss for Epoch 52: 0.9767

--- Epoch 53/100 ---


Training Epoch 53: 100%|██████████| 2/2 [00:00<00:00,  4.96it/s]


Average Training Loss for Epoch 53: 0.8782

--- Epoch 54/100 ---


Training Epoch 54: 100%|██████████| 2/2 [00:00<00:00,  4.95it/s]


Average Training Loss for Epoch 54: 0.8112

--- Epoch 55/100 ---


Training Epoch 55: 100%|██████████| 2/2 [00:00<00:00,  4.94it/s]


Average Training Loss for Epoch 55: 0.7351

--- Epoch 56/100 ---


Training Epoch 56: 100%|██████████| 2/2 [00:00<00:00,  4.97it/s]


Average Training Loss for Epoch 56: 0.6805

--- Epoch 57/100 ---


Training Epoch 57: 100%|██████████| 2/2 [00:00<00:00,  4.94it/s]


Average Training Loss for Epoch 57: 0.6245

--- Epoch 58/100 ---


Training Epoch 58: 100%|██████████| 2/2 [00:00<00:00,  4.94it/s]


Average Training Loss for Epoch 58: 0.5752

--- Epoch 59/100 ---


Training Epoch 59: 100%|██████████| 2/2 [00:00<00:00,  4.95it/s]


Average Training Loss for Epoch 59: 0.5395

--- Epoch 60/100 ---


Training Epoch 60: 100%|██████████| 2/2 [00:00<00:00,  4.96it/s]


Average Training Loss for Epoch 60: 0.4976

--- Epoch 61/100 ---


Training Epoch 61: 100%|██████████| 2/2 [00:00<00:00,  4.95it/s]


Average Training Loss for Epoch 61: 0.4644

--- Epoch 62/100 ---


Training Epoch 62: 100%|██████████| 2/2 [00:00<00:00,  4.93it/s]


Average Training Loss for Epoch 62: 0.4323

--- Epoch 63/100 ---


Training Epoch 63: 100%|██████████| 2/2 [00:00<00:00,  4.96it/s]


Average Training Loss for Epoch 63: 0.4099

--- Epoch 64/100 ---


Training Epoch 64: 100%|██████████| 2/2 [00:00<00:00,  4.96it/s]


Average Training Loss for Epoch 64: 0.3861

--- Epoch 65/100 ---


Training Epoch 65: 100%|██████████| 2/2 [00:00<00:00,  4.96it/s]


Average Training Loss for Epoch 65: 0.3720

--- Epoch 66/100 ---


Training Epoch 66: 100%|██████████| 2/2 [00:00<00:00,  4.96it/s]


Average Training Loss for Epoch 66: 0.3504

--- Epoch 67/100 ---


Training Epoch 67: 100%|██████████| 2/2 [00:00<00:00,  4.93it/s]


Average Training Loss for Epoch 67: 0.3373

--- Epoch 68/100 ---


Training Epoch 68: 100%|██████████| 2/2 [00:00<00:00,  4.94it/s]


Average Training Loss for Epoch 68: 0.3224

--- Epoch 69/100 ---


Training Epoch 69: 100%|██████████| 2/2 [00:00<00:00,  4.95it/s]


Average Training Loss for Epoch 69: 0.3115

--- Epoch 70/100 ---


Training Epoch 70: 100%|██████████| 2/2 [00:00<00:00,  4.95it/s]


Average Training Loss for Epoch 70: 0.3027

--- Epoch 71/100 ---


Training Epoch 71: 100%|██████████| 2/2 [00:00<00:00,  4.95it/s]


Average Training Loss for Epoch 71: 0.2870

--- Epoch 72/100 ---


Training Epoch 72: 100%|██████████| 2/2 [00:00<00:00,  4.91it/s]


Average Training Loss for Epoch 72: 0.2750

--- Epoch 73/100 ---


Training Epoch 73: 100%|██████████| 2/2 [00:00<00:00,  4.98it/s]


Average Training Loss for Epoch 73: 0.2719

--- Epoch 74/100 ---


Training Epoch 74: 100%|██████████| 2/2 [00:00<00:00,  4.96it/s]


Average Training Loss for Epoch 74: 0.2747

--- Epoch 75/100 ---


Training Epoch 75: 100%|██████████| 2/2 [00:00<00:00,  4.94it/s]


Average Training Loss for Epoch 75: 0.2558

--- Epoch 76/100 ---


Training Epoch 76: 100%|██████████| 2/2 [00:00<00:00,  4.94it/s]


Average Training Loss for Epoch 76: 0.2530

--- Epoch 77/100 ---


Training Epoch 77: 100%|██████████| 2/2 [00:00<00:00,  4.96it/s]


Average Training Loss for Epoch 77: 0.2516

--- Epoch 78/100 ---


Training Epoch 78: 100%|██████████| 2/2 [00:00<00:00,  4.96it/s]


Average Training Loss for Epoch 78: 0.2408

--- Epoch 79/100 ---


Training Epoch 79: 100%|██████████| 2/2 [00:00<00:00,  4.93it/s]


Average Training Loss for Epoch 79: 0.2415

--- Epoch 80/100 ---


Training Epoch 80: 100%|██████████| 2/2 [00:00<00:00,  4.97it/s]


Average Training Loss for Epoch 80: 0.2335

--- Epoch 81/100 ---


Training Epoch 81: 100%|██████████| 2/2 [00:00<00:00,  4.95it/s]


Average Training Loss for Epoch 81: 0.2313

--- Epoch 82/100 ---


Training Epoch 82: 100%|██████████| 2/2 [00:00<00:00,  4.95it/s]


Average Training Loss for Epoch 82: 0.2240

--- Epoch 83/100 ---


Training Epoch 83: 100%|██████████| 2/2 [00:00<00:00,  4.94it/s]


Average Training Loss for Epoch 83: 0.2234

--- Epoch 84/100 ---


Training Epoch 84: 100%|██████████| 2/2 [00:00<00:00,  4.94it/s]


Average Training Loss for Epoch 84: 0.2191

--- Epoch 85/100 ---


Training Epoch 85: 100%|██████████| 2/2 [00:00<00:00,  4.96it/s]


Average Training Loss for Epoch 85: 0.2191

--- Epoch 86/100 ---


Training Epoch 86: 100%|██████████| 2/2 [00:00<00:00,  4.95it/s]


Average Training Loss for Epoch 86: 0.2119

--- Epoch 87/100 ---


Training Epoch 87: 100%|██████████| 2/2 [00:00<00:00,  4.95it/s]


Average Training Loss for Epoch 87: 0.2131

--- Epoch 88/100 ---


Training Epoch 88: 100%|██████████| 2/2 [00:00<00:00,  4.96it/s]


Average Training Loss for Epoch 88: 0.2128

--- Epoch 89/100 ---


Training Epoch 89: 100%|██████████| 2/2 [00:00<00:00,  4.95it/s]


Average Training Loss for Epoch 89: 0.2030

--- Epoch 90/100 ---


Training Epoch 90: 100%|██████████| 2/2 [00:00<00:00,  4.96it/s]


Average Training Loss for Epoch 90: 0.2074

--- Epoch 91/100 ---


Training Epoch 91: 100%|██████████| 2/2 [00:00<00:00,  4.94it/s]


Average Training Loss for Epoch 91: 0.2060

--- Epoch 92/100 ---


Training Epoch 92: 100%|██████████| 2/2 [00:00<00:00,  4.98it/s]


Average Training Loss for Epoch 92: 0.2050

--- Epoch 93/100 ---


Training Epoch 93: 100%|██████████| 2/2 [00:00<00:00,  4.95it/s]


Average Training Loss for Epoch 93: 0.2019

--- Epoch 94/100 ---


Training Epoch 94: 100%|██████████| 2/2 [00:00<00:00,  4.95it/s]


Average Training Loss for Epoch 94: 0.2027

--- Epoch 95/100 ---


Training Epoch 95: 100%|██████████| 2/2 [00:00<00:00,  4.96it/s]


Average Training Loss for Epoch 95: 0.1966

--- Epoch 96/100 ---


Training Epoch 96: 100%|██████████| 2/2 [00:00<00:00,  4.93it/s]


Average Training Loss for Epoch 96: 0.1991

--- Epoch 97/100 ---


Training Epoch 97: 100%|██████████| 2/2 [00:00<00:00,  4.91it/s]


Average Training Loss for Epoch 97: 0.2013

--- Epoch 98/100 ---


Training Epoch 98: 100%|██████████| 2/2 [00:00<00:00,  5.02it/s]


Average Training Loss for Epoch 98: 0.1999

--- Epoch 99/100 ---


Training Epoch 99: 100%|██████████| 2/2 [00:00<00:00,  4.97it/s]


Average Training Loss for Epoch 99: 0.1945

--- Epoch 100/100 ---


Training Epoch 100: 100%|██████████| 2/2 [00:00<00:00,  4.95it/s]

Average Training Loss for Epoch 100: 0.1992

Fine-tuning complete.





In [16]:
model_and_tokenizer_to_save = {
    'model_state_dict': model.to('cpu').state_dict(),
    'tokenizer': tokenizer,
    'model_name': MODEL_NAME
}

joblib.dump(model_and_tokenizer_to_save, MODEL_SAVE_PATH)
print(f"Model and tokenizer saved to {MODEL_SAVE_PATH}")

Model and tokenizer saved to ./submission_folder/Model.joblib


In [17]:
def load_model_from_joblib(path):
    saved_data = joblib.load(path)
    
    tokenizer = saved_data['tokenizer']
    model = GPT2LMHeadModel.from_pretrained(saved_data['model_name'])
    model.resize_token_embeddings(len(tokenizer))
    model.load_state_dict(saved_data['model_state_dict'])
    
    return model, tokenizer

In [18]:
def generate_reply(model, tokenizer, history_df, new_message_from_b, device):
    model.to(device)
    model.eval()

    prompt = ""
    for _, row in history_df.iterrows():
        sender_token = USER_A_TOKEN if row['Sender'] == 'User A' else USER_B_TOKEN
        prompt += f"{sender_token} {row['Message']} "
    
    prompt += f"{USER_B_TOKEN} {new_message_from_b} {USER_A_TOKEN}"

    inputs = tokenizer(prompt, return_tensors='pt').to(device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=50,  # Limit the length of the new reply
            pad_token_id=tokenizer.eos_token_id,
            no_repeat_ngram_size=2,
            do_sample=True,     # Activate sampling
            top_k=50,           # Consider the top 50 tokens
            top_p=0.95,         # Use nucleus sampling
        )
    
    generated_tokens = outputs[0][inputs['input_ids'].shape[-1]:]
    reply = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
    
    return reply

In [24]:
print("\n--- Testing Reply Generation ---")
# Load the model back for a clean test
loaded_model, loaded_tokenizer = load_model_from_joblib(MODEL_SAVE_PATH)

# [cite_start]Context from Conversation ID 4 [cite: 1]
conversation_history = pd.DataFrame([
    {'Sender': 'User A', 'Message': "Finally watched that new sci-fi movie everyone's talking about."},
    {'Sender': 'User B', 'Message': "Nice! What did you think? I loved the visuals."},
    {'Sender': 'User A', 'Message': "Visuals were amazing, but the plot was a bit predictable for me."}
])
user_b_new_message = "I can see that. The ending felt a bit rushed. Still a fun watch though."

generated_reply = generate_reply(loaded_model, loaded_tokenizer, conversation_history, user_b_new_message, device)
print(f"Conversation Context:\n{conversation_history.to_string(index=False)}\n")
print(f"User B says: '{user_b_new_message}'")
print(f"Generated User A reply: '{generated_reply}'")
print(f"Actual User A reply from data: 'Definitely. Worth it just for the big screen experience.'")


--- Testing Reply Generation ---
Conversation Context:
Sender                                                          Message
User A  Finally watched that new sci-fi movie everyone's talking about.
User B                   Nice! What did you think? I loved the visuals.
User A Visuals were amazing, but the plot was a bit predictable for me.

User B says: 'I can see that. The ending felt a bit rushed. Still a fun watch though.'
Generated User A reply: '"Definitely. Worth it just for the big screen experience."'
Actual User A reply from data: 'Definitely. Worth it just for the big screen experience.'


In [25]:
def evaluate_model(model, val_loader, tokenizer, device):
    model.to(device)
    model.eval()
    
    total_perplexity_loss = 0
    bleu_scores = []
    chencherry = SmoothingFunction()
    
    print("\n--- Starting Final Evaluation ---")
    with torch.no_grad():
        for batch in tqdm(val_loader, desc="Calculating Perplexity"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            total_perplexity_loss += outputs.loss.item()
            
        avg_loss = total_perplexity_loss / len(val_loader)
        perplexity = torch.exp(torch.tensor(avg_loss))
        print(f"  - Perplexity on Validation Set: {perplexity.item():.4f}")

        for text in tqdm(val_texts, desc="Calculating BLEU Score"):
            prompt_end_index = text.rfind(USER_A_TOKEN)
            if prompt_end_index == -1: continue

            prompt = text[:prompt_end_index] + USER_A_TOKEN
            reference_reply = text[prompt_end_index:].replace(USER_A_TOKEN, "").replace(tokenizer.eos_token, "").strip()
            
            inputs = tokenizer(prompt, return_tensors='pt').to(device)
            
            outputs = model.generate(
                **inputs,
                max_new_tokens=50,
                pad_token_id=tokenizer.eos_token_id,
                do_sample=True,
                top_k=50,
                top_p=0.95,
            )
            
            generated_tokens = outputs[0][inputs['input_ids'].shape[-1]:]
            generated_reply = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

            reference_tokens = [reference_reply.split()]
            candidate_tokens = generated_reply.split()
            
            if candidate_tokens:
                bleu_score = sentence_bleu(reference_tokens, candidate_tokens, smoothing_function=chencherry.method1)
                bleu_scores.append(bleu_score)
            
    avg_bleu = np.mean(bleu_scores) if bleu_scores else 0
    print(f"  - Average BLEU Score on Validation Set: {avg_bleu:.4f}")
    
    return perplexity.item(), avg_bleu

In [26]:
final_perplexity, final_bleu = evaluate_model(loaded_model, val_loader, loaded_tokenizer, device)

print("\n--- Script Finished Successfully ---")


--- Starting Final Evaluation ---


Calculating Perplexity: 100%|██████████| 1/1 [00:00<00:00, 58.90it/s]


  - Perplexity on Validation Set: 1.1501


Calculating BLEU Score: 100%|██████████| 1/1 [00:00<00:00,  2.56it/s]

  - Average BLEU Score on Validation Set: 0.4098

--- Script Finished Successfully ---



