In [None]:
!pip install jupyterlab ipykernel ipywidgets datasets  torch torchvision torchaudio transformers peft evaluate

Collecting jupyterlab
  Downloading jupyterlab-4.4.3-py3-none-any.whl.metadata (16 kB)
Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting async-lru>=1.0.0 (from jupyterlab)
  Downloading async_lru-2.0.5-py3-none-any.whl.metadata (4.5 kB)
Collecting jupyter-lsp>=2.0.0 (from jupyterlab)
  Downloading jupyter_lsp-2.2.5-py3-none-any.whl.metadata (1.8 kB)
Collecting jupyter-server<3,>=2.4.0 (from jupyterlab)
  Downloading jupyter_server-2.16.0-py3-none-any.whl.metadata (8.5 kB)
Collecting jupyterlab-server<3,>=2.27.1 (from jupyterlab)
  Downloading jupyterlab_server-2.27.3-py3-none-any.whl.metadata (5.9 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu1

In [None]:
# AI Debate Partner - Argument Generation (Fixed Loss Issue)
# Fixes ValueError: The model did not return a loss

## Step 1: Import Libraries
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from peft import LoraConfig, get_peft_model
from datasets import Dataset
import pandas as pd
import torch
import numpy as np

## Step 2: Load and Preprocess Dataset
model_checkpoint = 'gpt2-medium'
# The tokenizer has to exist before the training text is assembled, because
# every example is terminated with its EOS token.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
# No separate padding token is configured here, so EOS doubles as the pad token.
tokenizer.pad_token = tokenizer.eos_token

df = pd.read_csv('/content/train.csv')
df = df[['topic', 'argument', 'stance_WA']].dropna()
# Translate the numeric stance into the word used by the prompt template.
df['stance_label'] = df['stance_WA'].map({1: "pro", -1: "con"})
# Stances other than 1 / -1 map to NaN, which would turn the whole concatenated
# text into NaN; drop such rows before building the prompt.
df = df.dropna(subset=['stance_label'])
df['text'] = "Topic: " + df['topic'] + "; Stance: " + df['stance_label'] + "; Argument: " + df['argument'] + tokenizer.eos_token

# Create Dataset object; the pandas index left over from dropna() is not a
# feature, so it is not carried into the dataset as an extra column.
dataset = Dataset.from_pandas(df[['text']], preserve_index=False)

## Step 3: Tokenization

def tokenize_function(examples):
    """Encode the 'text' field of ``examples`` and attach training labels.

    The module-level ``tokenizer`` is called with truncation enabled and
    padding to a fixed length of 256 tokens. The returned encoding also
    carries a 'labels' entry holding a shallow copy of its 'input_ids'.
    """
    encoded = tokenizer(
        examples['text'],
        padding="max_length",
        max_length=256,
        truncation=True,
    )
    # For causal language modeling the targets start out equal to the inputs.
    encoded['labels'] = list(encoded['input_ids'])
    return encoded

tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=['text'])

## Step 4: Model Setup
model = AutoModelForCausalLM.from_pretrained(model_checkpoint)
model.config.use_cache = False  # Disable cache to prevent returning past_key_values

## Step 5: PEFT Configuration
peft_config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

## Step 6: Training Setup
def collate_causal_lm(features):
    """Stack fixed-length examples into a batch and mask padding in the labels.

    The pad token is the EOS token, so masking labels by token id (what
    DataCollatorForLanguageModeling does when mlm=False) would also hide the
    genuine EOS that ends each example and the model would never be trained to
    stop. Masking by attention_mask ignores only the real padding positions.

    Args:
        features: list of dicts holding equally long 'input_ids' and
            'attention_mask' sequences (tokenize_function pads every example
            to the same length).

    Returns:
        dict with 'input_ids', 'attention_mask' and 'labels' LongTensors of
        shape (batch, sequence); label positions with attention_mask == 0 are
        set to -100 so the loss skips them.
    """
    input_ids = torch.tensor([f["input_ids"] for f in features], dtype=torch.long)
    attention_mask = torch.tensor([f["attention_mask"] for f in features], dtype=torch.long)
    labels = input_ids.clone()
    labels[attention_mask == 0] = -100
    return {"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels}

data_collator = collate_causal_lm

training_args = TrainingArguments(
    output_dir="ai-debate-generator",
    learning_rate=2e-4,
    per_device_train_batch_size=4,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_steps=100,
    save_steps=500,
    report_to="none",
    prediction_loss_only=True,
    # PeftModel hides the base model's forward signature, so the Trainer cannot
    # discover the label column on its own; name it explicitly.
    label_names=["labels"],
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=data_collator,
)

## Step 7: Training
trainer.train()  # Should now compute loss

## Step 8: Argument Generation Function
def generate_argument(topic: str, stance: str):
    """Generate a debate argument for ``topic`` from the requested ``stance``.

    Builds the same "Topic: ...; Stance: ...; Argument:" prompt used for the
    training text, samples a continuation from the module-level ``model`` and
    returns only the newly generated text.

    Args:
        topic: Debate topic inserted into the prompt.
        stance: "pro" or "con"; case and surrounding whitespace are ignored.

    Returns:
        The generated argument with special tokens removed and surrounding
        whitespace stripped.

    Raises:
        ValueError: If ``stance`` is neither "pro" nor "con".
    """
    stance = stance.strip().lower()
    if stance not in ["pro", "con"]:
        raise ValueError("Stance must be 'pro' or 'con'")

    prompt = f"Topic: {topic}; Stance: {stance}; Argument:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    outputs = model.generate(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=100,
        temperature=0.7,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id
    )

    # The returned sequence starts with the prompt tokens. Decoding only what
    # follows them avoids splitting on the "Argument:" marker, which would cut
    # the text short if the model (or the topic) repeats that marker.
    prompt_length = inputs.input_ids.shape[1]
    completion_ids = outputs[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()

## Step 9: Save Model
# Persist the fine-tuned model and tokenizer before prompting for input, so an
# interrupted or unavailable input() cannot discard the training run.
model.save_pretrained("ai-debate-generator-model")
tokenizer.save_pretrained("ai-debate-generator-tokenizer")

## Step 10: Interactive Usage
if __name__ == "__main__":
    # Switch to inference mode and place the model on the GPU when one exists.
    model.eval()
    model.to("cuda" if torch.cuda.is_available() else "cpu")

    user_topic = input("Enter debate topic: ")
    user_stance = input("Enter your stance (pro/con): ")

    try:
        argument = generate_argument(user_topic, user_stance)
        print("\nGenerated Argument:")
        print("-------------------")
        print(argument)
    except Exception as e:
        # Report the problem (e.g. an invalid stance) without aborting the cell.
        print(f"\nError: {str(e)}")


Map:   0%|          | 0/20974 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


trainable params: 786,432 || all params: 355,609,600 || trainable%: 0.2212


`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Step,Training Loss
100,2.6665


'/content/my_model.zip'

In [None]:


import shutil

# Zip the saved tokenizer folder into "tokenizer.zip" in the current working directory
shutil.make_archive("tokenizer", 'zip', "/content/ai-debate-generator-tokenizer")


'/content/tokenizer.zip'