In [None]:
!pip install jupyterlab ipykernel ipywidgets datasets  torch torchvision torchaudio transformers peft evaluate

Collecting jupyterlab
  Downloading jupyterlab-4.4.3-py3-none-any.whl.metadata (16 kB)
Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting async-lru>=1.0.0 (from jupyterlab)
  Downloading async_lru-2.0.5-py3-none-any.whl.metadata (4.5 kB)
Collecting jupyter-lsp>=2.0.0 (from jupyterlab)
  Downloading jupyter_lsp-2.2.5-py3-none-any.whl.metadata (1.8 kB)
Collecting jupyter-server<3,>=2.4.0 (from jupyterlab)
  Downloading jupyter_server-2.16.0-py3-none-any.whl.metadata (8.5 kB)
Collecting jupyterlab-server<3,>=2.27.1 (from jupyterlab)
  Downloading jupyterlab_server-2.27.3-py3-none-any.whl.metadata (5.9 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu1

In [None]:
# AI Debate Partner - Argument Generation (Fixed Training)
# Fixes ValueError: The model did not return a loss

## Step 1: Import Libraries
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling  # Changed from DataCollatorWithPadding
)
from peft import LoraConfig, get_peft_model
from datasets import Dataset
import pandas as pd
import torch

## Step 2: Load and Preprocess Dataset
# Load CSV file
df = pd.read_csv('/content/train.csv')

# Preprocessing
df = df[['topic', 'argument', 'stance_WA']].dropna()

# Convert stance to readable labels
df['stance_label'] = df['stance_WA'].map({1: "pro", -1: "con"})

# Create prompt column
df['text'] = "Topic: " + df['topic'] + "; Stance: " + df['stance_label'] + "; Argument: " + df['argument'] + "<|endoftext|>"

# Create Dataset object
dataset = Dataset.from_pandas(df[['text']])

## Step 3: Tokenization
model_checkpoint = 'gpt2-medium'
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
tokenizer.pad_token = tokenizer.eos_token

def tokenize_function(examples):
    # Tokenize entire text sequence
    return tokenizer(
        examples['text'],
        truncation=True,
        max_length=256,
        padding="max_length"
    )

tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=['text'])

## Step 4: Model Setup
model = AutoModelForCausalLM.from_pretrained(model_checkpoint)

## Step 5: PEFT Configuration
peft_config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

## Step 6: Training Setup (FIXED)
# Use language modeling data collator
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False  # For causal language modeling
)

training_args = TrainingArguments(
    output_dir="ai-debate-generator",
    learning_rate=2e-4,
    per_device_train_batch_size=4,
    num_train_epochs=3,  # Reduced for faster iteration
    weight_decay=0.01,
    logging_steps=100,
    save_steps=500,
    report_to="none",
    prediction_loss_only=True,  # Ensure loss is computed
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=data_collator,  # Critical fix
)

## Step 7: Training
trainer.train()  # Should now compute loss

## Step 8: Argument Generation Function
def generate_argument(topic: str, stance: str):
    stance = stance.lower()
    if stance not in ["pro", "con"]:
        raise ValueError("Stance must be 'pro' or 'con'")

    prompt = f"Topic: {topic}; Stance: {stance}; Argument:"
    inputs = tokenizer(prompt, return_tensors="pt")

    # Generate with attention to context length
    outputs = model.generate(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=100,  # Limit output length
        temperature=0.7,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id  # Ensure proper termination
    )

    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    argument = generated_text.split("Argument:")[1].strip()
    return argument

## Step 9: Interactive Usage

    user_topic = input("Enter debate topic: ")
    user_stance = input("Enter your stance (pro/con): ")
    argument = generate_argument(user_topic, user_stance)
    print("\nGenerated Argument:")
    print("-------------------")
    print(argument)

## Step 10: Save Model
model.save_pretrained("ai-debate-generator-model")
tokenizer.save_pretrained("ai-debate-generator-tokenizer")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/718 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Map:   0%|          | 0/20974 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/1.52G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]



trainable params: 786,432 || all params: 355,609,600 || trainable%: 0.2212


No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Step,Training Loss
100,2.6827
200,2.0083
300,1.9852
400,1.9272
500,1.8321
600,1.8128
700,1.7773
800,1.7419
900,1.7447
1000,1.7621


Step,Training Loss
100,2.6827
200,2.0083
300,1.9852
400,1.9272
500,1.8321
600,1.8128
700,1.7773
800,1.7419
900,1.7447
1000,1.7621


('ai-debate-generator-tokenizer/tokenizer_config.json',
 'ai-debate-generator-tokenizer/special_tokens_map.json',
 'ai-debate-generator-tokenizer/vocab.json',
 'ai-debate-generator-tokenizer/merges.txt',
 'ai-debate-generator-tokenizer/added_tokens.json',
 'ai-debate-generator-tokenizer/tokenizer.json')

In [5]:
import shutil

# Zip the folder (e.g., "my_model") into "my_model.zip"
shutil.make_archive("my_model", 'zip', "/content/ai-debate-generator-model")


'/content/my_model.zip'

In [6]:


# Zip the folder (e.g., "my_model") into "my_model.zip"
shutil.make_archive("tokenizer", 'zip', "/content/ai-debate-generator-tokenizer")


'/content/tokenizer.zip'