In [1]:
"""
Fine-Tuning Llama 2 for Conversation Prediction (Partial→Remainder)

Steps:
1) Read CSV conversation data.
2) Build partial->remainder strings.
3) Concatenate partial + remainder into a single text sample so Llama can learn
   to predict remainder given partial in a causal LM fashion.
4) Use the 'LlamaTokenizer' and 'LlamaForCausalLM' from Hugging Face.
5) Fine-tune with Trainer (or custom loop).
6) Provide comments on parameter changes for easy tuning.
"""

# If needed, install requirements in a new environment:
# !pip install transformers accelerate bitsandbytes sentencepiece


"\nFine-Tuning Llama 2 for Conversation Prediction (Partial→Remainder)\n\nSteps:\n1) Read CSV conversation data.\n2) Build partial->remainder strings.\n3) Concatenate partial + remainder into a single text sample so Llama can learn\n   to predict remainder given partial in a causal LM fashion.\n4) Use the 'LlamaTokenizer' and 'LlamaForCausalLM' from Hugging Face.\n5) Fine-tune with Trainer (or custom loop).\n6) Provide comments on parameter changes for easy tuning.\n"

In [2]:
# ======================================
# 1) Imports
# ======================================
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
import math

# Hugging Face
from transformers import (
    LlamaTokenizer,
    LlamaForCausalLM,
    Trainer,
    TrainingArguments,
    DataCollatorForLanguageModeling
)

# Prefer CUDA when PyTorch reports an available GPU; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)


  from .autonotebook import tqdm as notebook_tqdm
  warn(



Using device: cpu


In [3]:
# ======================================
# 1) Imports
# ======================================
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
import math

# Hugging Face
from transformers import (
    LlamaTokenizer,
    LlamaForCausalLM,
    Trainer,
    TrainingArguments,
    DataCollatorForLanguageModeling
)

# NOTE(review): this cell repeats the imports and device selection of the
# previous cell verbatim; it is harmless to re-run but could be removed.
# Pick the CUDA device if PyTorch can see one, else the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)


Using device: cpu


In [4]:
# ======================================
# 3) Prepare a Single-Text Format
# ======================================
# We'll define a small function that merges partial + remainder
# into a single text. The model will treat the partial as context/prompt,
# then learn to predict the remainder tokens.

def make_single_text(partial, remainder, sep="\n"):
    """
    Build one training sample by joining the partial conversation and its
    remainder with `sep` (a newline by default).

    At inference time the partial alone serves as the prompt and the model
    is expected to generate the remainder.

    Args:
        partial: The leading part of the conversation (context/prompt).
        remainder: The continuation the model should learn to produce.
        sep: Separator placed between the two parts.

    Returns:
        The concatenation partial + sep + remainder.
    """
    pieces = (partial, remainder)
    return sep.join(pieces)

class ConversationPredictionDataset(Dataset):
    """
    Dataset of raw text samples for causal LM training.

    Every item is one string built from a (partial, remainder) pair via
    make_single_text. Pairs in which either side is empty or whitespace-only
    are dropped. No tokenization happens here; `tokenizer` and `max_length`
    are only stored on the instance.
    """
    def __init__(self, pairs, tokenizer, max_length=512):
        self.tokenizer = tokenizer
        self.max_length = max_length
        # Keep only pairs where both sides contain non-whitespace text.
        self.examples = [
            make_single_text(prefix, suffix)
            for prefix, suffix in pairs
            if prefix.strip() and suffix.strip()
        ]

    def __len__(self):
        """Number of retained text samples."""
        return len(self.examples)

    def __getitem__(self, idx):
        """Return the raw text sample stored at position `idx`."""
        return self.examples[idx]

# We'll define a data collator that does the tokenizing/truncation in __call__


In [5]:
# ======================================
# 4) Data Collator with LM
# ======================================
class ConversationCollator:
    """
    Tokenize a batch of raw text samples on the fly and attach causal-LM labels.

    The dataset only holds strings; tokenization, padding and truncation are
    done here per batch, so no large token lists are kept in memory.

    Args:
        tokenizer: Callable tokenizer, invoked as
            tokenizer(texts, return_tensors='pt', padding=True,
                      truncation=True, max_length=max_length).
        max_length: Maximum number of tokens kept per sample.
    """
    def __init__(self, tokenizer, max_length=512):
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __call__(self, batch_texts):
        """
        Encode `batch_texts` (a list of partial+remainder strings).

        Returns:
            The tokenizer output with an added "labels" entry: a copy of
            "input_ids" in which padded positions are replaced by -100.
        """
        encoding = self.tokenizer(
            batch_texts,
            return_tensors='pt',
            padding=True,
            truncation=True,
            max_length=self.max_length
        )
        # Without labels a causal LM computes no loss, so provide them here.
        labels = encoding["input_ids"].clone()
        # Mask padding via the attention mask rather than the pad token id:
        # the pad id may be shared with EOS, and real EOS tokens must stay
        # in the loss. -100 is the ignore index of the cross-entropy loss.
        if "attention_mask" in encoding:
            labels[encoding["attention_mask"] == 0] = -100
        encoding["labels"] = labels
        return encoding

# Then we can rely on DataCollatorForLanguageModeling or just do a standard approach:
# But for a causal LM approach, we do NOT do masked LM; we do full next-token prediction.

from transformers import default_data_collator

# We'll define a "DataCollatorForLanguageModeling" with mlm=False, so it sets up
# the appropriate labels for causal LM training.


In [6]:
# Actually, we can rely on a simpler approach:
# In GPT-style causal LM training the labels are a copy of input_ids
# (the model shifts them internally to predict the next token).
# If you want to exclude the partial portion from the loss,
# you need a custom approach that masks those label positions.
# Here we use the standard approach and train on the entire sequence,
# so the partial portion is also predicted and contributes to the loss.
#
# We'll keep it straightforward:
#   text -> encode -> (input_ids, labels = input_ids)
# and let the standard "DataCollatorForLanguageModeling(mlm=False)" create the labels.

from transformers import DataCollatorForLanguageModeling

def build_dataset_and_collator(pairs, tokenizer, max_length=512):
    """
    Build the text dataset and a matching batch collator for causal LM training.

    ConversationPredictionDataset yields raw strings, whereas
    DataCollatorForLanguageModeling expects already-tokenized features.
    The returned collator therefore tokenizes (and truncates) each batch of
    strings first and then delegates padding and label creation to
    DataCollatorForLanguageModeling(mlm=False).

    Args:
        pairs: Iterable of (partial, remainder) string pairs.
        tokenizer: Hugging Face tokenizer with a pad token configured.
        max_length: Maximum number of tokens kept per sample.

    Returns:
        (dataset, collator) where `collator` is a callable mapping a list of
        text samples to a padded batch with input_ids, attention_mask and labels.
    """
    dataset = ConversationPredictionDataset(pairs, tokenizer, max_length)
    lm_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

    def collate_texts(batch_texts):
        # Tokenize without padding; the LM collator pads to the longest
        # sample in the batch and builds the labels.
        encoded = tokenizer(batch_texts, truncation=True, max_length=max_length)
        features = [
            {key: values[i] for key, values in encoded.items()}
            for i in range(len(batch_texts))
        ]
        return lm_collator(features)

    return dataset, collate_texts


In [7]:
# ======================================
# 5) Loading Llama2, Tokenizer, Building Datasets
# ======================================
# We'll pick a Llama 2 checkpoint on Hugging Face. For example:
# "meta-llama/Llama-2-7b-hf" or "meta-llama/Llama-2-7b-chat-hf" if you have rights to it.
# Make sure you have accepted the license on Hugging Face and have an access token if needed.

model_name = "meta-llama/Llama-2-7b-hf"  # or "meta-llama/Llama-2-7b-chat-hf"

# We load the Llama tokenizer.
# NOTE(review): the recorded run of this cell failed here with
# "OSError: Can't load tokenizer for 'meta-llama/Llama-2-7b-hf'"; the checkpoint
# requires an accepted license and Hugging Face authentication (see above).
tokenizer = LlamaTokenizer.from_pretrained(model_name)
# Llama might not define a pad token by default, so reuse EOS as the pad token:
tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.padding_side = "right"  # typical for causal LM
print("Tokenizer vocab size:", len(tokenizer))

# Build dataset + collator
# NOTE(review): `pairs` must already exist as an iterable of (partial, remainder)
# string tuples; no cell visible in this part of the notebook defines it —
# confirm the CSV-loading step (step 2) runs before this cell.
max_length = 512  # <--- you can adjust this depending on GPU memory
ds, data_collator = build_dataset_and_collator(pairs, tokenizer, max_length)

# 90/10 train/eval split, seeded so the same partition is produced on every run.
SPLIT_SEED = 42  # <--- change to draw a different (but still reproducible) split
train_size = int(0.9*len(ds))
eval_size = len(ds)-train_size
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_ds, eval_ds = torch.utils.data.random_split(
    ds, [train_size, eval_size], generator=split_generator
)
print(f"Train size = {len(train_ds)}, Eval size = {len(eval_ds)}")


OSError: Can't load tokenizer for 'meta-llama/Llama-2-7b-hf'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'meta-llama/Llama-2-7b-hf' is the correct path to a directory containing all relevant files for a LlamaTokenizer tokenizer.

In [None]:
# ======================================
# 6) Llama2 Model for Causal LM
# ======================================
from transformers import LlamaForCausalLM

# Options for loading the checkpoint, grouped so they are easy to adjust.
model_load_options = {
    "device_map": "auto",          # <--- if using accelerate, bitsandbytes, etc.
    "torch_dtype": torch.float16,  # or "auto"
}
model = LlamaForCausalLM.from_pretrained(model_name, **model_load_options)
# This assumes enough GPU VRAM for the full model; if not, consider LoRA or 4-bit etc.

# Hyperparameters consumed by the training arguments.
# Each line is commented so the values are easy to tune:
num_epochs = 1  # <--- raise for better convergence
batch_size = 1  # <--- size this to fit your GPU memory
lr = 1e-4       # <--- learning rate; tune as needed
