In [1]:
import pandas as pd

splits = {'test': 'plain_text/test-00000-of-00001.parquet', 'validation': 'plain_text/validation-00000-of-00001.parquet', 'train': 'plain_text/train-00000-of-00001.parquet'}
df = pd.read_parquet("hf://datasets/stanfordnlp/snli/" + splits["test"])

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
df

Unnamed: 0,premise,hypothesis,label
0,This church choir sings to the masses as they ...,The church has cracks in the ceiling.,1
1,This church choir sings to the masses as they ...,The church is filled with song.,0
2,This church choir sings to the masses as they ...,A choir singing at a baseball game.,2
3,"A woman with a green headscarf, blue shirt and...",The woman is young.,1
4,"A woman with a green headscarf, blue shirt and...",The woman is very happy.,0
...,...,...,...
9995,Two women are observing something together.,Two women are standing with their eyes closed.,2
9996,Two women are observing something together.,Two girls are looking at something.,0
9997,A man in a black leather jacket and a book in ...,A man is flying a kite.,2
9998,A man in a black leather jacket and a book in ...,A man is speaking in a classroom.,0


In [5]:
# Load the datasets
train_df = pd.read_parquet("hf://datasets/stanfordnlp/snli/" + splits['train'])
test_df = pd.read_parquet("hf://datasets/stanfordnlp/snli/" + splits['test'])
validation_df = pd.read_parquet("hf://datasets/stanfordnlp/snli/" + splits['validation'])

# Create the training dataset with 1000 samples, selecting every 550th sample
train_samples = train_df.iloc[::550].head(1000)

# Create the testing dataset with 100 samples, selecting every 100th sample
test_samples = test_df.iloc[::100].head(100)

# Create the validation dataset with 100 samples, selecting every 100th sample
validation_samples = validation_df.iloc[::100].head(100)

# Show the shapes of the new datasets
print("Training samples shape:", train_samples.shape)
print("Testing samples shape:", test_samples.shape)
print("Validation samples shape:", validation_samples.shape)

# Save the datasets as CSV files
train_samples.to_csv("train_samples.csv", index=False)
test_samples.to_csv("test_samples.csv", index=False)
validation_samples.to_csv("validation_samples.csv", index=False)

Training samples shape: (1000, 3)
Testing samples shape: (100, 3)
Validation samples shape: (100, 3)


In [1]:
# Use a pipeline as a high-level helper
from transformers import pipeline


pipe = pipeline("text-generation", model="microsoft/phi-2")

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████████████| 2/2 [00:01<00:00,  1.29it/s]
Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Check for GPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")

device_0 = torch.device("cuda:0")  #
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")
model = AutoModelForCausalLM.from_pretrained("microsoft/phi-2").to(device_0)

  from .autonotebook import tqdm as notebook_tqdm


Using device: cuda


Loading checkpoint shards: 100%|████████████████████████████████████████████████████████████████████████████████| 2/2 [00:01<00:00,  1.04it/s]


In [1]:
import os
import pandas as pd
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments
from peft import get_peft_model, LoraConfig, TaskType
from torch.utils.data import DataLoader
from torch.cuda.amp import GradScaler, autocast

# Set GPU devices to use
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"  # Specify which GPUs to use

# Check for GPU
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'  # Store the model on GPU 0
print(f"Using device: {device}")

# Load your dataset
train_df = pd.read_csv("train_samples.csv")
validation_df = pd.read_csv("validation_samples.csv")

# Convert DataFrames to Hugging Face Datasets
train_dataset = load_dataset('csv', data_files='train_samples.csv', split='train')
validation_dataset = load_dataset('csv', data_files='validation_samples.csv', split='train')

# Initialize the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")
model = AutoModelForCausalLM.from_pretrained("microsoft/phi-2").to(device)  # Load model to GPU 0

# Set pad token to eos token
tokenizer.pad_token = tokenizer.eos_token

# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples['premise'], examples['hypothesis'],
                     padding="max_length", truncation=True, return_tensors="pt")

# Apply tokenization
tokenized_train = train_dataset.map(tokenize_function, batched=True)
tokenized_validation = validation_dataset.map(tokenize_function, batched=True)

# Custom dataset class
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, tokenized_data):
        self.input_ids = torch.tensor(tokenized_data['input_ids'])
        self.attention_mask = torch.tensor(tokenized_data['attention_mask'])
        self.labels = torch.tensor(tokenized_data['input_ids'])  # For language modeling

    def __len__(self):
        return len(self.input_ids)

    def __getitem__(self, idx):
        return {
            'input_ids': self.input_ids[idx],
            'attention_mask': self.attention_mask[idx],
            'labels': self.labels[idx],
        }

# Create custom datasets
custom_train_dataset = CustomDataset(tokenized_train)
custom_validation_dataset = CustomDataset(tokenized_validation)

# Define QLoRA configuration
lora_config = LoraConfig(
    r=16,  # rank
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM,
)

# Wrap the model with QLoRA
model = get_peft_model(model, lora_config)

# Data Parallelism
model = torch.nn.DataParallel(model)  # Wrap the model for data parallelism

# Training arguments
training_args = TrainingArguments(
    output_dir="./phi2-finetuned",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=5,
    logging_dir='./logs',
    logging_steps=10,
    evaluation_strategy="epoch",  # Evaluate after every epoch
    save_strategy="epoch",
    save_total_limit=5,  # Keep only the last 5 saved models
    report_to="none",  # Disable reporting to avoid errors if not set up
    load_best_model_at_end=True,  # Optionally load the best model at the end of training
)

# Create DataLoader for batching
train_dataloader = DataLoader(custom_train_dataset, batch_size=training_args.per_device_train_batch_size, shuffle=True)
validation_dataloader = DataLoader(custom_validation_dataset, batch_size=training_args.per_device_eval_batch_size)

# Custom training loop with gradient accumulation
scaler = GradScaler()  # Initialize scaler for mixed precision
accumulation_steps = 4  # Adjust as needed
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)

for epoch in range(training_args.num_train_epochs):
    model.train()
    for i, batch in enumerate(train_dataloader):
        # Move input tensors to the appropriate device (GPU 0 for model, others for inputs)
        inputs = {
            'input_ids': batch['input_ids'].to(device),  # Move to GPU 0
            'attention_mask': batch['attention_mask'].to(device),
            'labels': batch['labels'].to(device)
        }

        with autocast():  # Enable mixed precision
            outputs = model(**inputs)  # Directly use the model
            loss = outputs.loss / accumulation_steps  # Normalize loss for gradient accumulation
        
        scaler.scale(loss).backward()  # Scale loss and backpropagate
        
        if (i + 1) % accumulation_steps == 0:  # Update weights every few batches
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()  # Reset gradients after update

    print(f"Epoch {epoch + 1}/{training_args.num_train_epochs}, Loss: {loss.item()}")

# Save the final model
model.module.save_pretrained("./phi2-finetuned-final")  # Use .module to access the original model for saving
tokenizer.save_pretrained("./phi2-finetuned-final")


  from .autonotebook import tqdm as notebook_tqdm


Using device: cuda:0


Loading checkpoint shards: 100%|████████████████████████████████████████████████████████████████████████████████| 2/2 [00:01<00:00,  1.16it/s]
  scaler = GradScaler()  # Initialize scaler for mixed precision
  with autocast():  # Enable mixed precision


OutOfMemoryError: Caught OutOfMemoryError in replica 1 on device 1.
Original Traceback (most recent call last):
  File "/home/arunb/anaconda3/envs/project1/lib/python3.12/site-packages/torch/nn/parallel/parallel_apply.py", line 96, in _worker
    output = module(*input, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/arunb/anaconda3/envs/project1/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/arunb/anaconda3/envs/project1/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/arunb/anaconda3/envs/project1/lib/python3.12/site-packages/peft/peft_model.py", line 1859, in forward
    return self.base_model(
           ^^^^^^^^^^^^^^^^
  File "/home/arunb/anaconda3/envs/project1/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/arunb/anaconda3/envs/project1/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/arunb/anaconda3/envs/project1/lib/python3.12/site-packages/peft/tuners/tuners_utils.py", line 197, in forward
    return self.model.forward(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/arunb/anaconda3/envs/project1/lib/python3.12/site-packages/transformers/models/phi/modeling_phi.py", line 1235, in forward
    outputs = self.model(
              ^^^^^^^^^^^
  File "/home/arunb/anaconda3/envs/project1/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/arunb/anaconda3/envs/project1/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/arunb/anaconda3/envs/project1/lib/python3.12/site-packages/transformers/models/phi/modeling_phi.py", line 980, in forward
    layer_outputs = decoder_layer(
                    ^^^^^^^^^^^^^^
  File "/home/arunb/anaconda3/envs/project1/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/arunb/anaconda3/envs/project1/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/arunb/anaconda3/envs/project1/lib/python3.12/site-packages/transformers/models/phi/modeling_phi.py", line 720, in forward
    feed_forward_hidden_states = self.resid_dropout(self.mlp(hidden_states))
                                                    ^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/arunb/anaconda3/envs/project1/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/arunb/anaconda3/envs/project1/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/arunb/anaconda3/envs/project1/lib/python3.12/site-packages/transformers/models/phi/modeling_phi.py", line 225, in forward
    hidden_states = self.fc2(hidden_states)
                    ^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/arunb/anaconda3/envs/project1/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/arunb/anaconda3/envs/project1/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/arunb/anaconda3/envs/project1/lib/python3.12/site-packages/peft/tuners/lora/layer.py", line 584, in forward
    result = result + lora_B(lora_A(dropout(x))) * scaling
                             ^^^^^^^^^^^^^^^^^^
  File "/home/arunb/anaconda3/envs/project1/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/arunb/anaconda3/envs/project1/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/arunb/anaconda3/envs/project1/lib/python3.12/site-packages/torch/nn/modules/linear.py", line 125, in forward
    return F.linear(input, self.weight, self.bias)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 40.00 MiB. GPU 1 has a total capacity of 39.50 GiB of which 10.56 MiB is free. Process 1081122 has 20.40 GiB memory in use. Including non-PyTorch memory, this process has 19.05 GiB memory in use. Of the allocated memory 18.03 GiB is allocated by PyTorch, and 223.30 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
