In [None]:
#!pip install transformers deepspeed datasets accelerate
#!pip install deepspeed
#!pip install git+https://github.com/microsoft/DeepSpeed.git
#!pip install mpi4py

In [8]:
import torch


def format_gpu_info(index, name, total_bytes, allocated_bytes, reserved_bytes):
    """Build the human-readable report lines for a single GPU.

    Args:
        index: Device index shown in the heading line.
        name: Device name shown in the heading line.
        total_bytes: Total device memory, in bytes.
        allocated_bytes: Memory currently allocated on the device, in bytes.
        reserved_bytes: Memory currently reserved (cached) on the device, in bytes.

    Returns:
        A list of four strings: the heading plus one line per memory figure,
        each figure divided by 1024 ** 3 and formatted with two decimals.
    """
    bytes_per_gb = 1024 ** 3
    return [
        f"GPU {index}: {name}",
        f"Total Memory: {total_bytes / bytes_per_gb:.2f} GB",
        f"Allocated Memory: {allocated_bytes / bytes_per_gb:.2f} GB",
        f"Cached Memory: {reserved_bytes / bytes_per_gb:.2f} GB",
    ]


if torch.cuda.is_available():
    num_gpus = torch.cuda.device_count()

    # Process-wide facts are printed once, before the per-device details.
    # Previously they were repeated under every GPU, and the "CUDA Device"
    # line always showed device 0's name regardless of which GPU it sat under.
    print(f"CUDA Available: {torch.cuda.is_available()}")
    print(f"Number of GPUs: {num_gpus}")
    print(f"CUDA Device: {torch.cuda.get_device_name(0)}")
    print()

    for i in range(num_gpus):
        gpu_lines = format_gpu_info(
            i,
            torch.cuda.get_device_name(i),
            torch.cuda.get_device_properties(i).total_memory,
            torch.cuda.memory_allocated(i),
            torch.cuda.memory_reserved(i),
        )
        print("\n".join(gpu_lines))
        print()
else:
    print("No GPU available.")

No GPU available.


In [None]:
import os

# Force synchronous CUDA execution (debugging aid).
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
# An empty device list hides every GPU from this process, forcing CPU.
# NOTE(review): presumably this only takes effect if it is set before CUDA is
# first initialised in the kernel — confirm no earlier cell has touched CUDA.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    DataCollatorWithPadding,
    Trainer,
    TrainingArguments,
)
from datasets import Dataset

# Load model and tokenizer
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Reuse the EOS token for padding when the tokenizer defines no pad token.
# This avoids adding a new special token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Sample dataset
data = {"text": ["Example sentence 1.", "Another example sentence."]}
train_dataset = Dataset.from_dict(data).map(
    lambda x: tokenizer(
        x["text"], truncation=True, padding="max_length", max_length=512
    ),  # Fixed max_length so every example has the same sequence length
    batched=True,
)

# Causal-LM training needs a `labels` field so the model can return a loss.
# DataCollatorWithPadding only pads and never creates labels, which makes
# Trainer.train() fail with "The model did not return a loss". With mlm=False
# this collator copies input_ids into labels and masks padding positions.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)


# DeepSpeed configuration, passed to TrainingArguments as a plain dictionary.
deepspeed_config = {
    # Mixed-precision training in half precision.
    "fp16": {
        "enabled": True
    },
    "optimizer": {
        "type": "AdamW",
        "params": {
            "lr": 5e-5
        }
    },
    "zero_optimization": {
        "stage": 2,
        # Keep optimizer state in pinned CPU memory.
        "offload_optimizer": {
            "device": "cpu",
            "pin_memory": True
        }
        # "offload_param" was removed: DeepSpeed documents parameter offload
        # as available only with ZeRO stage 3, so it does not belong in a
        # stage-2 configuration.
    },
    "gradient_accumulation_steps": 1,
    "train_batch_size": 'auto'  # Set to 'auto' to match with Trainer settings
}

# Define training arguments. fp16 and DeepSpeed are enabled only when a CUDA
# device is actually usable: fp16 mixed precision cannot run on CPU, and this
# notebook may be executed with every GPU hidden from the process.
import torch

use_cuda = torch.cuda.is_available()

training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=1,
    num_train_epochs=1,
    learning_rate=5e-5,  # same value as the optimizer lr in deepspeed_config
    fp16=use_cuda,
    deepspeed=deepspeed_config if use_cuda else None,  # dictionary is passed directly
    logging_dir="./logs",
    save_strategy="epoch",
)

# Collect everything the Trainer needs, then build it in one call.
trainer_kwargs = dict(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    data_collator=data_collator,
)
trainer = Trainer(**trainer_kwargs)

# Run the training loop; as the last expression, its result is shown by the cell.
trainer.train()