## Logging into Huggingface

In [3]:
torch.cuda.empty_cache()

In [2]:
import torch
# Check if CUDA (GPU support) is available
if torch.cuda.is_available():
    # Get the number of available GPUs
    num_gpus = torch.cuda.device_count()
    print(f"Number of available GPUs: {num_gpus}")

    # Iterate over each GPU and print its name
    for gpu_id in range(num_gpus):
        gpu_name = torch.cuda.get_device_name(gpu_id)
        print(f"GPU {gpu_id}: {gpu_name}")
else:
    print("CUDA is not available. Only CPU will be used.")

Number of available GPUs: 2
GPU 0: NVIDIA A100-SXM4-80GB
GPU 1: NVIDIA A100-SXM4-80GB


In [2]:
from huggingface_hub import login
login(token = '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')

  from .autonotebook import tqdm as notebook_tqdm


Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /home/anthony.rahbany/.cache/huggingface/token
Login successful


In [3]:
import os

In [4]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments, pipeline, logging
from datasets import load_dataset

from peft import LoraConfig
from trl import SFTTrainer

from accelerate import Accelerator
accelerator = Accelerator()

Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [5]:
cache_dir = "/blue/azare/anthony.rahbany/cache/"
pretrained_model = "./Llama_Epoch_2_Med_Terms/LlamaModel/"
pretrained_tokenizer = "./Llama_Epoch_2_Med_Terms/LlamaTokenizer/"

In [6]:
compute_dtype = getattr(torch, "float16")

quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False,
)

In [7]:
model = AutoModelForCausalLM.from_pretrained(
    pretrained_model,
    quantization_config=quant_config,
    device_map='auto',
    cache_dir=cache_dir
)
model.config.use_cache = False
model.config.pretraining_tp = 1
model = accelerator.prepare(model)

Loading checkpoint shards: 100%|██████████| 2/2 [00:05<00:00,  2.58s/it]


In [8]:
tokenizer = AutoTokenizer.from_pretrained(pretrained_tokenizer, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [9]:
# dataset = load_dataset("Medilora/us_medical_license_exam_textbooks_en", split="train", cache_dir=cache_dir)

In [10]:
dataset = load_dataset("gamino/wiki_medical_terms", split="train", cache_dir=cache_dir)
dataset = dataset.rename_column("page_text", "text")
dataset = dataset.remove_columns(["page_title", "__index_level_0__"])

In [11]:
dataset

Dataset({
    features: ['text'],
    num_rows: 6861
})

In [12]:
peft_params = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
)

In [13]:
training_params = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_steps=200,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant",
    # report_to="tensorboard"
)

In [14]:
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_params,
    dataset_text_field="text",
    max_seq_length=2048,
    tokenizer=tokenizer,
    args=training_params,
    packing=False,
)

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [15]:
trainer.train()

OutOfMemoryError: CUDA out of memory. Tried to allocate 128.00 MiB. GPU 1 has a total capacity of 79.15 GiB of which 15.75 MiB is free. Process 4148410 has 31.00 GiB memory in use. Including non-PyTorch memory, this process has 48.11 GiB memory in use. Of the allocated memory 47.39 GiB is allocated by PyTorch, and 240.94 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [39]:
trainer.save_model("./LlamaModel")
trainer.tokenizer.save_pretrained("./LlamaTokenizer")

('./LlamaTokenizer/tokenizer_config.json',
 './LlamaTokenizer/special_tokens_map.json',
 './LlamaTokenizer/tokenizer.json')