In [1]:
%%capture
%pip install torch torchvision torchaudio accelerate peft bitsandbytes transformers trl

In [4]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
    Trainer
)
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)
import os, torch, wandb
from datasets import load_dataset
from sklearn.model_selection import train_test_split
from trl import SFTTrainer

In [6]:
import os
from dotenv import load_dotenv
from huggingface_hub import login
import wandb

# Load environment variables from .env file
load_dotenv()

# Hugging Face login
api_key = os.getenv('HUGGINGFACE_API_KEY')
login(api_key)
print("Hugging Face CLI login with token successful")

# Weights & Biases login
wandb_token = os.getenv('WANDB_API_KEY')
wandb.login(key=wandb_token)
print("Weights & Biases login successful")

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: C:\Users\subhe\_netrc


Hugging Face CLI login with token successful
Weights & Biases login successful


In [10]:
base_model = "meta-llama/Llama-3.2-1B"

In [18]:
# Check CUDA device capability and set torch dtype and attention implementation
if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8:
    try:
        import flash_attn
    except ImportError:
        subprocess.check_call(["pip", "install", "-qqq", "flash-attn"])
        import flash_attn
    torch_dtype = torch.bfloat16
    attn_implementation = "flash_attention_2"
    print("cuda")

    # QLoRA config -- 4bit quantization for GPU
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch_dtype,
        bnb_4bit_use_double_quant=True,
    )

    # Initialize the tokenizer
    tokenizer = AutoTokenizer.from_pretrained(base_model)
    tokenizer.pad_token = tokenizer.eos_token

    # Instantiate the model with the appropriate quantization configuration
    model = AutoModelForCausalLM.from_pretrained(
        base_model,
        quantization_config=bnb_config,
        device_map="auto",
        attn_implementation=attn_implementation
    )
else:
    torch_dtype = torch.float32
    attn_implementation = "eager"
    print("cpu")

    # QLoRA config -- adjusted for CPU
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=False  # Disable 4-bit quantization for CPU
    )

    # Initialize the tokenizer
    tokenizer = AutoTokenizer.from_pretrained(base_model)
    tokenizer.pad_token = tokenizer.eos_token

    # Instantiate the model without quantization configuration for CPU
    model = AutoModelForCausalLM.from_pretrained(
        base_model
    )

cpu


In [21]:
from peft import LoraConfig, TaskType

# Define the PEFT configuration
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "k_proj", "v_proj", "up_proj", "down_proj", "o_proj", "gate_proj"],  # Adapt to your model's architecture
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)

# Apply PEFT to the model
model = get_peft_model(model, peft_config)

In [22]:
dataset_path = "dataset/adr-dataset-v1/versions/1/adverse_reactions_dataset.json"
dataset = load_dataset('json', data_files=dataset_path)

def preprocess_data(examples):
    inputs = [
        f"Input: {input_text} Response: {response_text}"
        for input_text, response_text in zip(examples["input_text"], examples["response_text"])
    ]
    # Tokenize
    model_inputs = tokenizer(inputs, max_length=512, truncation=True, padding="max_length")
    return model_inputs

# Apply preprocessing
tokenized_dataset = dataset.map(preprocess_data, batched=True)

# Split the dataset into training and evaluation sets
train_test_split_ratio = 0.8
split_dataset = tokenized_dataset["train"].train_test_split(test_size=1 - train_test_split_ratio, seed=42)

# Access train and evaluation datasets
train_dataset = split_dataset["train"]
eval_dataset = split_dataset["test"]

# Print dataset sizes
print(f"Train dataset size: {len(train_dataset)}")
print(f"Evaluation dataset size: {len(eval_dataset)}")

Generating train split: 27901 examples [00:04, 6880.47 examples/s]
Map: 100%|██████████| 27901/27901 [00:20<00:00, 1344.42 examples/s]

Train dataset size: 22320
Evaluation dataset size: 5581





In [None]:
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./results",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=8,
    save_steps=10_000,
    save_total_limit=2,
    logging_dir="./logs",
    logging_steps=500,
)

trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
)

trainer.train()

trainer.model.save_pretrained("llamaDrugLabel++")

  trainer = SFTTrainer(
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Step,Training Loss
