In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from datasets import load_dataset
from peft import LoraConfig, get_peft_model
import torch
from huggingface_hub import notebook_login

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
device = "mps" if torch.backends.mps.is_available() else "cpu"

In [5]:
model_name = "openai-community/gpt2"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,  # MPS requires float32
    device_map={"": device} if device == "mps" else "cpu",
)

model.gradient_checkpointing_enable()
print("Model loaded successfully!")

Model loaded successfully!


In [6]:
dataset_path = "/Users/priyal/Documents/honours/datasets/qa_dataset_cleaned.csv"
dataset = load_dataset("csv", data_files=dataset_path, split="train")

Generating train split: 576 examples [00:00, 21636.00 examples/s]


In [7]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # Ensure padding token is set

In [8]:
def tokenize_function(examples):
    formatted_texts = [
        f"Instruction: {i}\nInput: {inp}\nOutput: {outp}"
        for i, inp, outp in zip(
            examples["instruction"], examples["input"], examples["output"]
        )
    ]

    tokenized_inputs = tokenizer(
        formatted_texts,
        padding="max_length",
        truncation=True,
        max_length=256,
        return_tensors="pt",
    )

    labels = tokenized_inputs["input_ids"].clone()
    labels[:, :-1] = tokenized_inputs["input_ids"][..., 1:].clone()
    labels[:, -1] = -100  # Ignore loss for last token
    tokenized_inputs["labels"] = labels

    return tokenized_inputs

In [9]:
tokenized_datasets = dataset.map(
    tokenize_function, batched=True, remove_columns=dataset.column_names
)

Map: 100%|██████████| 576/576 [00:00<00:00, 2483.25 examples/s]


In [10]:
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["c_attn"],  # Adjusted for GPT-2
    inference_mode=False,
)

model = get_peft_model(model, peft_config)
model.to(device)
print("LoRA applied successfully")

'NoneType' object has no attribute 'cadam32bit_grad_fp32'
LoRA applied successfully


  warn("The installed version of bitsandbytes was compiled without GPU support. "


In [14]:
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="no",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=3,
    logging_dir="./logs",
    logging_steps=5,
    save_total_limit=2,
    report_to="tensorboard",
    fp16=False,  # FP16 not supported on MPS
    max_grad_norm=1.0,
    dataloader_num_workers=4,
    remove_unused_columns=True,
)

# Trainer setup
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets,
    tokenizer=tokenizer,
)

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [15]:
print("Starting fine-tuning on MPS")
trainer.train()
print("Fine-tuning complete")

Starting fine-tuning on MPS


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

Step,Training Loss
5,9.1258
10,9.299
15,9.1085
20,9.0825
25,9.0779
30,9.2095
35,9.0238
40,9.0788
45,9.1029
50,8.9364


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

Fine-tuning complete


In [16]:
model.save_pretrained("./gpt2_finetuned")
tokenizer.save_pretrained("./gpt2_finetuned")
print("Model saved to ./gpt2_finetuned")

Model saved to ./gpt2_finetuned


In [24]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel, PeftConfig
import torch

# First check if the PEFT adapter exists and is valid
model_path = "./gpt2_finetuned"
try:
    config = PeftConfig.from_pretrained(model_path)
    print(f"Found PEFT config: {config}")
    print(f"Base model: {config.base_model_name_or_path}")
except Exception as e:
    print(f"Error loading PEFT config: {e}")
    print("This suggests the model wasn't properly saved as a PEFT model")

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")
tokenizer.pad_token = tokenizer.eos_token

# Load base model
base_model = AutoModelForCausalLM.from_pretrained(
    "openai-community/gpt2",
    torch_dtype=torch.float32,
    device_map={"": "mps"} if torch.backends.mps.is_available() else "cpu",
)

# Load PEFT adapter
try:
    model = PeftModel.from_pretrained(base_model, model_path)
    print("Successfully loaded PEFT model")
except Exception as e:
    print(f"Error loading PEFT model: {e}")
    # Fallback to direct loading if PEFT loading fails
    print("Trying to load model directly...")
    model = AutoModelForCausalLM.from_pretrained(model_path)

model.eval()


def generate_response(instruction, input_text="", max_length=512):
    # Format the prompt exactly like during training
    prompt = f"Instruction: {instruction}\nInput: {input_text}\nOutput:"

    print(f"Input prompt length: {len(tokenizer.encode(prompt))}")
    inputs = tokenizer(prompt, return_tensors="pt").to(
        "mps" if torch.backends.mps.is_available() else "cpu"
    )

    print("Generating response...")
    # Generation with verbose parameters for better results
    output = model.generate(
        **inputs,
        max_new_tokens=300,  # Ensure we generate enough new tokens
        min_new_tokens=50,  # Force model to generate at least this many tokens
        num_beams=5,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        no_repeat_ngram_size=3,
        early_stopping=False,
    )

    full_output = tokenizer.decode(output[0], skip_special_tokens=True)
    print(f"Full output: {full_output}")

    # Try to extract just the generated part
    if "Output:" in full_output:
        return full_output.split("Output:")[1].strip()
    return full_output


# Test with proper input context from your training data
instruction = (
    "How can Ayurvedic practices influence my gene expression for better health"
)
input_text = """Ayurveda is a comprehensive, natural health care system that originated in the ancient Vedic times of India. Epigenetics refers to the external modification of DNA that turns genes on and off, affecting gene expression. This occurs without changes in the basic structure of the DNA. This gene expression can have transgenerational effects. The major factors that cause epigenetic changes are lifestyle and behavior, diet and digestion, stress, and environmental factors. Ayurveda addresses these factors, thereby affecting the Deha body Prakriti psychophysiological constitution, which corresponds to the phenotype, and indirectly the Janma birth Prakriti, which corresponds to the genotype. Thus, it is proposed that epigenetics is an important mechanism of Ayurveda. This correlation and understanding will lead to better communication and understanding with the current medical system, and lead to better integration of both sciences in the management of optimal health. In addition, research on Ayurvedic modalities affecting gene expression will further increase correlation and understanding between the current medical system and Ayurveda."""

response = generate_response(instruction, input_text)
print("\n\nExtracted response:")
print(response)

Found PEFT config: LoraConfig(task_type='CAUSAL_LM', peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path='openai-community/gpt2', revision=None, inference_mode=True, r=8, target_modules={'c_attn'}, exclude_modules=None, lora_alpha=32, lora_dropout=0.1, fan_in_fan_out=True, bias='none', use_rslora=False, modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={}, megatron_config=None, megatron_core='megatron.core', loftq_config={}, eva_config=None, use_dora=False, layer_replication=None, runtime_config=LoraRuntimeConfig(ephemeral_gpu_offload=False), lora_bias=False)
Base model: openai-community/gpt2
Successfully loaded PEFT model
Input prompt length: 240
Generating response...
Full output: Instruction: How can Ayurvedic practices influence my gene expression for better health
Input: Ayurveda is a comprehensive, natural health care system that originated in the ancient Vedic times of India. Epig