Check out the Google Colab Notebook: [Open in Colab](https://colab.research.google.com/drive/1B7qX8crGP2Fb3gDqh7REI7brRz0QVsYR?usp=sharing)

In [None]:
pip install -U transformers datasets peft accelerate

In [None]:
!pip uninstall -y transformers
!pip install git+https://github.com/huggingface/transformers.git

In [None]:
pip install trl

In [None]:
pip install -U bitsandbytes

In [None]:
import json
import os
import pandas as pd
import torch
from datasets import Dataset
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
BitsAndBytesConfig,
DataCollatorForLanguageModeling
)
from peft import (
LoraConfig,
get_peft_model,
prepare_model_for_kbit_training
)
from trl import SFTTrainer, SFTConfig

In [None]:
def load_text_label_csv(file_path):
  try:
    df = pd.read_csv(file_path, encoding='utf-8')
  except UnicodeDecodeError:
    try:
      df = pd.read_csv(file_path, encoding='latin1')
    except UnicodeDecodeError:
      df = pd.read_csv(file_path, encoding='ISO-8859-1')


  df = df.iloc[:, :2]
  df.columns = ["text", "label"]


  df['formatted'] = df.apply(
  lambda row: f"### Text:\n{row['text']}\n\n### Label:\n{row['label']}",
  axis=1
  )


  return Dataset.from_pandas(df[['formatted']])


In [None]:
def fine_tune_phi2(dataset, base_model="microsoft/phi-2", save_path="./phi2-finetuned"):
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16
    )

    model = AutoModelForCausalLM.from_pretrained(
        base_model,
        quantization_config=bnb_config,
        device_map="auto",
        torch_dtype=torch.bfloat16
    )
    tokenizer = AutoTokenizer.from_pretrained(base_model)
    tokenizer.pad_token = tokenizer.eos_token

    model = prepare_model_for_kbit_training(model)
    peft_config = LoraConfig(
        r=16,
        lora_alpha=32,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=["q_proj", "v_proj"]
    )
    model = get_peft_model(model, peft_config)

    # Tokenize dataset with max_length control
    def tokenize_function(example):
        return tokenizer(
            example["formatted"],
            truncation=True,
            padding="max_length",
            max_length=1024
        )

    tokenized_dataset = dataset.map(tokenize_function, batched=True)

    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False
    )

    training_args = SFTConfig(
        output_dir="./results",
        per_device_train_batch_size=4,
        gradient_accumulation_steps=2,
        learning_rate=5e-5,
        num_train_epochs=3,
        save_strategy="epoch",
        logging_steps=10,
        bf16=torch.cuda.is_bf16_supported(),
        fp16=not torch.cuda.is_bf16_supported(),
        optim="adamw_torch",
        lr_scheduler_type="cosine",
        warmup_ratio=0.1,
    )

    trainer = SFTTrainer(
        model=model,
        train_dataset=tokenized_dataset,
        peft_config=peft_config,
        args=training_args,
        data_collator=data_collator,
        processing_class=tokenizer
    )

    trainer.train()

    merged_model = model.merge_and_unload()
    merged_model.save_pretrained(save_path)
    tokenizer.save_pretrained(save_path)

    config_path = os.path.join(save_path, "config.json")
    if os.path.exists(config_path):
        with open(config_path, 'r') as f:
            config = json.load(f)
        config['model_type'] = 'phi'
        with open(config_path, 'w') as f:
            json.dump(config, f, indent=2)

    return merged_model, tokenizer


In [None]:
import json
import os
import torch


class CodeGenAssistant:
    """
    Generates answers from prompt using the fine-tuned Phi-2 model.
    """
    def __init__(self, model_dir):
        self.tokenizer = AutoTokenizer.from_pretrained(model_dir)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_dir,
            device_map="auto",
            torch_dtype=torch.float16
        )

    def generate_response(self, prompt, max_new_tokens=2048):
        """
        Generate response for a given instruction-style prompt.
        Example input:
        "Instruct:What are the treatments for Acanthoma \n Output:"
        """
        inputs = self.tokenizer(prompt, return_tensors="pt", return_attention_mask=False).to("cuda")
        outputs = self.model.generate(
            **inputs,
            max_length=max_new_tokens,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
            do_sample=True
        )
        text = self.tokenizer.batch_decode(outputs[0], skip_special_tokens=True)
        return ''.join(text)

    def instruct(self, question, max_new_tokens=150):
        """
        Convenience wrapper: takes a plain question and formats it into
        "Instruct:... \n Output:" automatically.
        """
        formatted_prompt = f"Instruct:{question}\n Output:"
        return self.generate_response(formatted_prompt, max_new_tokens=max_new_tokens)


In [None]:
# Step 1: Load dataset
dataset = load_text_label_csv("/content/drive/MyDrive/SLM/qwen_multitask_finetune.csv")

# Step 2: Fine-tune
model, tokenizer = fine_tune_phi2(dataset)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Map:   0%|          | 0/210 [00:00<?, ? examples/s]



Truncating train dataset:   0%|          | 0/210 [00:00<?, ? examples/s]

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 50256}.
  | |_| | '_ \/ _` / _` |  _/ -_)


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mahana02biswas[0m ([33mahana02biswas-chennai-mathematical-institute[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
10,2.1041
20,1.9019
30,1.6015
40,1.3073
50,1.0579
60,0.9219
70,0.8576
80,0.829




In [19]:
assistant = CodeGenAssistant("/content/drive/MyDrive/Phi-2 for medical/phi2-finetuned")
print(assistant.instruct("What are the treatments for Acanthoma?"))


The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Instruct:What are the treatments for Acanthoma?
 Output: Treatment options for Acanthoma include surgical removal, cryotherapy, and laser therapy. In some cases, topical medications may also be used to reduce inflammation and shrink the tumor. However, it is important to consult with a dermatologist to determine the best treatment plan based on individual circumstances.




In [20]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load base (pretrained) microsoft phi-2
model_name = "microsoft/phi-2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16
)

def test_phi2_before_finetune(question, max_new_tokens=100):
    """
    Test microsoft/phi-2 model before finetuning
    with a given question in 'Instruct ... Output:' format.
    """
    prompt = f"Instruct:{question}\n Output:"
    inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=False).to("cuda")

    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.1,
        do_sample=True
    )
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return text.split("Output:")[-1].strip()


# Example usage
response = test_phi2_before_finetune("What are the treatments for Acanthoma?")
print("Model response before fine-tuning:\n", response)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Model response before fine-tuning:
 Acanthoma can be treated with various methods, such as excisional surgery to remove the tumor, cryotherapy (freezing), topical application of medications like 5-fluorouracil or imiquimod, radiation therapy, and systemic chemotherapy. However, it is important to consult a doctor before trying any treatment options.
