<a href="https://colab.research.google.com/github/WeiKuoLi/LLM_bookclub/blob/main/phi_1_5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install accelerate transformers einops datasets peft bitsandbytes --upgrade



In [None]:
from peft import PeftModel
from transformers import AutoModelForCausalLM
import torch
from datasets import load_dataset, Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForLanguageModeling, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model
import os

In [None]:
# Fetch the phi-1.5 tokenizer from the Hugging Face Hub.
tokenizer = AutoTokenizer.from_pretrained(
    "microsoft/phi-1_5",
    trust_remote_code=True,
)

# Use the end-of-sequence token as the padding token (the tokenize step pads its inputs).
tokenizer.pad_token = tokenizer.eos_token

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [None]:
# Quantization settings handed to the model loader below.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # load the weights in 4-bit precision
    bnb_4bit_quant_type="nf4",             # NF4 quantization data type
    bnb_4bit_use_double_quant=True,        # enable double quantization
    bnb_4bit_compute_dtype=torch.float16,  # dtype used for computation
)

# Load phi-1.5 with the quantization config; the {"": 0} device map puts the
# whole model on device index 0.
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/phi-1_5",
    quantization_config=bnb_config,
    device_map={"": 0},
    trust_remote_code=True,
)

In [None]:
# Report the device type the model's parameters live on.
# (A bare `model` expression that used to sit above was a no-op: only the last
# expression of a cell is displayed.)
device = next(model.parameters()).device
device.type

'cuda'

In [None]:
# LoRA adapter settings for causal language modelling.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=20,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=[
        "q_proj", "k_proj", "v_proj", "dense",  # attention linear layers
        "fc1", "fc2",                            # MLP linear layers
    ],
)

# Wrap the loaded model with the adapters, then print how many parameters are trainable.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

trainable params: 17,694,720 || all params: 1,435,965,440 || trainable%: 1.2322524976645677


In [None]:
def tokenize(sample):
    """Tokenize the ``"text"`` entry of ``sample`` with the notebook-level ``tokenizer``.

    Inputs are padded (``padding=True``) and truncated to at most 512 tokens.
    The tokenizer's output is returned unchanged.
    """
    return tokenizer(
        sample["text"],
        max_length=512,
        truncation=True,
        padding=True,
    )

def _format_example(row):
    """Join one row's prompt and completion into a single training string."""
    return "Prompt: " + row["Prompt"] + " Completion: " + row["Completion"]


# Load the training split of the MedText dataset.
data = load_dataset("BI55/MedText", "default", split="train")

# Switch to pandas and add a "text" column that combines both fields row by row.
data_df = data.to_pandas()
data_df["text"] = data_df[["Prompt", "Completion"]].apply(_format_example, axis=1)


In [None]:
# Preview the first rows together with the total number of rows.
data_df.head(), len(data_df)

(                                              Prompt  \
 0  A 50-year-old male presents with a history of ...   
 1  A 7-year-old boy presents with a fever, headac...   
 2  A 35-year-old woman presents with a persistent...   
 3  A 50-year-old male presents with severe abdomi...   
 4  A newborn baby presents with eye redness and a...   
 
                                           Completion  \
 0  This patient's history of recurrent kidney sto...   
 1  This child's symptoms of a red, bulging tympan...   
 2  While the symptoms might initially suggest ast...   
 3  The patient's symptoms suggest an incarcerated...   
 4  The infant's symptoms suggest neonatal conjunc...   
 
                                                 text  
 0  Prompt: A 50-year-old male presents with a his...  
 1  Prompt: A 7-year-old boy presents with a fever...  
 2  Prompt: A 35-year-old woman presents with a pe...  
 3  Prompt: A 50-year-old male presents with sever...  
 4  Prompt: A newborn baby prese

In [None]:
# Convert the DataFrame (now including the "text" column) back into a Dataset.
data = Dataset.from_pandas(data_df)

# Tokenize in batches; every original column is removed so only the tokenizer
# outputs remain.
tokenized_data = data.map(
    tokenize,
    batched=True,
    remove_columns=data.column_names,
    desc="Tokenizing data",
)

tokenized_data

Tokenizing data:   0%|          | 0/1412 [00:00<?, ? examples/s]

Dataset({
    features: ['input_ids', 'attention_mask'],
    num_rows: 1412
})

In [None]:
# Training hyperparameters.
# The run length is controlled by `max_steps`. `num_train_epochs` is deliberately
# not set: a positive `max_steps` overrides it, so specifying both is misleading
# (1000 steps at batch size 7 is several passes over the data, not one epoch).
training_arguments = TrainingArguments(
    output_dir="phi-1_5-finetuned-med-text-high",
    per_device_train_batch_size=7,
    gradient_accumulation_steps=1,
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    save_strategy="epoch",  # write a checkpoint at the end of each epoch
    logging_steps=50,       # log the training loss every 50 steps
    max_steps=1000,
)

In [None]:
# Collator configured for causal language modelling (mlm=False).
causal_lm_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=training_arguments,
    train_dataset=tokenized_data,
    data_collator=causal_lm_collator,
)

# Run fine-tuning; the returned TrainOutput is shown as the cell result.
trainer.train()

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss
50,1.5784
100,1.4446
150,1.4047
200,1.3806
250,1.2506
300,1.254
350,1.2417
400,1.252
450,1.0917
500,1.072


TrainOutput(global_step=1000, training_loss=1.1185302772521972, metrics={'train_runtime': 1098.4158, 'train_samples_per_second': 6.373, 'train_steps_per_second': 0.91, 'total_flos': 1.301133421215744e+16, 'train_loss': 1.1185302772521972, 'epoch': 4.95})

In [None]:
# Save the fine-tuned (PEFT-wrapped) model into the same directory used as the Trainer's output_dir.
model.save_pretrained("phi-1_5-finetuned-med-text-high")

In [None]:
# Display the current model's module structure.
model

In [None]:
# Reload the base model in float32, this time without a quantization config.
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/phi-1_5",
    torch_dtype=torch.float32,
    trust_remote_code=True,
)

# Attach the adapter stored in the fine-tuning output directory.
# NOTE(review): `from_transformers` is passed through unchanged — confirm PEFT actually uses it.
peft_model = PeftModel.from_pretrained(
    model,
    "phi-1_5-finetuned-med-text-high",
    from_transformers=True,
)

# Fold the adapter weights into the base model and drop the PEFT wrapper.
model = peft_model.merge_and_unload()

In [None]:
# Display the merged model's module structure.
model

PhiForCausalLM(
  (model): PhiModel(
    (embed_tokens): Embedding(51200, 2048)
    (embed_dropout): Dropout(p=0.0, inplace=False)
    (layers): ModuleList(
      (0-23): 24 x PhiDecoderLayer(
        (self_attn): PhiAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=True)
          (k_proj): Linear(in_features=2048, out_features=2048, bias=True)
          (v_proj): Linear(in_features=2048, out_features=2048, bias=True)
          (dense): Linear(in_features=2048, out_features=2048, bias=True)
          (rotary_emb): PhiRotaryEmbedding()
        )
        (mlp): PhiMLP(
          (activation_fn): NewGELUActivation()
          (fc1): Linear(in_features=2048, out_features=8192, bias=True)
          (fc2): Linear(in_features=8192, out_features=2048, bias=True)
        )
        (input_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
        (resid_dropout): Dropout(p=0.0, inplace=False)
      )
    )
    (final_layernorm): LayerNorm((2048,), e

In [None]:

# Save the merged model.
# NOTE(review): this is the same directory already used as the Trainer output_dir and
# for the earlier adapter save — consider whether a separate directory is wanted.
model.save_pretrained("phi-1_5-finetuned-med-text-high")

# Optional: publish the model to the Hugging Face Hub.
# model.push_to_hub("llm-exp/phi-1_5-finetuned-med-text")

In [None]:
from transformers import AutoModelForCausalLM

# Local directory the fine-tuned model was saved to.
model_path = "./phi-1_5-finetuned-med-text-high"

# Load with the causal-LM class rather than the bare `AutoModel`: `AutoModel` returns
# the backbone without a language-modelling head, and text generation via
# `model.generate(...)` needs that head.
model = AutoModelForCausalLM.from_pretrained(model_path)


In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Use the GPU when one is available, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-1_5", trust_remote_code=True)

# Encode a prompt in the same "Prompt: ... Completion:" layout as the training text.
inputs = tokenizer(
    'Prompt: I am allergic to peanuts, can i eat cashew? Completion:',
    return_tensors="pt",
    return_attention_mask=False,
)
inputs.to(device)

# Generate with max_length=512, decode the batch and print its first sequence.
outputs = model.generate(**inputs, max_length=512)
text = tokenizer.batch_decode(outputs)[0]
print(text)

Prompt: I am allergic to peanuts, can i eat cashew? Completion: Yes, you can eat cashews if you are not allergic to other nuts. What's important is to avoid peanuts if you are allergic. If you are not allergic, cashews are a safe option. Just be aware that cashews are still a nut and may contain traces of peanuts, so it's possible to have a reaction even if you are not allergic to peanuts. If you have a severe allergy, it's best to avoid all nuts. If you are unsure about the allergen content of a food, it's best to avoid it. If you do eat a food and have a reaction, carry your epinephrine auto-injector and seek medical attention immediately. If you have a known allergy, it's also important to carry an epinephrine auto-injector and to inform others about your allergy. If you have a severe allergy, you should carry an epinephrine auto-injector at all times. If you have a mild allergic reaction, you may be able to treat it at home with over-the-counter antihistamines. If you have a severe

In [None]:
# Log in to the Hugging Face Hub from the shell; the CLI prompts for an access token.
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Token: 
Add token as git credential? (Y/n) Y
Token is valid (permission: write).
[1m[31mCannot authenticate through 

In [None]:
!huggingface-cli upload phi-1_5-finetuned-med-text-high

Consider using `hf_transfer` for faster uploads. This solution comes with some limitations. See https://huggingface.co/docs/huggingface_hub/hf_transfer for more details.

adapter_model.safetensors:   0% 0.00/70.8M [00:00<?, ?B/s]


rng_state.pth:   0% 0.00/14.2k [00:00<?, ?B/s][A[A[A

optimizer.pt:   0% 0.00/142M [00:00<?, ?B/s][A[A



Upload 27 LFS files:   0% 0/27 [00:00<?, ?it/s][A[A[A[A




adapter_model.safetensors:   0% 16.4k/70.8M [00:00<17:26, 67.6kB/s]
adapter_model.safetensors:   0% 16.4k/70.8M [00:00<18:02, 65.4kB/s][A

optimizer.pt:   0% 16.4k/142M [00:00<35:26, 66.7kB/s][A[A


rng_state.pth: 100% 14.2k/14.2k [00:00<00:00, 53.3kB/s][A[A[A




scheduler.pt: 100% 1.06k/1.06k [00:00<00:00, 4.05kB/s][A[A[A[A[A

optimizer.pt:   3% 4.72M/142M [00:00<00:08, 17.0MB/s][A[A
scheduler.pt: 100% 1.06k/1.06k [00:00<00:00, 2.74kB/s]
rng_state.pth: 100% 14.2k/14.2k [00:00<00:00, 34.0kB/s]
adapter_model.safetensors:  10% 7.11M/70.8M [00:00<00:03, 20.2MB/s]
adapter_mode