In [None]:
import os

# CUDA reads CUDA_VISIBLE_DEVICES only when it is first initialised, so the
# device restriction has to be in place before the first torch.cuda query.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import torch

# Sanity check: is CUDA usable, and how many devices does this process see?
print(torch.cuda.is_available())
print(torch.cuda.device_count())

In [None]:
from datasets import load_dataset

# Identifier of the dataset to load, kept next to the call that uses it.
DATASET_HF_ID = 'ywchoi/pubmed_abstract_0'

# Load the dataset named by DATASET_HF_ID via `datasets.load_dataset`.
dataset = load_dataset(path=DATASET_HF_ID)


In [None]:
import os

# Restrict the CUDA devices visible to this process to index 0.
# CUDA reads this variable when it is first initialised, so the assignment only
# has an effect if it runs before any torch.cuda call in the kernel.
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Checkpoint identifier shared by the tokenizer and model loaders.
MODEL_HF_ID = "tiiuae/falcon-7b"

# 4-bit (NF4, double-quantised) settings with bfloat16 as the compute dtype.
quantization_kwargs = {
    "load_in_4bit": True,
    "bnb_4bit_use_double_quant": True,
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_compute_dtype": torch.bfloat16,
}
bnb_config = BitsAndBytesConfig(**quantization_kwargs)

In [None]:

# Load the tokenizer and the quantised causal-LM from the same checkpoint id.
# Both calls opt in to the repository's custom code via trust_remote_code.
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_HF_ID,
    trust_remote_code=True,
)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_HF_ID,
    trust_remote_code=True,
    device_map="auto",
    quantization_config=bnb_config,
)


In [None]:
from peft import prepare_model_for_kbit_training

# Enable gradient checkpointing, then hand the quantised model to PEFT's
# k-bit training preparation (checkpointing flag spelled out at its default).
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(
    model,
    use_gradient_checkpointing=True,
)

In [None]:
def print_trainable_parameters(model):
    """
    Print how many of the model's parameters are trainable.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs, where each parameter provides
            ``numel()`` and ``requires_grad``.

    Returns:
        None. A single summary line with the trainable count, the total
        count and the trainable percentage is written to stdout.

    Note:
        Counts come straight from ``param.numel()``.
        NOTE(review): for weights stored in a packed/quantised layout this may
        differ from the logical parameter count -- confirm if exact numbers matter.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count
    # A model without any parameters would otherwise raise ZeroDivisionError.
    percentage = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {percentage}"
    )

In [None]:
# Display the model's `lm_head` attribute as the cell output.
model.lm_head

In [None]:
from torch import nn

# Freeze every existing weight; adapters are trained later.
for weight in model.parameters():
    weight.requires_grad = False
    # Small 1-D parameters (e.g. layernorm) are kept in fp32 for stability.
    if weight.ndim == 1:
        weight.data = weight.data.to(torch.float32)

model.gradient_checkpointing_enable()  # reduce number of stored activations
model.enable_input_require_grads()


class CastOutputToFloat(nn.Sequential):
    """Sequential container whose forward output is cast to float32."""

    def forward(self, x):
        result = super().forward(x)
        return result.to(torch.float32)


# Wrap the existing head so its output is returned as float32.
model.lm_head = CastOutputToFloat(model.lm_head)

In [None]:
# Display the model object (its repr) as the cell output.
model

In [None]:
from peft import LoraConfig, get_peft_model

# Names of the modules that LoRA adapters are attached to.
lora_target_modules = [
    "query_key_value",
    "dense",
    "dense_h_to_4h",
    "dense_4h_to_h",
]

config = LoraConfig(
    task_type="CAUSAL_LM",
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=lora_target_modules,
)

# Wrap the model with the adapters and report how much of it is trainable.
model = get_peft_model(model, config)
print_trainable_parameters(model)

In [None]:
# from datasets import load_dataset
# dataset = load_dataset('csv', data_files={
#     "train": "./Dataset/train.csv" , 
#     'test': "./Dataset/test.csv" , 
#     'valid' : './Dataset/valid.csv'
#     })


In [None]:
# Display the first record of the 'train' split as the cell output.
dataset['train'][0]

In [None]:
from pprint import pprint
# def merge_columns(example):
#     example["prediction"] = example["Abstract"] + " ->: " + str(example["Relations"])
#     return example

# dataset['train'] = dataset['train'].map(merge_columns)
# dataset['test'] = dataset['test'].map(merge_columns)
# dataset['valid'] = dataset['valid'].map(merge_columns)
# pprint(dataset['train']["prediction"][:1])
# pprint(dataset['test']["prediction"][:1])
# pprint(dataset['valid']["prediction"][:1])

In [None]:
def tokenize_batch(samples):
    """Tokenize the 'text' field of a batch, truncating at 2048 tokens."""
    return tokenizer(samples['text'], max_length=2048, truncation=True)


# Batched map: the tokenizer output is added to the dataset.
dataset = dataset.map(tokenize_batch, batched=True)

In [None]:
# Display the dataset object after the tokenisation map.
dataset

In [None]:
# Index the row first, then the field: column-first access
# (`dataset['train']['input_ids'][0]`) can pull the entire column into memory
# just to read a single element.
print(dataset['train'][0]['input_ids'])

In [None]:
import transformers

# Reuse the EOS token as the padding token so that batches can be padded.
# NOTE(review): assumes the tokenizer loaded from MODEL_HF_ID (a Falcon
# checkpoint, not gpt-neo-x) has no pad token of its own -- confirm.
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Number of records in the 'train' split.
len(dataset['train'])

In [None]:

# Hyper-parameters and runtime options for the fine-tuning run.
training_args = transformers.TrainingArguments(
    output_dir="outputs",
    do_eval=False,
    auto_find_batch_size=True,
    # per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    warmup_steps=200,
    num_train_epochs=10,
    learning_rate=2e-4,
    bf16=True,
    logging_steps=10,
    optim="paged_adamw_8bit",
    dataloader_drop_last=True,
    # eval_steps = 100
)

# Causal-LM collation (mlm=False) using the notebook's tokenizer.
collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer = transformers.Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    # eval_dataset = dataset['valid'] ,
    data_collator=collator,
)
model.config.use_cache = False  # silence the warnings. Please re-enable for inference!

In [None]:
# Run the fine-tuning loop; the value returned by `train()` is shown as the cell output.
trainer.train()

In [None]:
# Write the model and the tokenizer to the same directory.
# NOTE(review): `model` is the PEFT-wrapped model here, so this presumably saves
# the adapter weights rather than a full checkpoint -- confirm before reloading.
save_directory = "./Model_Medical"
model.save_pretrained(save_directory)
# Last expression of the cell: its return value is displayed as the output.
tokenizer.save_pretrained(save_directory)

Test Model

In [None]:
# Prompt text used for the generation smoke test below; kept verbatim.
test_prompt = "Although clinical studies have not established a cause and effect relationship, physicians should be aware that variable effects an blood coagulation have been reported very rarely in patients receiving oral anticoagulants and chlordiazepoxide. The concomitant use of alcohol or other central nervous system depressants may have an additive effect."

In [None]:
# Call the tokenizer directly: `encode_plus` is deprecated in favour of
# `__call__`, which returns the same encoding for a single string.
inputs = tokenizer(test_prompt, return_tensors="pt")

In [None]:
# The model was placed with device_map="auto", so follow the model's own device
# instead of assuming a bare 'cuda'.
target_device = model.device
input_ids = inputs["input_ids"].to(target_device)
attention_mask = inputs["attention_mask"].to(target_device)

In [None]:
# Switch to inference behaviour before generating:
#  - eval() turns off dropout (the LoRA adapters use lora_dropout=0.05), which
#    would otherwise presumably still be active after training;
#  - the KV cache was disabled for training and has to be re-enabled here.
model.eval()
model.config.use_cache = True

with torch.no_grad():
    output = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_length=512,
        num_return_sequences=1,
    )

In [None]:
# Display the first returned sequence (raw token ids) as the cell output.
output[0]

In [None]:
# Turn the first generated sequence of token ids back into text and show it.
first_sequence = output[0]
generated_text = tokenizer.decode(first_sequence)
print(generated_text)