In [None]:
# Print the NVIDIA driver / GPU status of this runtime before any model is loaded.
!nvidia-smi

In [None]:
!pip install peft trl datasets accelerate evaluate bitsandbytes transformers loralib flash-attn

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Base checkpoint to fine-tune; the tokenizer comes from the same repository.
model_name = "microsoft/phi-3-mini-128k-instruct"
tokenizer_name = model_name
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)

# Pad on the right, and pad with the unknown token instead of EOS: reusing EOS
# as padding is avoided here to prevent endless generation.
tokenizer.padding_side = 'right'
tokenizer.pad_token = tokenizer.unk_token
tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)


# 4-bit NF4 quantization with double quantization. The settings live in an
# explicit BitsAndBytesConfig (passing `load_in_4bit` / `bnb_4bit_*` straight
# to `from_pretrained` is the deprecated path), and the compute dtype is
# bfloat16 so it agrees with `torch_dtype` below and with `bf16=True` in the
# training configuration.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map="auto",
    trust_remote_code=True,
    use_cache=False,  # KV cache is switched off for the training run
    attn_implementation="flash_attention_2",  # load the model with flash-attention support
    torch_dtype=torch.bfloat16,
)

In [None]:
from peft import LoraConfig
from transformers import TrainingArguments

# Keyword arguments expanded into `TrainingArguments` below.
training_config = dict(
    bf16=True,
    do_eval=False,
    learning_rate=5.0e-06,
    log_level="info",
    logging_steps=20,
    logging_strategy="steps",
    lr_scheduler_type="cosine",
    num_train_epochs=1,
    max_steps=-1,
    output_dir="./checkpoint_dir",
    overwrite_output_dir=True,
    per_device_eval_batch_size=4,
    per_device_train_batch_size=4,
    remove_unused_columns=True,
    save_steps=100,
    save_total_limit=1,
    seed=0,
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs=dict(use_reentrant=False),
    gradient_accumulation_steps=1,
    warmup_ratio=0.2,
)

# Keyword arguments expanded into `LoraConfig` below: rank-16 adapters with
# alpha 32 and 5% dropout, targeting "all-linear" modules.
peft_config = dict(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules="all-linear",
    modules_to_save=None,
)

train_conf = TrainingArguments(**training_config)
peft_conf = LoraConfig(**peft_config)

In [None]:
from datasets import load_dataset
# Load the instruction-tuning CSV and take its "train" split directly.
train_dataset = load_dataset(
    "csv",
    data_files='./Fine-tuning instruction set, Jul 18.csv',
    split="train",
)

def apply_chat_template(
    row,
    tokenizer,
):
    """Turn one dataset row into a system / user / assistant conversation.

    Args:
        row: Mapping that provides the keys ``Question``,
            ``Reference Sentences``, ``Answer`` and ``Rationale``.
        tokenizer: Not used by this function; the parameter is kept so that
            callers passing it (e.g. through ``fn_kwargs``) keep working.

    Returns:
        A dict ``{"messages": [system, user, assistant]}`` in which every
        message is a dict with ``content`` and ``role`` keys.  Each
        ``content`` has its leading and trailing whitespace removed, so the
        line breaks and indentation that the triple-quoted templates carry at
        their edges do not leak into the training text (in particular, the
        assistant target no longer ends with a newline plus indentation).
    """
    system_prompt = """
You are an AI trained to assist as an HIV drug resistance professional researcher. Your role involves providing in-depth knowledge on the mechanisms of HIV drug resistance, current research, treatment options, and emerging trends in the field. You should be able to:
1. Explain the mechanisms of action of various antiretroviral drugs and how resistance develops.
2. Provide insights into the latest research and developments in HIV drug resistance.
3. Offer guidance on treatment strategies for patients with drug-resistant HIV.
4. Discuss the implications of drug resistance on public health policies and treatment protocols.
5. Interpret and analyze scientific data and research findings related to HIV drug resistance.
6. Stay updated with recent publications, studies, and clinical trials in the field.
7. Answer questions with a focus on accuracy, clarity, and the latest scientific evidence.
8. Provide references to reputable sources and research papers to support your explanations.

Your responses should reflect a deep understanding of virology, pharmacology, and clinical practice related to HIV. Ensure that your language is precise, professional, and accessible to both healthcare professionals and researchers.
"""

    # The request embeds the question and the quoted reference content.
    user_prompt = f"""
# See below is my request

Given a question in `Question` section below,
try to answer it using the content quoted in triple backticks,
explain how you found the answer from the content in details, store as `Rationale`.
format your answer as follows

'''
Answer: <answer>
Rationale: <explain>
'''

## Question

{row['Question']}

## Content

```
{row['Reference Sentences']}
```
        """

    # The training target: the expected answer followed by its rationale.
    assistant_reply = f"""
Answer: {row['Answer']}
Rationale: {row['Rationale']}
        """

    # Strip the template edges: the literals above start with a newline and the
    # f-strings end with a newline plus indentation that are layout artefacts.
    messages = [
        {"content": system_prompt.strip(), "role": "system"},
        {"content": user_prompt.strip(), "role": "user"},
        {"content": assistant_reply.strip(), "role": "assistant"},
    ]
    return {'messages': messages}

# Every original CSV column is removed after mapping, so only the generated
# `messages` column remains in the processed dataset.
column_names = [*train_dataset.features]

processed_train_dataset = train_dataset.map(
    apply_chat_template,
    fn_kwargs=dict(tokenizer=tokenizer),
    remove_columns=column_names,
    num_proc=10,
    desc="Applying chat template",
)

# TODO: the test set needs to come from a different source; for now it is
# just an alias of the training data.
processed_test_dataset = processed_train_dataset

In [None]:
from trl import SFTTrainer

# Supervised fine-tuning of the quantized base model with the LoRA settings
# from `peft_conf`; no evaluation dataset is supplied.
# NOTE(review): whether `tokenizer=` / `max_seq_length=` are accepted here
# depends on the installed trl version - confirm against the pinned release.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=processed_train_dataset,
    peft_config=peft_conf,
    args=train_conf,
    max_seq_length=4096,
)

# Run training, then log and persist the resulting metrics and trainer state.
train_result = trainer.train()
metrics = train_result.metrics
for record_metrics in (trainer.log_metrics, trainer.save_metrics):
    record_metrics("train", metrics)
trainer.save_state()


In [None]:
# Persist the trained model to ./hivdb; that directory is reloaded by the
# cells further down (via `load_adapter` and `from_pretrained`).
trainer.model.save_pretrained("./hivdb")

In [None]:
# Reload the base checkpoint with the same 4-bit NF4 settings used for
# training. The settings are passed through an explicit BitsAndBytesConfig
# (direct `load_in_4bit` / `bnb_4bit_*` kwargs are the deprecated path), and
# the compute dtype is bfloat16 so it matches `torch_dtype` below.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model2 = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map="auto",
    trust_remote_code=True,
    use_cache=False,
    attn_implementation="flash_attention_2",  # load the model with flash-attention support
    torch_dtype=torch.bfloat16,
)

# Attach the fine-tuned adapter that was saved to ./hivdb.
model2.load_adapter('./hivdb')

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Load the model from the ./hivdb directory and the tokenizer of the base
# checkpoint, then wrap both in a text-generation pipeline.
model = AutoModelForCausalLM.from_pretrained('./hivdb')
tokenizer = AutoTokenizer.from_pretrained(model_name)

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Placeholder conversation: both contents are empty and are meant to be filled
# in before running this cell.
messages = [
    dict(role="system", content=""),
    dict(role="user", content=""),
]

# Sampling is disabled; at most 1000 new tokens are generated and the prompt
# is included in the returned text.
generation_args = dict(
    max_new_tokens=1000,
    return_full_text=True,
    temperature=0.0,
    do_sample=False,
)

output = pipe(messages, **generation_args)
print(output[0]['generated_text'])