In [None]:
# Load the base causal LM, attach the PEFT adapter on top of it, and load the
# matching tokenizer. Later cells rely on `model`, `tokenizer` and `device`.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base_model_name = "Qwen/Qwen2.5-0.5B"  # Hub id or local path of the base model
adapter_model_name = "adapter"  # path passed to PeftModel.from_pretrained for the adapter
# Use the GPU when one is visible and fall back to CPU otherwise, instead of
# hardcoding "cuda" and failing on machines without a GPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

model = AutoModelForCausalLM.from_pretrained(base_model_name).to(device)
# Wrap the already-placed base model with the adapter weights.
model = PeftModel.from_pretrained(model, adapter_model_name)
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

In [None]:
# Smoke-test the adapted model on a single prompt.
prompt = "What is glaucoma?"

# Calling the tokenizer (rather than `.encode`) also yields the attention mask,
# so `generate` does not have to guess which positions are padding.
encoded = tokenizer(prompt, return_tensors="pt").to(device)
inputs = encoded["input_ids"]  # kept under the original name as a plain tensor

# Pass everything by keyword: the PEFT wrapper forwards keyword arguments to the
# base model's `generate`, and an explicit token budget bounds the run time.
outputs = model.generate(
    input_ids=inputs,
    attention_mask=encoded["attention_mask"],
    max_new_tokens=128,
)

# Drop special tokens (e.g. end-of-text markers) from the printed answer.
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

In [None]:
# Build the train / validation / test splits from the MLE screening dataset.
from data_processing import DataProcessor


def _attach_id(example, idx):
    """Return a copy of `example` with its positional index stored under "id"."""
    return {**example, "id": idx}


dp = DataProcessor("files/mle_screening_dataset.csv")

# Only the screening set is loaded; the PubMedQA path below is disabled.
mle_set = dp.load_data_screening()
# pub_set = dp.load_data_pubmedqa()
# dataset = dp.concatenate_datasets(mle_set, pub_set)

# Split first, then give every split its own 0-based "id" column.
raw_splits = dp.train_test_validation_split(mle_set)
indexed_splits = [part.map(_attach_id, with_indices=True) for part in raw_splits]

# Finally convert each split with the processor's conversational formatter.
train, validation, test = [
    dp.format_dataset_for_conversational_ai(part) for part in indexed_splits
]

In [None]:
# Build a text-generation pipeline around the fine-tuned checkpoint directory.
import torch
import transformers

# Example conversation in chat-message form (used by the commented call below).
chat = [
    {"role": "system", "content": "You are helpful"},
    {"role": "user", "content": "What is (are) Surviving Cancer ?"},
]

# Call the factory through the `transformers` module so that binding the result
# to `pipeline` (the name the evaluation cell reads) does not overwrite the
# factory itself; the cell can therefore be re-run without a fresh kernel.
# `tokenizer` and `device` come from the model-loading cell.
pipeline = transformers.pipeline(
    task="text-generation",
    model="qwen_0.5_mle/",
    torch_dtype=torch.bfloat16,
    device_map=device,
    tokenizer=tokenizer,
)
# response = pipeline(chat, max_new_tokens=512)


In [None]:
from evaluate_model import evaluator

# Score the generation pipeline on a small slice of the validation split.
task_evaluator = evaluator("text-generation")

eval_subset = validation.select(range(5))  # first five validation examples
eval_metrics = ["f1", "rouge", "bleu"]

results = task_evaluator.compute(
    model_or_pipeline=pipeline,
    data=eval_subset,
    metric=eval_metrics,
    input_column="messages",  # column holding the model input
)
print(results)