In [None]:
! pip install datasets

In [None]:
import os
import torch
import pandas as pd
import wandb
from datasets import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForLanguageModeling
from peft import LoraConfig, get_peft_model

# Weights & Biases logging is switched off for this run via the environment flag below.
# To turn it back on, drop the WANDB_DISABLED line and uncomment the two calls:
# wandb.login()  # Ensure you're logged into WandB
# wandb.init(project="medical-chatbot", name="fine-tuning", config={})
os.environ["WANDB_DISABLED"] = "true"
# ---------------------------
# Step 1: Load and Sample the Data
# ---------------------------
# Read the processed_medquad.csv file from the Kaggle input directory.
df = pd.read_csv("/kaggle/input/processed-medquad2/processed_medquad.csv")
# Rows with a missing question or answer would be rendered as the literal text "nan"
# by the prompt formatter, so discard them before sampling.
df = df.dropna(subset=["Question_Sentences", "Answer_Sentences"])
# Shuffle deterministically, then keep a small slice for a quick experiment.
df = df.sample(frac=1.0, random_state=42).reset_index(drop=True)
df_small = df.iloc[:100]  # ✅ Use small sample (change number if needed)

# Convert to Hugging Face Dataset
dataset = Dataset.from_pandas(df_small)

# Train/test split — seeded so the same rows end up in each split on every run.
split = dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = split["train"]
test_dataset = split["test"]


# ---------------------------
# Step 2: Model and Tokenizer Setup
# ---------------------------
# Hub identifier of the base checkpoint that gets fine-tuned.
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
# Directory that receives checkpoints, the final model and the tokenizer files.
OUTPUT_DIR = "./models/medical_chatbot_finetuned"
# Intended switch between LoRA (False) and QLoRA (True).
# NOTE(review): this flag is not read anywhere in the visible cells, so changing it has no effect.
USE_QLORA = False  # Set to False for LoRA, True for QLoRA if you want quantized weights

# Load the tokenizer that matches the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Reuse the end-of-sequence token for padding so max_length padding in the tokenize step works.
# NOTE(review): the language-modeling collator used later presumably masks pad-token positions
# out of the loss; with pad == eos that would also hide EOS tokens from training — confirm.
tokenizer.pad_token = tokenizer.eos_token  # Set padding token

# ---------------------------
# Step 3: Prepare Dataset
# ---------------------------
def format_instruction(example):
    """Build the instruction-style training text for one Q&A record.

    Args:
        example: Mapping with the keys "Question_Sentences" and "Answer_Sentences".

    Returns:
        A dict with a single key, "formatted_text", holding the instruction header,
        the question section and the answer section separated by blank lines.
    """
    sections = [
        "### Instruction:\nAnswer the following medical question in a concise and accurate manner.",
        f"### Question:\n{example['Question_Sentences']}",
        f"### Answer:\n{example['Answer_Sentences']}",
    ]
    # A blank line (two newlines) sits between consecutive sections.
    return {"formatted_text": "\n\n".join(sections)}

print("Formatting dataset...")
# Add the "formatted_text" column to the train split first, then to the test split.
train_formatted, test_formatted = (
    subset.map(format_instruction) for subset in (train_dataset, test_dataset)
)

def tokenize_function(examples):
    """Tokenize the "formatted_text" entries of a batch with the global tokenizer.

    Args:
        examples: Batch mapping that contains a "formatted_text" entry.

    Returns:
        The tokenizer output, truncated and padded to exactly 512 tokens per text.
    """
    texts = examples["formatted_text"]
    encoded = tokenizer(
        texts,
        padding="max_length",
        max_length=512,
        truncation=True,
    )
    return encoded

print("Tokenizing dataset...")
# Tokenize each split in batches and drop every pre-existing column so that only the
# tokenizer outputs remain. The train split is processed first, then the test split.
train_tokenized, test_tokenized = (
    formatted_split.map(
        tokenize_function,
        batched=True,
        remove_columns=formatted_split.column_names,
    )
    for formatted_split in (train_formatted, test_formatted)
)

# ---------------------------
# Step 4: Apply LoRA or QLoRA
# ---------------------------
# Load the base causal LM and let the library decide device placement.
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="auto")

# Adapter settings: rank-8 LoRA on the q_proj and v_proj modules, no bias training.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)

# Wrap the base model with the adapters and report how many parameters are trainable.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# ---------------------------
# Step 5: Training Setup
# ---------------------------
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    warmup_steps=50,
    logging_dir='./logs',
    logging_steps=10,
    # NOTE(review): eval_steps only matters when a step-based evaluation strategy is
    # enabled; none is configured here, so confirm whether mid-training eval is wanted.
    eval_steps=100,  # Evaluate every 100 steps
    save_steps=100,  # Save the model every 100 steps
    save_total_limit=1,
    # Mixed precision needs a CUDA device; fall back to full precision on CPU-only
    # runtimes instead of failing when the arguments are validated.
    fp16=torch.cuda.is_available(),
    # Explicitly disable experiment trackers (W&B etc.) rather than relying solely on
    # the WANDB_DISABLED environment variable.
    report_to="none",
)


# Causal-LM collator (mlm=False): batches the tokenized rows and derives the labels.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# ---------------------------
# Step 6: Trainer Setup
# ---------------------------
# Bundle model, hyper-parameters, both tokenized splits, tokenizer and collator.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    tokenizer=tokenizer,
    train_dataset=train_tokenized,
    eval_dataset=test_tokenized,
)

# ---------------------------
# Step 7: Train the Model
# ---------------------------
# Run the fine-tuning loop with the settings above.
trainer.train()

# ---------------------------
# Step 8: Save the Model
# ---------------------------
# Write the trained model and the tokenizer files side by side in OUTPUT_DIR.
trainer.save_model(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)

# # Log final model information to WandB
# wandb.log({"model_output_dir": OUTPUT_DIR})

print("✅ Fine-tuning complete and model saved.")


In [None]:
# Score the fine-tuned model on the held-out tokenized split.
eval_results = trainer.evaluate(test_tokenized)

# Show the metrics dictionary returned by the trainer.
print("Evaluation results: {}".format(eval_results))


In [None]:
!pip install evaluate

In [None]:
!pip install sacrebleu

In [None]:
!pip install rouge_score

In [None]:
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from evaluate import load
import sacrebleu
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer


In [None]:
# Re-create the tokenizer from the base checkpoint name for the generation cells below.
# NOTE(review): this rebinds the global `tokenizer`; the pad_token assignment applied to the
# tokenizer used for training is not repeated on this fresh instance — confirm that is intended.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)


def generate_answer(question, max_new_tokens=256):
    """Generate the model's reply to a medical question.

    The question is wrapped in the same instruction/question/answer template used for
    fine-tuning, ending right after the "### Answer:" marker.

    Args:
        question: The question text to insert into the prompt.
        max_new_tokens: Upper bound on the number of tokens generated after the
            prompt. Unlike a total-length cap, this budget is not consumed by the
            prompt itself.

    Returns:
        The decoded output sequence with special tokens removed. For a causal LM the
        sequence starts with the prompt text, so callers that only want the answer
        should take the part after "### Answer:".
    """
    prompt = (
        f"### Instruction:\nAnswer the following medical question in a concise and accurate manner.\n\n"
        f"### Question:\n{question}\n\n### Answer:"
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # generate() needs a pad token id; fall back to EOS when the tokenizer has none.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    # Make sure dropout (including LoRA dropout) is inactive while sampling text.
    model.eval()
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=max_new_tokens,
        pad_token_id=pad_token_id,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

sample_q = "How to diagnose Crimean-Congo Hemorrhagic Fever (CCHF)?"

# Generate once and reuse the text for both the preview and the metrics below;
# generation is the expensive step in this cell.
generated_answer = generate_answer(sample_q)

print("🔍 Sample Question:", sample_q)
print("💬 Model Answer:", generated_answer)


# Ensure NLTK tokenizer data is available. Older NLTK releases use "punkt"; newer ones
# look for "punkt_tab". A resource unknown to the installed release is reported by
# nltk.download without raising.
nltk.download("punkt")
nltk.download("punkt_tab")

# Expected answer (ground truth)
expected_answer = """Laboratory tests that are used to diagnose CCHF include antigen-capture enzyme-linked immunosorbent assay (ELISA), real time polymerase chain reaction (RT-PCR), virus isolation attempts, and detection of antibody by ELISA (IgG and IgM). Laboratory diagnosis of a patient with a clinical history compatible with CCHF can be made during the acute phase of the disease by using the combination of detection of the viral antigen (ELISA antigen capture), viral RNA sequence (RT-PCR) in the blood or in tissues collected from a fatal case and virus isolation. Immunohistochemical staining can also show evidence of viral antigen in formalin-fixed tissues. Later in the course of the disease, in people surviving, antibodies can be found in the blood. But antigen, viral RNA and virus are no more present and detectable."""

# Keep only what follows the first "### Answer:" marker. Removing just the section
# headers would leave the instruction sentence in the text and distort every overlap
# metric. If the marker is absent the whole string is kept.
generated_answer_clean = generated_answer.split("### Answer:", 1)[-1].strip()

print("🔍 Sample Question:", sample_q)
print("💬 Generated Answer:\n", generated_answer_clean)
print("📘 Expected Answer:\n", expected_answer)

# Tokenization (lower-cased so the comparison is case-insensitive)
ref_tokens = nltk.word_tokenize(expected_answer.lower())
gen_tokens = nltk.word_tokenize(generated_answer_clean.lower())

# BLEU Scores — each weight tuple sums to 1 over the n-gram orders it uses.
smoothing = SmoothingFunction().method1
print("\n📊 BLEU Scores:")
print("BLEU-1:", sentence_bleu([ref_tokens], gen_tokens, weights=(1, 0, 0, 0), smoothing_function=smoothing))
print("BLEU-2:", sentence_bleu([ref_tokens], gen_tokens, weights=(0.5, 0.5, 0, 0), smoothing_function=smoothing))
print("BLEU-3:", sentence_bleu([ref_tokens], gen_tokens, weights=(1 / 3, 1 / 3, 1 / 3, 0), smoothing_function=smoothing))
print("BLEU-4:", sentence_bleu([ref_tokens], gen_tokens, weights=(0.25, 0.25, 0.25, 0.25), smoothing_function=smoothing))

# ROUGE
rouge = load("rouge")
rouge_score = rouge.compute(predictions=[generated_answer_clean], references=[expected_answer])
print("\n📊 ROUGE Score:", rouge_score)

# F1 Score (word-level overlap)
def f1_from_tokens(ref_tokens, gen_tokens):
    """Compute an F1 score over the distinct tokens of two sequences.

    Both sequences are reduced to sets, so repeated tokens count once.

    Args:
        ref_tokens: Iterable of reference tokens.
        gen_tokens: Iterable of generated tokens.

    Returns:
        The harmonic mean of precision and recall of the set overlap, or 0 when
        there is no overlap or either side is empty.
    """
    reference_vocab = set(ref_tokens)
    generated_vocab = set(gen_tokens)
    shared = len(reference_vocab.intersection(generated_vocab))

    # Precision: share of generated vocabulary that also occurs in the reference.
    if generated_vocab:
        precision = shared / len(generated_vocab)
    else:
        precision = 0

    # Recall: share of reference vocabulary that the generation covers.
    if reference_vocab:
        recall = shared / len(reference_vocab)
    else:
        recall = 0

    if (precision + recall) > 0:
        return 2 * precision * recall / (precision + recall)
    return 0

# Set-overlap F1 between the reference tokens and the generated tokens.
f1_score = f1_from_tokens(ref_tokens=ref_tokens, gen_tokens=gen_tokens)
print("📊 F1 Score:", round(f1_score, ndigits=4))

# SacreBLEU on the raw strings: one hypothesis scored against one reference stream.
sacrebleu_score = sacrebleu.corpus_bleu(
    [generated_answer_clean],
    [[expected_answer]],
)
print("📊 SacreBLEU Score:", round(sacrebleu_score.score, ndigits=2))


In [None]:
pip install transformers datasets wandb


In [None]:
pip install --upgrade transformers
