In [1]:
# Mount Google Drive at /content/drive; the adapter checkpoint loaded later in
# this notebook lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install -U transformers datasets peft accelerate bitsandbytes


Collecting datasets
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting peft
  Downloading peft-0.15.2-py3-none-any.whl.metadata (13 kB)
Collecting accelerate
  Downloading accelerate-1.6.0-py3-none-any.whl.metadata (19 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.13.0->peft)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-non

In [3]:
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
import torch
import os

# --- Checkpoint locations ---
base_model = "medalpaca/medalpaca-7b"
adapter_path = "/content/drive/MyDrive/openllama-lora-finetuned4"
offload_dir = "/tmp/offload"

# 4-bit NF4 quantization with double quantization; compute runs in fp16.
# The fp32 CPU-offload flag is enabled alongside device_map="auto" below.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
    llm_int8_enable_fp32_cpu_offload=True,
)

# Make sure the spill directory is present before the model is dispatched.
os.makedirs(offload_dir, exist_ok=True)

# Quantized base model; device_map="auto" decides module placement and
# offload_folder is the on-disk location for weights that get offloaded.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    torch_dtype=torch.float16,
    device_map="auto",
    offload_folder=offload_dir,
    quantization_config=bnb_config,
)

# Wrap the base model with the fine-tuned PEFT adapter stored on Drive.
model = PeftModel.from_pretrained(model, adapter_path)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/542 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/28.1k [00:00<?, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/7.18G [00:00<?, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/9.88G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/9.89G [00:00<?, ?B/s]



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

In [4]:
from transformers import AutoTokenizer, AutoModelForCausalLM
# Load the tokenizer published with the base model checkpoint.
tokenizer = AutoTokenizer.from_pretrained(base_model)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token  # to prevent padding errors


tokenizer_config.json:   0%|          | 0.00/260 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/96.0 [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file you can ignore this message
You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama_fast.LlamaTokenizerFast'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggin

In [5]:
def generate_response(drug_a, drug_b):
    """Print the model's free-text analysis of a two-drug interaction.

    Args:
        drug_a: Name of the first drug; interpolated into the prompt.
        drug_b: Name of the second drug; interpolated into the prompt.

    Returns:
        None. The generated text is printed.

    Uses the notebook-level ``model`` and ``tokenizer``. Sampling is enabled
    (``do_sample=True``), so repeated calls may print different text.
    """
    prompt = f"You are a medical expert. Analyze the following drug interaction.\n\nDrug A: {drug_a}\nDrug B: {drug_b}\n\nAnswer:"

    # Follow the model's own placement instead of assuming "cuda", matching
    # analyze_drug_interaction and keeping the call usable wherever the model
    # was dispatched.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_len = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.2,
            do_sample=True,
            eos_token_id=tokenizer.eos_token_id,
            # Explicit pad id avoids generate() having to guess one.
            pad_token_id=tokenizer.pad_token_id,
        )

    # The returned sequence starts with the prompt tokens. Slice them off
    # rather than splitting on "Answer:", which would drop real content
    # whenever the model repeats that marker in its continuation.
    result = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    print(result.strip())


In [None]:
!pip install gradio

In [28]:
import gradio as gr
import torch

# --- Medication advice tips ---
# Maps a lowercase side-effect keyword to a short tip. extract_advice() searches
# the model's answer for each keyword and emits the matching tip.
advice_map: dict[str, str] = {
    "nausea": "Try taking the medication with food. Stay hydrated.",
    "headache": "Ensure proper rest. Over-the-counter pain relief may help.",
    "rash": "Discontinue use and contact your doctor immediately.",
    "insomnia": "Avoid caffeine, maintain a regular sleep schedule.",
    "dizziness": "Avoid driving or heavy machinery. Sit/lie down until it passes.",
    "fatigue": "Ensure you're sleeping enough. Consider adjusting dosage."
}

# --- Extract tips from response ---
def extract_advice(response_text, tips=None):
    """Build a Markdown bullet list of tips for side effects named in a text.

    Args:
        response_text: Text to scan (case-insensitive). ``None`` or an empty
            string yields the fallback message.
        tips: Optional mapping of side-effect keyword -> advice string.
            Defaults to the notebook-level ``advice_map``.

    Returns:
        One ``- **Keyword**: advice`` line per matched keyword, in mapping
        order and joined by newlines, or ``"No specific advice found."`` when
        nothing matches.
    """
    import re

    if tips is None:
        tips = advice_map
    if not response_text:
        return "No specific advice found."

    # Lowercase once instead of once per keyword.
    lowered = response_text.lower()

    bullets = [
        f"- **{keyword.capitalize()}**: {advice}"
        for keyword, advice in tips.items()
        # Anchor the keyword at a word start: "rash" must not fire on "crash",
        # while inflected forms such as "rashes" or "headaches" still match.
        if re.search(r"\b" + re.escape(keyword.lower()), lowered)
    ]
    return "\n".join(bullets) if bullets else "No specific advice found."

# --- Inference function ---
def analyze_drug_interaction(drug_a, drug_b):
    """Ask the model about a two-drug interaction and append matching tips.

    Args:
        drug_a: Name of the first drug (surrounding whitespace is ignored).
        drug_b: Name of the second drug (surrounding whitespace is ignored).

    Returns:
        The model's answer followed by a "Tips" section produced by
        ``extract_advice``. If either name is blank, or anything raises during
        generation, a string starting with "❌ Error:" is returned instead so
        the UI always has text to display.

    Uses the notebook-level ``model`` and ``tokenizer``. Sampling is enabled,
    so identical inputs may produce different answers.
    """
    # Reject blank input up front instead of prompting the model with nothing.
    drug_a = (drug_a or "").strip()
    drug_b = (drug_b or "").strip()
    if not drug_a or not drug_b:
        return "❌ Error: Please enter both drug names."

    try:
        prompt = (
            "You are a medical expert. Analyze the following drug interaction.\n\n"
            f"Drug A: {drug_a}\n"
            f"Drug B: {drug_b}\n\n"
            "Provide the combined interaction outcome and common adverse effects.\n\n"
            "Answer:"
        )

        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        prompt_len = inputs["input_ids"].shape[-1]

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=200,
                temperature=0.7,
                top_p=0.9,
                repetition_penalty=1.2,
                do_sample=True,
                eos_token_id=tokenizer.eos_token_id,
                # Explicit pad id avoids generate() having to guess one.
                pad_token_id=tokenizer.pad_token_id,
            )

        # The returned sequence starts with the prompt tokens. Decode only the
        # continuation: splitting the full text on "Answer:" would keep just
        # the last segment if the model repeats that marker.
        continuation = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
        # Cut at the first "Q:" so a self-invented follow-up question is dropped.
        answer = continuation.split("Q:")[0].strip()
        advice = extract_advice(answer)

        return f"{answer}\n\n💡 Tips:\n{advice}"

    except Exception as e:
        # Deliberate catch-all: surface the failure in the UI instead of crashing.
        return f"❌ Error: {str(e)}"

# --- Custom CSS for theme ---
# Stylesheet passed to gr.Blocks(css=...). The selectors target the elem_id
# ("app-container") and elem_classes ("drug-container", "drug-input",
# "button-container", "action-button", "output-box") set on components in the
# layout, plus the class names used inside the gr.HTML snippets
# ("app-heading", "output-label", "drug-label", "example-text").
css = """
body { background-color: #f0f4f8; font-family: 'Segoe UI', Arial, sans-serif; }
.gradio-container { max-width: 1000px !important; margin: 0 auto; }
#app-container {
    background-color: #e6f2ff;
    border-radius: 20px;
    overflow: hidden;
    padding: 20px !important;
    margin: 20px auto;
    max-width: 900px;
    box-shadow: 0 5px 15px rgba(0, 0, 0, 0.3);
}
.app-heading {
    color: #1a4d8f;
    font-size: 22px;
    font-weight: bold;
    text-align: center;
    margin-bottom: 20px;
}
.output-label {
    color: #1a4d8f;
    font-size: 18px;
    font-weight: 600;
    margin-bottom: 10px;
}
.output-box textarea {
    background-color: #f7fafd !important;
    color: #222 !important;
    padding: 12px !important;
    min-height: 280px !important;
    font-size: 15px !important;
}
.drug-container {
    background-color: #333;
    border-radius: 6px;
    padding: 6px 10px;
    margin-bottom: 10px;
}
.drug-label {
    color: #f0ad4e;
    font-size: 14px;
    margin-bottom: 3px;
}
.example-text {
    color: #aaa;
    font-size: 11px;
    margin-bottom: 6px;
}
.drug-input input {
    background-color: #3a404d !important;
    color: #ccc !important;
    padding: 6px 10px !important;
}
.button-container {
    margin-top: 12px;
    display: flex;
    gap: 10px;
}
.action-button {
    background-color: #1a4d8f !important;
    color: white !important;
    border-radius: 5px !important;
    padding: 12px !important;
    font-size: 15px !important;
    cursor: pointer;
    flex: 1;
}
"""

# --- Gradio UI ---
def _drug_field(title, example):
    """Render one styled drug-name column (label, example hint, textbox) and return the textbox."""
    with gr.Column(scale=1, elem_classes="drug-container"):
        gr.HTML(f'<div class="drug-label">{title}</div>')
        gr.HTML(f'<div class="example-text">e.g. {example}</div>')
        field = gr.Textbox(label="", placeholder="Enter drug name", elem_classes="drug-input")
    return field


with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
    with gr.Column(elem_id="app-container"):
        gr.HTML('<div class="app-heading">Drug Interaction Analysis Using MedLLaMA</div>')

        with gr.Row():
            # Left half: the two drug fields with the action buttons beneath them
            with gr.Column(scale=10):
                with gr.Row():
                    drug_a = _drug_field("Drug A", "Warfarin")
                    drug_b = _drug_field("Drug B", "Verapamil")

                with gr.Row(elem_classes="button-container"):
                    analyze_btn = gr.Button("Analyze Interaction", elem_classes="action-button")
                    clear_btn = gr.Button("Clear Chat", elem_classes="action-button")

            # Right half: read-only box that shows the model's answer and tips
            with gr.Column(scale=10):
                gr.HTML('<div class="output-label">Interaction Output</div>')
                interaction_output = gr.Textbox(
                    value="",
                    label="",
                    lines=15,
                    max_lines=20,
                    interactive=False,
                    elem_classes="output-box",
                )

        # Event wiring: run inference on "Analyze", blank all three boxes on "Clear"
        analyze_btn.click(
            fn=analyze_drug_interaction,
            inputs=[drug_a, drug_b],
            outputs=interaction_output,
        )
        clear_btn.click(
            fn=lambda: ("",) * 3,
            inputs=[],
            outputs=[interaction_output, drug_a, drug_b],
        )

# share=True requests a public gradio.live URL for the app
demo.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://10a8ebd63b17d104f3.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


