In [7]:
from transformers import pipeline

# Build a text-generation pipeline for the TibbScholar checkpoint and place it
# on the first CUDA device (index 0).
pipe = pipeline(
    task="text-generation",
    model="Aasher/TibbScholar",
    device=0,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0


In [9]:
# Question/Answer style prompt; the model continues the text after "Answer:".
prompt = (
    "Question:\n"
    "What are the risks in dental implant surgery?\n"
    "\n"
    "Answer:\n"
)

# Sample up to 256 new tokens at temperature 1.
response = pipe(
    prompt,
    do_sample=True,
    temperature=1,
    max_new_tokens=256,
)

# The pipeline returns a list of dicts; the generated text of the first entry
# contains the prompt followed by the model's continuation.
print(response[0]["generated_text"])

Question:
What are the risks in dental implant surgery?

Answer:
The risks in dental implant surgery include infection, which can occur after surgery due to the creation of a new surface and wound drainage through the skin. Additionally, the placement of screws with a pointed head and short handle creates a risk of bone fracturing, which can result in a fractured screw. To reduce the risks, the placement of implants can be performed in an upright and parallel manner, using a round head screw with a handle or a tapered screw with a handle that is straightened for insertion.


In [None]:
import gradio as gr
from transformers import pipeline
import torch

# --- 1. Load the Model and Pipeline (Done once on startup) ---

# Pick the device index handed to the pipeline: 0 (first CUDA GPU) when CUDA
# is usable, otherwise -1 (CPU). This makes the app work on both GPU and CPU
# systems.
try:
    _cuda_available = torch.cuda.is_available()
except Exception:
    # Best-effort: any failure while probing CUDA is treated as "no GPU".
    _cuda_available = False

# Report the device that was actually selected. The message must depend on the
# probe result, otherwise a CPU-only machine would claim to have found a GPU.
if _cuda_available:
    device = 0
    print("✅ Found GPU! Using device: cuda:0")
else:
    device = -1
    print("⚠️ No GPU found. Using device: CPU")

# Fetch the fine-tuned model from the Hugging Face Hub and wrap it in a
# text-generation pipeline. The resulting `pipe` object is created once here
# and reused for every prediction.
print("Loading TibbScholar model... This may take a moment.")
pipe = pipeline(
    task="text-generation",
    model="Aasher/TibbScholar",
    torch_dtype=torch.bfloat16,  # bfloat16 for better performance on modern GPUs
    device=device,
)
print("✅ TibbScholar model loaded successfully!")


# --- 2. Define the Prediction Function ---

def get_tibb_response(user_question: str) -> str:
    """
    Answer a question with the module-level ``pipe`` text-generation pipeline.

    The question is wrapped in the "Question: ... Answer:" prompt layout,
    passed to the pipeline, and only the newly generated text is returned.

    Args:
        user_question: The question typed by the user.

    Returns:
        The generated answer with surrounding whitespace removed, or a short
        hint asking for input when the question is empty or whitespace only.
    """
    question = (user_question or "").strip()
    if not question:
        # Nothing to ask: skip generation instead of sampling from an empty prompt.
        return "Please enter a medical question."

    prompt = f"Question:\n{question}\n\nAnswer:\n"

    # Generate the response using the pipeline
    response = pipe(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.5,
        top_p=0.95
    )

    # The output includes the original prompt followed by the continuation.
    full_text = response[0]["generated_text"]

    if full_text.startswith(prompt):
        # Drop the exact prompt prefix. Splitting on "Answer:" would break when
        # the question or the generated answer itself contains that marker.
        answer_part = full_text[len(prompt):]
    else:
        # Fallback when the prompt is not echoed verbatim: keep everything after
        # the first "Answer:" marker, or the whole text if the marker is absent.
        _, marker, tail = full_text.partition("Answer:")
        answer_part = tail if marker else full_text

    return answer_part.strip()


# --- 3. Create and Launch the Gradio Interface ---

# Sample questions offered below the form so users can try the app quickly.
EXAMPLE_QUESTIONS = [
    "What risk factors are associated with abdominal aortic aneurysm rupture?",
    "Is tumor budding a factor in gastric cancer?",
    "What is the difference between Type 1 and Type 2 diabetes?",
    "What are the early symptoms of Parkinson's disease?",
]

with gr.Blocks(title="TibbScholar", theme=gr.themes.Soft()) as app:
    # Page heading, rendered from Markdown.
    gr.Markdown(
        """
        # 🩺 TibbScholar: Your Medical AI Assistant
        """
    )

    # Where the user types the question.
    question_input = gr.Textbox(
        placeholder="e.g., What is the difference between Type 1 and Type 2 diabetes?",
        label="Your Medical Question",
    )

    # Read-only box that displays the generated answer.
    answer_output = gr.Textbox(
        interactive=False,
        lines=10,
        label="TibbScholar's Answer",
    )

    # Primary action button.
    submit_button = gr.Button("Get Answer", variant="primary")

    # Clickable example questions wired to the same input/output components.
    gr.Examples(
        examples=EXAMPLE_QUESTIONS,
        inputs=question_input,
        outputs=answer_output,
        fn=get_tibb_response,
    )

    # Clicking the button runs the prediction function on the question text
    # and writes the result into the answer box.
    submit_button.click(get_tibb_response, question_input, answer_output)

# Start the app. share=True requests a public link, which is what you want
# when running inside Google Colab.
app.launch(share=True)

✅ Found GPU! Using device: cuda:0
Loading TibbScholar model... This may take a moment.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0


✅ TibbScholar model loaded successfully!
Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://d822c82b79404138ac.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


