In [5]:
# Step 1: Install dependencies
!pip install -U huggingface_hub langchain chromadb transformers sentence-transformers langchain_community



In [6]:
# Step 2: Authenticate to Hugging Face
#!huggingface-cli login  # Enter your HF token

In [7]:
# Step 3: Download and save model locally
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "gpt2"  # Or try "mistralai/Mistral-7B-Instruct-v0.1" or "google/medgemma-7b"
local_dir = "./my_local_model"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

tokenizer.save_pretrained(local_dir)
model.save_pretrained(local_dir)

print(f"Model and tokenizer saved to: {local_dir}")

Model and tokenizer saved to: ./my_local_model


In [8]:
#Step 4: RAG Pipeline using LangChain
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA

from transformers import pipeline
import os

In [9]:
# Step 4.1: Load the model as a text generation pipeline
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(local_dir)
model = AutoModelForCausalLM.from_pretrained(local_dir)

text_gen_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    do_sample=True,
    top_p=0.95,
    temperature=0.7
)

llm = HuggingFacePipeline(pipeline=text_gen_pipeline)

Device set to use cpu


In [10]:
# Step 4.2: Prepare an embedding model (sentence-transformers)
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embedding = HuggingFaceEmbeddings(model_name=embedding_model_name)

In [11]:
# Step 4.3: Ingest sample documents
documents = [
    Document(page_content="Diabetes is a chronic condition caused by high blood sugar."),
    Document(page_content="Hypertension is also known as high blood pressure."),
    Document(page_content="The MedGemma model is specialized for biomedical tasks."),
]

In [12]:
# Step 4.4: Create or load Chroma vectorstore
vectorstore = Chroma.from_documents(documents, embedding=embedding, persist_directory="./chroma_db")

In [13]:
# Step 4.5: Create a retriever and build the RAG chain
retriever = vectorstore.as_retriever(search_kwargs={"k": 2})

rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
    chain_type="stuff"
)

In [14]:
# Step 4.6: Ask a question!
query = "What is MedGemma used for?"
response = rag_chain(query)

print("Answer:", response['result'])
print("Sources:", response['source_documents'])

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Answer: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

The MedGemma model is specialized for biomedical tasks.

The MedGemma model is specialized for biomedical tasks.

Question: What is MedGemma used for?
Helpful Answer: MedGemma is the name of a new type of biomedical tool that can be used to manipulate a protein and, in turn, to modify an enzyme. It's also the name of a new type of biomedical tool that can be used to manipulate a protein and, in turn, to modify an enzyme.

The MedGemma model is a type of biomedical tool that can be used to manipulate a protein. It can be used to manipulate an enzyme. It can be used to manipulate an enzyme.

MedGemma is used to manipulate a protein. It can be used to manipulate a enzyme.

Question: What is the purpose of MedGemma?

Helpful Answer: MedGemma is used to manipulate a protein. It can be used to manipulate a protein.

MedGem