In [None]:
import chromadb
# Create the Chroma client used by the rest of the notebook. No path is passed, so the
# library's default storage location applies (presumably ./chroma -- confirm against the
# installed chromadb version).
client = chromadb.PersistentClient() 

In [None]:
# Open the "drd2" collection, creating it only if it does not exist yet.
# Author's note: the persistent collection had already been created in an earlier run.
collection = client.get_or_create_collection(name="drd2")

In [None]:
import json
# Load the collected paper chunks and split them into the three parallel lists
# (ids, metadatas, texts) that the upsert cell below passes to the collection.
# The file is read explicitly as UTF-8 so that non-ASCII titles and author names decode
# the same way on every platform instead of depending on the locale's default encoding.
with open('processed_chunks.json', 'r', encoding='utf-8') as f:
    chunks = json.load(f)

# Every chunk must provide all of the keys read below; a missing key raises KeyError here.
metadatas = [
    {
        "title": chunk["title"],
        "first_author": chunk["first_author"],
        "date_published": chunk["date_published"],
    }
    for chunk in chunks
]
ids = [chunk["chunk_id"] for chunk in chunks]
texts = [chunk["text"] for chunk in chunks]

In [None]:
# Upsert in bounded batches rather than one giant call: Chroma rejects a call whose size
# exceeds its maximum batch size (the limit depends on the installed build), so a large
# chunk list would otherwise fail with a ValueError. An empty chunk list is simply a no-op.
# Upserts are keyed by id, so re-running this cell overwrites entries instead of duplicating.
UPSERT_BATCH_SIZE = 100  # deliberately small; NOTE(review): raise if throughput matters
for start in range(0, len(ids), UPSERT_BATCH_SIZE):
    end = start + UPSERT_BATCH_SIZE
    collection.upsert(
        ids=ids[start:end],
        metadatas=metadatas[start:end],
        documents=texts[start:end],
    )
# Author's note: the full update takes about 7 minutes.


In [None]:
import os

import google.generativeai as genai

# Read the API key from the environment instead of embedding it in the notebook, so the
# secret is never saved or committed with the file. Fail early with a clear message when
# it is missing rather than letting the first model call fail later.
gemini_api_key = os.environ.get("GEMINI_API_KEY")
if not gemini_api_key:
    raise RuntimeError(
        "Set the GEMINI_API_KEY environment variable before running this cell."
    )
genai.configure(api_key=gemini_api_key)

# Shared model handle used by generate_answer_with_context().
model = genai.GenerativeModel('gemini-2.0-flash-lite')

In [None]:
# Retrieve relevant chunks from Chroma, format them, and pass them to Gemini
def RAG(query: str, n_results: int = 8) -> str:
    """Answer ``query`` with retrieval-augmented generation.

    Queries the module-level ``collection`` for the closest chunks, prefixes each
    chunk with its title, publication date and first author, and forwards the
    formatted passages to ``generate_answer_with_context``.

    Args:
        query: The question to answer; also used as the retrieval query text.
        n_results: Maximum number of chunks to request from the collection.

    Returns:
        Whatever ``generate_answer_with_context`` returns for the query and the
        formatted passages.
    """
    retrieved_chunks = collection.query(
        query_texts=query,
        n_results=n_results,
    )
    # Results are nested one list per query text; only one query was sent, so use index 0.
    metadatas = retrieved_chunks['metadatas'][0]
    documents = retrieved_chunks['documents'][0]
    # Iterate over what was actually returned: the collection may yield fewer than
    # n_results hits, and indexing by range(n_results) would then raise IndexError.
    return_to_gemini = [
        f"Retrieved from \"{meta['title']},\" {meta['date_published']}, "
        f"by {meta['first_author']} et al.: {doc}"
        for meta, doc in zip(metadatas, documents)
    ]
    return generate_answer_with_context(query, retrieved_chunks=return_to_gemini)
    

def generate_answer_with_context(query: str, retrieved_chunks: list[str]) -> str:
    """Ask the module-level Gemini ``model`` to answer ``query`` from the given passages.

    Args:
        query: The clinician's question, inserted verbatim into the prompt.
        retrieved_chunks: Pre-formatted source passages; they are joined with blank
            lines to form the prompt's context section.

    Returns:
        The model's reply text, or a string starting with "Error generating response:"
        if generating the reply or reading its text raised an exception.
    """
    # Separate the passages with a blank line so each source stands apart in the prompt.
    chunk_separator = "\n\n"
    context = chunk_separator.join(retrieved_chunks)

    prompt = f"""You are a RAG system designed to provide useful genetic and genomic information for clinicians to aid their work
    diagnosis, etiology, treatment and anything else they need.
    A clinician has entered the query below and the context contains relevant chunks retrieved from a database of research on the DRD2 gene. 
    Use as many or as few of the sources as you need to answer the question accurately and concisely. You may also use existing knowledge from your training base.
    Please cite studies if applicable. If the provided context does not have information to answer the question, please state as much.

    Context:
    {context}

    Question: {query}
    """

    # Reading .text stays inside the try so a failure there is also reported as a string.
    try:
        answer = str(model.generate_content(prompt).text)
    except Exception as e:
        return f"Error generating response: {e}"
    return answer

In [None]:
# Example clinician question: RAG() retrieves supporting chunks from the collection and
# returns Gemini's answer, which is printed so its line breaks render as plain text.
query = "Are there known drug-drug interactions that specifically impact medications targeting the DRD2 receptor?"
print(RAG(query))