In [None]:
!pip install langchain langchain-community sentence-transformers chromadb langchain-google-genai pandas


: 

In [1]:
# --- 1. Import the standard helpers ---
# (Packages are installed by the separate install cell above; these imports must be re-run after every kernel restart.)

import os
import pandas as pd
import io
from getpass import getpass

# --- 2. Set up your Google API Key ---
# LangChain will use this key to talk to the Gemini model.
# Get your key from Google AI Studio: https://aistudio.google.com/app/apikey
#
# Prompt when the variable is missing OR blank: an empty value (for example
# from `export GOOGLE_API_KEY=`) would otherwise skip the prompt and leave an
# empty credential in place. getpass keeps the key out of the saved notebook.
if not os.environ.get('GOOGLE_API_KEY', '').strip():
    os.environ['GOOGLE_API_KEY'] = getpass("Enter your Google API Key: ")


# --- 3. Prepare Your Data ---
# A five-row, FAERS-style sample is kept inline as CSV text, parsed into a
# DataFrame, and then every report is rendered as one English sentence so it
# can be embedded as a standalone piece of text.
csv_data = """Report_ID,Drug_Name,Adverse_Event,Patient_Age,Description
FAERS-001,Sertraline,Amenorrhea,28,"Patient experienced a loss of menstrual cycle after three months on Sertraline."
FAERS-002,Ibuprofen,Headache,45,"Standard headache reported after taking Ibuprofen for a fever."
FAERS-003,Apixaban,Menorrhagia,52,"Patient reported unusually heavy and prolonged menstrual bleeding (menorrhagia)."
FAERS-004,Sertraline,Insomnia,34,"Patient had difficulty sleeping after starting the medication."
FAERS-005,Sertraline,Amenorrhea,31,"Second report of amenorrhea linked to Sertraline, cycle returned after stopping treatment."
"""
df = pd.read_csv(io.StringIO(csv_data))

# One chunk per report, in the same order as the rows of `df`.
knowledge_chunks = [
    f"In report {report['Report_ID']}, the drug {report['Drug_Name']} "
    f"was associated with the adverse event '{report['Adverse_Event']}' "
    f"in a {report['Patient_Age']}-year-old patient. "
    f"Description: {report['Description']}"
    for report in df.to_dict("records")
]

print("✅ Data prepared into knowledge chunks.")


# --- 4. The LangChain Part (The new "Lego Blocks") ---
# Here we'll use LangChain's components to build the RAG system.

# Import the "Lego blocks" we need
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser

# BLOCK A: The Embedding Model and Vector Database
# We tell LangChain which model to use for embeddings and create the vector store.
print("🧠 Initializing embedding model and vector store...")
# NOTE(review): the saved cell output echoes this line as the source of a
# warning — presumably a LangChain deprecation notice for this class; confirm
# before upgrading the langchain packages.
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# Give every chunk a stable id (its report id). Without explicit ids each run
# of this cell stores the chunks under fresh random ids, so re-running the cell
# in the same kernel duplicates every report and the top-k results can end up
# being the same chunk twice. With fixed ids a re-run overwrites instead.
vectorstore = Chroma.from_texts(
    texts=knowledge_chunks,
    embedding=embedding_function,
    ids=df["Report_ID"].astype(str).tolist(),  # one id per chunk, same order as knowledge_chunks
)
retriever = vectorstore.as_retriever(search_kwargs={"k": 2}) # This will retrieve the top 2 most relevant chunks.
print("✅ Vector store is ready.")

# BLOCK B: The LLM and Prompt Template
# We set up the Gemini model and create a template for our prompt.
#
# The model id is configurable because hosted model names get retired: the
# previously hard-coded "gemini-pro" id is no longer served by the Gemini API.
# Set the GEMINI_MODEL environment variable to try another model without
# editing the notebook.
GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-2.5-flash")
llm = ChatGoogleGenerativeAI(model=GEMINI_MODEL)

# {context} receives the retrieved text, {question} the user's raw question.
template = """
System Instruction: You are a helpful AI assistant. Answer the user's question based ONLY on the context provided below.

Context:
---
{context}
---

User's Question: {question}
"""
prompt = PromptTemplate.from_template(template)

# BLOCK C: The RAG Chain
# This is the magic of LangChain! We "chain" all our blocks together.
# The pipe symbol `|` passes the output of one block to the input of the next.
def format_docs(docs):
    """Join retrieved documents into a single plain-text context string.

    Args:
        docs: Iterable of retrieved documents; each item must expose a
            `page_content` attribute holding the chunk text.

    Returns:
        The chunk texts separated by blank lines, or "" when `docs` is empty.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# The retriever returns document objects, not strings. Piping it through
# format_docs ensures the prompt's {context} slot receives the chunk text
# itself rather than the Python repr of a list of documents.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

print("🔗 RAG chain created.")


# --- 5. Ask a Question! ---
# The chain takes the question as a plain string and hands back the answer text.
question = "What is the link between Sertraline and Amenorrhea?"

print("\n🤔 Asking the RAG chain a question...")
response = rag_chain.invoke(question)

# Blank line, heading, then the answer on its own line.
print("\n✅ Final Answer:", response, sep="\n")

✅ Data prepared into knowledge chunks.
🧠 Initializing embedding model and vector store...


  embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.3.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/Users/deliciamagdaline/Desktop/faers_menstrual_rag_project/venv/lib/python3.11/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/Users/deliciamagdaline/Desktop/faers_menstrual_rag_project/venv/lib/python3.11/site-packages/traitlets/config/application.py", line 1075, in launch_insta

RuntimeError: Numpy is not available

In [None]:
!pip install numpy

: 