In [None]:
# Show the working directory the kernel started in.
%pwd

In [None]:
import os
# Move the working directory up one level so later relative paths (e.g. '.env') resolve from the parent folder.
# NOTE(review): not idempotent — re-running this cell moves up one more level each time.
os.chdir("../")

In [None]:
# Confirm the working directory after the chdir above.
%pwd

In [None]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [None]:
# Extract the data from the PDF files
def load_pdf_file(data):
    """Load the PDF files found in a directory.

    Args:
        data: Path of the directory handed to ``DirectoryLoader``; only
            entries matching the ``*.pdf`` glob are loaded, each through
            ``PyPDFLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for that directory.
    """
    return DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader).load()

In [None]:
# Use a path relative to the working directory instead of a machine-specific absolute one.
# Assumes the earlier os.chdir("../") left the kernel in the project root that contains Data/ — confirm.
extracted_data = load_pdf_file(data='Data/')

In [None]:
# Preview only the first few loaded documents; displaying the whole collection floods the cell output.
extracted_data[:3]

In [None]:
# Split the data into chunks
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into overlapping chunks.

    Args:
        extracted_data: Documents to split (e.g. the output of
            ``load_pdf_file``).
        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter`` as
            ``chunk_size``. Defaults to 500, the value previously hard-coded.
        chunk_overlap: Forwarded to ``RecursiveCharacterTextSplitter`` as
            ``chunk_overlap``. Defaults to 20, the value previously hard-coded.

    Returns:
        The result of ``split_documents`` on ``extracted_data``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [None]:
# Split the loaded documents and report how many chunks were produced.
text_chunks=text_split(extracted_data)
print("Length of the Text chunks: ", len(text_chunks))

In [None]:
# Install the pinned sentence-transformers release into the kernel's environment.
%pip install sentence-transformers==2.2.2

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings

In [None]:
def download_hugging_face_embeddings():
    """Build the embedding model used throughout the notebook.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model.
    """
    model_id = "sentence-transformers/all-MiniLM-L6-v2"
    return HuggingFaceEmbeddings(model_name=model_id)

In [None]:
# Instantiate the embedding model once; later cells reuse this object.
embeddings = download_hugging_face_embeddings()

In [None]:
# Embed a sample string as a sanity check; the printed length should match the
# `dimension` used when the Pinecone index is created later in the notebook.
query_result = embeddings.embed_query("Hello World!")
print("Length of the query result: ", len(query_result))

In [None]:
# Preview the first few components only; the full vector is long and its length is already printed above.
query_result[:5]

In [None]:
import os
# Check that a .env file is present in the current working directory.
print(os.path.exists('.env'))

In [None]:
# Print the working directory that the relative '.env' lookup is resolved against.
print(os.getcwd())

In [None]:
from dotenv import load_dotenv
# Load the variables defined in .env so the os.getenv calls below can see them.
load_dotenv()

In [None]:
# Read both API keys from the environment.
# os.getenv returns None for an unset variable, so these may be None if .env is missing a key.
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

In [None]:
import os
from pinecone import Pinecone, ServerlessSpec

# Read the Pinecone API key from the environment and build the client with it.
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
pc = Pinecone(api_key=PINECONE_API_KEY)

index_name = "medicalbot"

# Create the index only when it is not listed yet, so the cell can be re-run.
if index_name in pc.list_indexes().names():
    print(f"Index '{index_name}' already exists. Skipping creation.")
else:
    # dimension=384 should equal the embedding length printed by the
    # "Hello World!" embedding check earlier — confirm if the model changes.
    pc.create_index(
        name=index_name,
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [None]:
import os

# Re-export the keys into the process environment for the clients used below
# (presumably the LangChain Pinecone / Google integrations read them from there — confirm).
# os.getenv returns None for an unset variable and os.environ rejects None with an
# opaque TypeError, so fail early with a message that names the missing key.
for _name, _value in (
    ("PINECONE_API_KEY", PINECONE_API_KEY),
    ("GOOGLE_API_KEY", GOOGLE_API_KEY),
):
    if not _value:
        raise ValueError(f"{_name} not found in environment; add it to your .env file")
    os.environ[_name] = _value

In [None]:
from langchain_pinecone import PineconeVectorStore

# Embed each chunk and upsert the embeddings into the Pinecone index.
# NOTE(review): presumably re-running this cell upserts the same chunks again —
# confirm whether that creates duplicate vectors before re-executing.
docsearch = PineconeVectorStore.from_documents(
    documents=text_chunks,
    index_name=index_name,
    embedding=embeddings,
)

In [None]:
# Loading the existing index

from langchain_pinecone import PineconeVectorStore
# Connect to the index by name with the same embedding model, rebinding `docsearch`
docsearch = PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embeddings
)

In [None]:
# Display the vector store object to confirm it was created.
docsearch

In [None]:
# Build a similarity-search retriever configured with k=3.
retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k": 3})

In [None]:
# Sanity-check retrieval with a sample medical question.
retrieved_docs = retriever.invoke("What is Acne?")

In [None]:
# Inspect what the retriever returned for the sample question.
retrieved_docs

In [None]:
import google.generativeai as genai

# Configure the Gemini client with the key from the environment
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

# Print every available model that supports generateContent.
# Any failure is reported instead of raised so the notebook keeps running.
try:
    for m in genai.list_models():
        if 'generateContent' not in m.supported_generation_methods:
            continue
        print(f"Name: {m.name}")
        print(f"Display Name: {m.display_name}")
        print(f"Description: {m.description}")
        print("------------------------")
except Exception as e:
    print(f"Error accessing models: {e}")

In [None]:
# Test basic generation to check whether the model is working.
# Errors are printed rather than raised so a failed call does not stop the notebook.
try:
    model = genai.GenerativeModel('gemini-1.5-pro')
    response = model.generate_content('Hello, are you working?')
    print(response.text)
except Exception as e:
    print(f"Error generating content: {str(e)}")

In [None]:
from dotenv import load_dotenv
import os
import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI

# Load the API key from .env (as done earlier in the notebook)
load_dotenv()

# Configure the Gemini API; Gemini is used here because the OpenAI API is pay-to-use
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

# Initialize the Gemini chat model (author's preference: expects better
# responses from Google's model). `llm` is reused by the RAG chain below.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0.4,
    max_output_tokens=500,
    google_api_key=os.getenv("GOOGLE_API_KEY")
)

# Invoke it directly with a question, the same way an OpenAI chat model would be used
response = llm.invoke("What are the symptoms of diabetes?")
print(response.content)

#### The code below is more robust and manageable, but it is not used for now in order to keep the project lightweight. More data will soon be fed in, and that knowledge base will be managed by this chunk of code.

In [None]:
# def create_medical_chat():
#     """Initialize and configure the medical chat assistant"""
#     from dotenv import load_dotenv
#     import os
#     import google.generativeai as genai
#     from langchain_google_genai import ChatGoogleGenerativeAI

#     # Load environment variables
#     load_dotenv()
#     api_key = os.getenv("GOOGLE_API_KEY")
#     if not api_key:
#         raise ValueError("GOOGLE_API_KEY not found in environment")

#     # Configure Gemini
#     genai.configure(api_key=api_key)

#     # Create chat model with medical-specific settings
#     llm = ChatGoogleGenerativeAI(
#         model="gemini-1.5-pro",
#         temperature=0.4,  # Lower temperature for more factual responses
#         max_output_tokens=1000,  # Increased for detailed medical responses
#         google_api_key=api_key,
#         streaming=True  # Enable streaming for faster initial responses
#     )

#     return llm

# # Initialize the medical chat assistant
# medical_chat = create_medical_chat()

# # Function to ask medical questions
# def ask_medical_question(question: str) -> str:
#     """Ask a medical question and get a response"""
#     try:
#         response = medical_chat.invoke(question)
#         return response.content
#     except Exception as e:
#         return f"Error: {str(e)}"

# # Example usage
# question = "What are the symptoms of diabetes?"
# print(ask_medical_question(question))

In [None]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate


# System instructions for the assistant; the {context} placeholder is
# presumably filled with the retrieved documents by the chain — confirm.
system_prompt = (
    "You are a medical assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    " If you don't know the answer, just say 'I don't know' do not say anything else other than 'I don't know'. "
    "Be concise and accurate. You can answer in just 3 sentences maximum. "
    "\n\n"
    "{context}"
)


# Chat prompt: the system message above plus the user's question, which is
# supplied under the "input" key when the chain is invoked.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

In [None]:
# Build the question-answering chain from the LLM and prompt, then combine it with the retriever.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [None]:
# Ask a medical question and print the "answer" field of the chain output.
response = rag_chain.invoke({"input": "What is the treatment for acne?"})
print(response["answer"])

In [None]:
# Non-medical question; presumably checks that the prompt's "I don't know" instruction is followed.
response = rag_chain.invoke({"input": "What is Philosophy?"})
print(response["answer"])

In [None]:
# Another medical question run through the same chain.
response = rag_chain.invoke({"input": "What is Acromegaly and gigantism?"})
print(response["answer"])