In [1]:
# Import libraries
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_mistralai import ChatMistralAI 
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_pinecone import PineconeVectorStore
from dotenv import load_dotenv
import os
import pinecone

In [3]:
# Load environment variables (the error messages below expect the API keys to
# live in a .env file) so that os.getenv can read them in the next cells.
load_dotenv()

True

In [4]:
# Read both service credentials from the process environment.
# Each value is None when the corresponding variable is not set.
MISTRAL_API_KEY = os.environ.get("MISTRAL_API_KEY")
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")

In [5]:
# Fail fast when a credential is absent or empty.
# The keys are checked in the same order as before: Mistral first, then Pinecone.
_required_keys = (
    (MISTRAL_API_KEY, " MISTRAL_API_KEY is missing. Set it in your .env file."),
    (PINECONE_API_KEY, " PINECONE_API_KEY is missing. Set it in your .env file."),
)
for _key_value, _missing_message in _required_keys:
    if not _key_value:
        raise ValueError(_missing_message)

In [6]:
# Initialize the Mistral AI chat model used by the RAG chain.
# ChatMistralAI names its output-length cap `max_tokens`; the previous
# `max_output_tokens` keyword is not a ChatMistralAI field, so the 500-token
# limit was presumably never applied.
llm = ChatMistralAI(model="mistral-medium", temperature=0.4, max_tokens=500)

In [6]:
#Extract data from PDF
def load_pdf_file(data):
    """Load the PDF files found in a directory.

    Args:
        data: Path of the directory to scan for files matching ``*.pdf``.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns when each matched file is
        read with ``PyPDFLoader``.
    """
    loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return loader.load()

# The data directory can be overridden with the MEDICAL_DATA_DIR environment
# variable (e.g. in the .env file), so the notebook is not tied to one machine.
# The original location remains the default when the variable is unset.
extracted_data = load_pdf_file(
    data=os.getenv(
        "MEDICAL_DATA_DIR",
        'C:/Users/aumpa/OneDrive/Desktop/Projects/Chatbot_Hackthon/Medical_Chatbot/Data/',
    )
)


In [7]:
#Split the data into text chunks
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into smaller chunks for embedding.

    Args:
        extracted_data: Documents to split (the output of ``load_pdf_file``).
        chunk_size: ``chunk_size`` passed to ``RecursiveCharacterTextSplitter``.
            Defaults to 500, the value this notebook has always used.
        chunk_overlap: ``chunk_overlap`` passed to the splitter. Defaults to 20.

    Returns:
        The chunks produced by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

text_chunks = text_split(extracted_data)
print("Length of text chunks:", len(text_chunks))

Length of text chunks: 7023


In [8]:
#Download HuggingFace embeddings
def download_hugging_face_embeddings():
    """Return a HuggingFaceEmbeddings wrapper for sentence-transformers/all-MiniLM-L6-v2.

    HuggingFaceEmbeddings is already imported in the notebook's import cell, so
    it is not re-imported here. The function used to be defined twice in this
    cell (and the model loaded twice); a single definition is kept.
    """
    return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

embeddings = download_hugging_face_embeddings()

# Sanity check: embed a sample string and report the vector length.
# The length must match the `dimension` the Pinecone index is created with
# (384 in this notebook).
query_result = embeddings.embed_query("Hello World")
print("Embedding length:", len(query_result))

  return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


Embedding length: 384
Embedding length: 384


In [9]:
# Name of the Pinecone index used throughout the notebook.
index_name = "medicalbot"

# Pinecone client authenticated with the key read from the environment.
pc = pinecone.Pinecone(api_key=PINECONE_API_KEY)

In [10]:
# Create the index only when the account does not already have one with this name.
existing_indexes = [described.name for described in pc.list_indexes()]
index_already_present = index_name in existing_indexes

if index_already_present:
    print(f"Index '{index_name}' already exists.")
else:
    # dimension=384 matches the embedding length printed by the sanity check above.
    serverless_spec = pinecone.ServerlessSpec(cloud="aws", region="us-east-1")
    pc.create_index(
        name=index_name,
        dimension=384,
        metric="cosine",
        spec=serverless_spec,
    )
    print(f"Index '{index_name}' created successfully.")

Index 'medicalbot' created successfully.


In [11]:
# Embed every chunk with `embeddings` and upsert the vectors into the Pinecone index.
# NOTE(review): re-running this cell presumably uploads the chunks again;
# confirm whether that creates duplicate vectors before running it twice.
docsearch = PineconeVectorStore.from_documents(
    text_chunks,
    embeddings,
    index_name=index_name,
)

In [12]:
# Attach to the already-populated Pinecone index (no re-embedding of the chunks).
docsearch = PineconeVectorStore.from_existing_index(
    embedding=embeddings,
    index_name=index_name,
)

# Similarity retriever that returns the 3 closest chunks for each query.
retriever = docsearch.as_retriever(
    search_type="similarity",
    search_kwargs=dict(k=3),
)

In [14]:
# System instructions for the RAG prompt; the retrieved documents are injected
# into the `{context}` placeholder.
system_prompt = "".join(
    (
        "You are an assistant for question-answering tasks. ",
        "Use the following pieces of retrieved context to answer ",
        "the question. If you don't know the answer, say that you ",
        "don't know. Use three sentences maximum and keep the ",
        "answer concise.\n\n",
        "{context}",
    )
)

# Two-message chat template: the system instructions followed by the user's
# question, supplied through the `{input}` placeholder.
prompt_messages = [("system", system_prompt), ("human", "{input}")]
prompt = ChatPromptTemplate.from_messages(prompt_messages)

In [15]:
# Chain that stuffs the retrieved documents into the prompt and calls the LLM.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Full RAG pipeline: retrieve the relevant chunks, then answer with the chain above.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

In [17]:
# Smoke test: send one question through the RAG chain and print the generated answer.
sample_question = {"input": "medicine on sex ?"}
response = rag_chain.invoke(sample_question)
print(response["answer"])

Testosterone therapy can help women with low levels of this hormone, but potential side effects include deepening voice, hair growth, and acne. The EROS-CTD is a device approved by the FDA to increase blood flow to the clitoris and improve arousal. Therapy and lifestyle changes, such as quitting smoking and addressing any anxieties or fears about sexual intercourse, can also help resolve sexual dysfunction in women.


In [None]:
import os
from dotenv import load_dotenv

# Load API key.
# The previous version defined a local `load_dotenv()` function that shadowed
# the imported one, was never called, and stored the key in a local variable
# that was discarded. Loading the environment and reading the key at module
# level does what that function was meant to do.
load_dotenv()
MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")

def get_mistral_response(user_input, model="mistral-7b"):
    """Ask a Mistral chat model a question and return the reply text.

    The previous implementation imported `Mistral` from a `mistral` package and
    called `mistral.chat(...)`, indexing the result like a dict; that does not
    match the Mistral integration this notebook already uses. It now goes
    through `ChatMistralAI` from `langchain_mistralai`, the same client the
    RAG chain is built on.

    Args:
        user_input: The user's question.
        model: Mistral model identifier. Adjust based on your model.
            NOTE(review): confirm the default against Mistral's current model list.

    Returns:
        The content of the model's reply message.

    Raises:
        ValueError: If MISTRAL_API_KEY is not set in the environment.
    """
    # Imported locally so this cell does not depend on the notebook's import cell.
    from langchain_mistralai import ChatMistralAI

    api_key = os.getenv("MISTRAL_API_KEY")
    if not api_key:
        raise ValueError(" MISTRAL_API_KEY is missing. Set it in your .env file.")

    chat_model = ChatMistralAI(model=model, api_key=api_key)
    reply = chat_model.invoke(
        [
            ("system", "You are a medical assistant. Format responses in bullet points for clarity. Keep sentences short and informative."),
            ("human", user_input),
        ]
    )
    return reply.content