In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings, HuggingFaceInferenceAPIEmbeddings
from langchain.vectorstores import FAISS
from langchain.agents.agent_toolkits.conversational_retrieval.tool import create_retriever_tool
from langchain.docstore.document import Document
from langchain.text_splitter import CharacterTextSplitter
import json



In [2]:
docs = []
with open("final_data.json") as f:
    for line in f:
        row = json.loads(line)
        doc = Document(
            page_content = row["text"],
            metadata={"title": row["title"], "id": row["docid"], "source": row["text"]},
        )
        docs.append(doc)


In [3]:
docs[0]


Document(page_content="There are four main types of diabetic neuropathy. You can have one type or more than one type of neuropathy.Numbness or reduced ability to feel pain or temperature changes Tingling or burning feeling Sharp pains or cramps Muscle weakness Extreme sensitivity to touch — for some people, even a bedsheet's weight can be painful Serious foot problems, such as ulcers, infections, and bone and joint damage", metadata={'title': 'Diabetic neuropathy', 'id': 1, 'source': "There are four main types of diabetic neuropathy. You can have one type or more than one type of neuropathy.Numbness or reduced ability to feel pain or temperature changes Tingling or burning feeling Sharp pains or cramps Muscle weakness Extreme sensitivity to touch — for some people, even a bedsheet's weight can be painful Serious foot problems, such as ulcers, infections, and bone and joint damage"})

In [4]:
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(docs)


In [5]:
docs[0]


Document(page_content="There are four main types of diabetic neuropathy. You can have one type or more than one type of neuropathy.Numbness or reduced ability to feel pain or temperature changes Tingling or burning feeling Sharp pains or cramps Muscle weakness Extreme sensitivity to touch — for some people, even a bedsheet's weight can be painful Serious foot problems, such as ulcers, infections, and bone and joint damage", metadata={'title': 'Diabetic neuropathy', 'id': 1, 'source': "There are four main types of diabetic neuropathy. You can have one type or more than one type of neuropathy.Numbness or reduced ability to feel pain or temperature changes Tingling or burning feeling Sharp pains or cramps Muscle weakness Extreme sensitivity to touch — for some people, even a bedsheet's weight can be painful Serious foot problems, such as ulcers, infections, and bone and joint damage"})

In [6]:
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


In [37]:
# db = FAISS.from_documents(docs, embeddings)
# db.save_local("symptomdb.faiss")


In [7]:
db = FAISS.load_local("symptomdb.faiss", embeddings)


In [8]:
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain, RetrievalQAWithSourcesChain
from langchain.chat_models import ChatOpenAI
from dotenv import load_dotenv
from langchain.prompts import PromptTemplate
import os


In [9]:
load_dotenv()
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")


In [10]:
llm = ChatOpenAI(temperature=0)


# Query Augmentation

In [11]:
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.callbacks.manager import CallbackManagerForRetrieverRun
import logging

logging.basicConfig()
logging.getLogger("langchain.retrievers.multi_query").setLevel(logging.INFO)
retriever = MultiQueryRetriever.from_llm(
    retriever=db.as_retriever(), llm=llm
)

In [60]:
run_manager = CallbackManagerForRetrieverRun

In [36]:
# question = ["I have a headache and fever with a dry cough?", "headache, fever, dry cough"]
question = "Which two are the main symptoms of albinism?"

QA_Docs = retriever.get_relevant_documents(query=question)
len(QA_Docs)

INFO:langchain.retrievers.multi_query:Generated queries: ['1. What are the primary symptoms associated with albinism?', '2. Can you identify the main signs of albinism?', '3. What are the two main indicators of albinism?', '4. Which symptoms are typically observed in individuals with albinism?', '', 'symptoms, albinism']


13

In [58]:
from typing import List
from langchain import LLMChain
from pydantic import BaseModel, Field
from langchain.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser

# Output parser will split the LLM result into a list of queries
class LineList(BaseModel):
    # "lines" is the key (attribute name) of the parsed output
    lines: List[str] = Field(description="Lines of text")

class LineListOutputParser(PydanticOutputParser):
    def __init__(self) -> None:
        super().__init__(pydantic_object=LineList)

    def parse(self, text: str) -> LineList:
        lines = text.strip().split("\n")
        return LineList(lines=lines)

output_parser = LineListOutputParser()

QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant. Your task is to generate five
    different versions of the given user question to retrieve relevant documents from a vector
    database. By generating multiple perspectives on the user question, your goal is to help
    the user overcome some of the limitations of the distance-based similarity search. The last query should just be the 5 most important words of the original question.
    Provide these alternative questions seperated by newlines.
    Original question: {question}""",
)
# llm = ChatOpenAI(temperature=0)

# Chain
llm_chain = LLMChain(llm=llm, prompt=QUERY_PROMPT, output_parser=output_parser)

# Other inputs
# question = "What are the approaches to Task Decomposition?"

# Run
retriever = MultiQueryRetriever(
    retriever=db.as_retriever(), llm_chain=llm_chain, parser_key="lines"
)  # "lines" is the key (attribute name) of the parsed output
# Results
unique_docs = retriever.get_relevant_documents(
    query=question
)
len(unique_docs)

INFO:langchain.retrievers.multi_query:Generated queries: ['1. What are the primary symptoms associated with albinism?', '2. Can you identify the two main symptoms commonly observed in individuals with albinism?', '3. What are the key signs of albinism, specifically the two main symptoms?', '4. Could you please list the primary symptoms that characterize albinism?', '5. What are the two main symptoms typically seen in individuals with albinism?', '', 'main symptoms, albinism']


12

In [40]:
queries_list= llm_chain.run(question)

In [43]:
queries_list.lines

['1. What are the primary symptoms associated with albinism?',
 '2. Can you identify the two main symptoms commonly observed in individuals with albinism?',
 '3. What are the key signs of albinism, specifically the two main symptoms?',
 '4. Could you please describe the primary symptoms of albinism?',
 '',
 'main symptoms albinism']

In [62]:
queries = retriever.generate_queries(question, run_manager())

TypeError: BaseRunManager.__init__() missing 3 required keyword-only arguments: 'run_id', 'handlers', and 'inheritable_handlers'

In [44]:
QA_Docs

[Document(page_content='At first, the flu may seem like a cold with a runny nose, sneezing and sore throat. Colds usually start slowly. But the flu tends to come on quickly. And while a cold can be miserable, you usually feel much worse with the flu.Headache. Dry, persistent cough. Shortness of breath. Tiredness and weakness. Runny or stuffy nose. Sore throat. Eye pain.', metadata={'title': 'Influenza (flu)', 'id': 466, 'source': 'At first, the flu may seem like a cold with a runny nose, sneezing and sore throat. Colds usually start slowly. But the flu tends to come on quickly. And while a cold can be miserable, you usually feel much worse with the flu.Headache. Dry, persistent cough. Shortness of breath. Tiredness and weakness. Runny or stuffy nose. Sore throat. Eye pain.'}),
 Document(page_content='Each infectious disease has its own specific signs and symptoms. General signs and symptoms common to a number of infectious diseases include:Fever Diarrhea Fatigue Muscle aches Coughing',

# Vector Database Retrieval 

In [45]:
# memory = ConversationBufferMemory(memory_key="chat_history", return_messages= True)
chain = RetrievalQAWithSourcesChain.from_chain_type(llm, chain_type="stuff", retriever=db.as_retriever())





In [46]:
query = question[0]
result = chain({"question": query}, return_only_outputs=True)
scored_docs = db.similarity_search_with_relevance_scores(query, top_k=3)


In [47]:
result

{'answer': 'Based on the symptoms described (headache, fever, dry cough), it is possible that you may have the flu. However, it is important to consult with a healthcare professional for an accurate diagnosis and appropriate treatment. \n',
 'sources': 'Symptoms of cough headaches:Begin suddenly with and just after coughing or other types of straining Typically last a few seconds to a few minutes — some can last up to two hours Cause sharp, stabbing, splitting or "bursting" pain Usually affect both sides of your head and may be worse in the back of your head May be followed by a dull, aching pain for hours; At first, the flu may seem like a cold with a runny nose, sneezing and sore throat. Colds usually start slowly. But the flu tends to come on quickly. And while a cold can be miserable, you usually feel much worse with the flu.Headache. Dry, persistent cough. Shortness of breath. Tiredness and weakness. Runny or stuffy nose. Sore throat. Eye pain.'}

In [48]:
scored_docs


[(Document(page_content='Symptoms of cough headaches:Begin suddenly with and just after coughing or other types of straining Typically last a few seconds to a few minutes — some can last up to two hours Cause sharp, stabbing, splitting or "bursting" pain Usually affect both sides of your head and may be worse in the back of your head May be followed by a dull, aching pain for hours', metadata={'title': 'Cough headaches', 'id': 382, 'source': 'Symptoms of cough headaches:Begin suddenly with and just after coughing or other types of straining Typically last a few seconds to a few minutes — some can last up to two hours Cause sharp, stabbing, splitting or "bursting" pain Usually affect both sides of your head and may be worse in the back of your head May be followed by a dull, aching pain for hours'}),
  0.45038376911259537),
 (Document(page_content='At first, the flu may seem like a cold with a runny nose, sneezing and sore throat. Colds usually start slowly. But the flu tends to come on

In [44]:
df = pd.read_csv("Unique Queries.csv")

In [48]:
unique_qs= df["Queries"].to_list()

In [50]:
len(unique_qs)

59

In [59]:
interior_dic = {}
for i in  unique_qs:
    interior_dic[i] = {"alt_qs": llm_chain.run(i).lines}

In [61]:
interior_dic

{'what is the normal appearance of the dengue virus?': {'alt_qs': ['1. Can you describe the typical visual characteristics of the dengue virus?',
   '2. How does the dengue virus usually look like?',
   '3. What are the common physical features of the dengue virus?',
   '4. Could you provide details about the typical appearance of the dengue virus?',
   '5. What is the usual visual presentation of the dengue virus?',
   '',
   'Normal appearance: dengue virus']},
 'What are the symptoms of astigmatism?': {'alt_qs': ['1. What are the common signs and symptoms of astigmatism?',
   '2. Can you list the typical symptoms associated with astigmatism?',
   '3. How can I recognize the symptoms of astigmatism?',
   '4. What are the indicators of astigmatism that I should be aware of?',
   '5. Are there any specific signs that indicate the presence of astigmatism?',
   '',
   'symptoms, astigmatism']},
 'What is the last thing you would want to do?': {'alt_qs': ['1. What are the activities you w

In [62]:
# for key in interior_dic:
#     interior_dic[key]["alt_qs"].remove("")

In [63]:
df1 = pd.read_csv("Taimoor data scored.csv")
df2 = pd.read_csv("Haashim data scored.csv")
df3 = pd.read_csv("Josh data scored.csv")
total_df = pd.concat([df1, df2, df3], ignore_index=True)

In [64]:
total_df.head()

Unnamed: 0.1,Unnamed: 0,Query,DocID,Text,Score
0,0,What part of the body is a growth plate fracture?,328,Most people have no symptoms associated with f...,1
1,1,What part of the body is a growth plate fracture?,276,Signs and symptoms of testicular torsion inclu...,1
2,2,What part of the body is a growth plate fracture?,433,Heart palpitations can feel like the heart is:...,1
3,3,What part of the body is a growth plate fracture?,119,Damage to the protective myelin covering the n...,1
4,4,What part of the body is a growth plate fracture?,1008,You might not have signs or symptoms if your h...,1


In [69]:
for i in unique_qs:
    interior_dic[i]["scored_docs"] = list(zip(total_df[total_df["Query"] == i]["DocID"].to_list() ,total_df[total_df["Query"] == i]["Score"].to_list()))

In [71]:
interior_dic["what is the normal appearance of the dengue virus?"]

{'alt_qs': ['1. Can you describe the typical visual characteristics of the dengue virus?',
  '2. How does the dengue virus usually look like?',
  '3. What are the common physical features of the dengue virus?',
  '4. Could you provide details about the typical appearance of the dengue virus?',
  '5. What is the usual visual presentation of the dengue virus?',
  '',
  'Normal appearance: dengue virus'],
 'scored_docs': [(886, 1),
  (191, 1),
  (1019, 1),
  (335, 1),
  (1057, 1),
  (95, 1),
  (79, 1),
  (1066, 1),
  (782, 1),
  (691, 1),
  (618, 1),
  (1004, 1),
  (630, 1),
  (97, 1),
  (577, 1),
  (38, 1),
  (355, 1),
  (859, 1),
  (704, 1),
  (581, 1),
  (521, 1),
  (340, 1),
  (144, 1),
  (806, 1),
  (74, 1),
  (477, 1),
  (324, 1),
  (569, 1),
  (694, 1),
  (575, 1),
  (113, 1),
  (439, 1),
  (104, 1),
  (590, 1),
  (857, 1),
  (285, 1),
  (709, 1),
  (253, 1),
  (378, 1),
  (606, 1),
  (911, 1),
  (500, 1),
  (890, 1),
  (848, 1),
  (271, 1),
  (456, 1),
  (326, 1),
  (924, 1),
  (1

In [72]:
with open("interior_dic.json", "w") as outfile:
    json.dump(interior_dic, outfile)