In [1]:
import os

# Runtime configuration for the LLM proxy and GPU selection, set before the
# concept_extractor imports below.
# NOTE(review): the API key is a credential committed in plain text. It should be
# rotated and supplied from outside the notebook (a pre-set environment variable,
# getpass.getpass(), or a secrets manager) instead of being hardcoded here.
# NOTE(review): the base URL is a private LAN address and the GPU ids are
# machine-specific; both presumably need adjusting on another host — confirm.
os.environ["LITELLM_BASE_URL"] = "http://192.168.0.105:4000"
os.environ["LITELLM_API_KEY"] = "sk-235bfb24-7434-4223-84fc-6cfb88c74ec9"
os.environ["CUDA_VISIBLE_DEVICES"] = "2,3"

from concept_extractor.text_chunker import SimpleChunker, LineChunker
from concept_extractor.concept_extractor import ConceptExtractor, ConceptValidator

  from .autonotebook import tqdm as notebook_tqdm
2024-08-19 09:25:09,608	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.



In [2]:
import logging
import numpy as np
import pandas as pd

from uuid import uuid4
from typing import Any, Union, List, Dict

from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

from nova.contextual_retrievers import DocumentRetriever
from concept_extractor.text_chunker import LineChunker

def simple_blitz(texts:List[str],
                 encoding_model:SentenceTransformer,
                 threshold:float=0.6) -> pd.DataFrame:
    """
    Soft (multi-label) clustering of texts by embedding similarity.

    Every text is encoded with `encoding_model`; a pairwise cosine-similarity matrix
    is built, and each row whose similarity reaches `threshold` for at least 3 texts
    spawns one community (labelled with a fresh uuid4 string) made of those texts.
    A text may therefore appear in several communities. Texts that end up in no
    community are appended once each with the label "-1".

    Args:
        - texts : List[str] : the texts to cluster
        - encoding_model : SentenceTransformer : model whose `encode` produces the embeddings
        - threshold : float : minimum cosine similarity for two texts to be 'connected'

    Returns:
        - communities : pd.DataFrame : columns ["node", "cls"]; clustered rows first,
          then the unclustered ("-1") rows in the order they appear in `texts`
    """

    # nothing to encode -- return the empty schema instead of failing inside the similarity call
    if len(texts) == 0:
        return pd.DataFrame(columns=["node", "cls"])

    text_embeddings = encoding_model.encode(texts)
    sim_matrix = cosine_similarity(text_embeddings, text_embeddings)

    # accumulate plain lists and build the frame once (no per-iteration pd.concat)
    nodes, labels = [], []

    #secret sauce - soft mini-blitz clustering
    for row in sim_matrix:

        connected_items = np.where(row >= threshold)[0]
        if len(connected_items) < 3:
            continue

        # Paired index arrays address the (i, i) elements only, so this clears the
        # self-similarity of every member: a member no longer counts itself when its
        # own row is visited later (rows are views, so the write is visible there).
        # NOTE(review): if the intent was to clear the whole member-by-member
        # sub-matrix, np.ix_(connected_items, connected_items) would be needed -- confirm.
        sim_matrix[connected_items, connected_items] = 0.

        community_id = str(uuid4())
        nodes.extend(texts[position] for position in connected_items)
        labels.extend([community_id] * len(connected_items))

    #enable multi-label soft clustering
    clustered = set(nodes)
    unclustered = [text for text in texts if text not in clustered]

    return pd.DataFrame({"node" : nodes + unclustered,
                         "cls" : labels + ["-1"] * len(unclustered)},
                        columns=["node", "cls"])

In [3]:
import ast
import networkx as nx

import spacy

from typing import Dict, List

from utils.llm_pipeline import AsyncLLMPipelineBuilder

# Shared spaCy pipeline, loaded once when this cell runs.
# GraphIndexRetriever._extract_keywords reads it as a module-level global.
nlp = spacy.load("en_core_web_sm")

class GraphIndexRetriever(DocumentRetriever):

    def __init__(self,
                 config:Union[dict, None]=None,
                 logger:logging.Logger=None):
        """
        Set up an empty graph-index retriever.

        Args:
            - config : dict : configuration forwarded to DocumentRetriever; a fresh
              empty dict is used when omitted
            - logger : logging.Logger : logger forwarded to DocumentRetriever
        """

        # A `{}` default is a single dict object shared by every call that omits
        # `config`; create a new one per instance instead.
        if config is None:
            config = {}

        super().__init__(config=config, logger=logger)

        # raw and validated concept records, filled in by build_index
        self.concepts = None
        self.valid_concepts = None

        # empty frames with the schemas produced by _find_connected_nodes / _find_communities
        self.connected_nodes = pd.DataFrame(columns=["node", "originating_texts", "summary"])
        self.communities = pd.DataFrame(columns=["node", "cls", "type", "originating_texts", "summary"])

        # LLM summary of the graph, cached by _build_graph_context
        self.graph_context = None
        self.sentence_window_separator = "\n [SEP] \n"

        self.encoding_model = SentenceTransformer("paraphrase-MiniLM-L6-v2")

    def _extract_keywords(self, documents: List[str]) -> List[Dict[str, Union[int, str, List[str]]]]:
        """
        Extract keyword candidates from documents with the module-level spaCy pipeline.

        Candidates are the named entities plus the noun chunks of more than one word;
        a candidate that is a substring of another candidate of the same document is dropped.

        Args:
            - documents : List[str] : the documents to process (a single string is also accepted)

        Returns:
            - keywords : List[Dict] : one {"index", "document", "keywords"} record per document
        """

        if isinstance(documents, str):
            documents = [documents]

        extracted = []

        for position, parsed in enumerate(nlp.pipe(documents, batch_size=128)):

            named_entities = [ent.text for ent in parsed.ents]
            multiword_chunks = [chunk.text
                                for chunk in parsed.noun_chunks
                                if len(chunk.text.split()) > 1]

            candidates = list(set(named_entities + multiword_chunks))

            #keep only the candidates that are not contained in some other candidate
            maximal = [candidate for candidate in candidates
                       if not any(other != candidate and candidate in other
                                  for other in candidates)]

            extracted.append({"index" : position,
                              "document" : parsed.text,
                              "keywords" : maximal})

        return extracted

    def _get_keywords(self, document:str, concepts:List[Dict[str, Any]]) -> List[str]:
        
        keywords = []

        try:
            concepts = pd.DataFrame(concepts)
            
            #keywords are just nodes in the graph -- either a source node (node_1) or a target node (node_2)
            graph_lookup = concepts[concepts["originating_text"] == document]
            keywords = graph_lookup["node_1"].tolist() + graph_lookup["node_2"].tolist()

            keywords = list(set(keywords))
            keywords = [{"entity": keyword,
                         "score" : 1.0,
                         "extraction_type" : "concept_graph"}
                         for keyword in keywords]
        except:
            pass
        
        return keywords

    def _build_graph_context(self, graph:pd.DataFrame) -> str:

        if self.graph_context is None:

            self.summarizer = AsyncLLMPipelineBuilder(system_prompt="summarize the following texts. Be informative in your summary and mention key points/ideas. Keep the summary to 5 lines or less.",
                                                        model="gemini-1.5-flash")
            
            #limit to 500 edges
            self.graph_context = self.summarizer.batch_predict([", ".join(graph['edge'].tolist()[:500])])[-1]
        
        return self.graph_context
    
    def _find_connected_nodes(self, concepts:List[Dict[str, Any]]) -> pd.DataFrame:

        concepts = pd.DataFrame(concepts)
        G = nx.from_pandas_edgelist(concepts, 'node_1', 'node_2', edge_attr='weight')

        _min_connections = max(3, len(concepts)//1000) #minimum number of connections to be considered 'connected'
        connected_nodes = [node for node in G.nodes() if G.degree[node] >= _min_connections]
        connected_nodes = pd.DataFrame(connected_nodes,
                                    columns=["node"]).drop_duplicates(subset=["node"])
        
        connected_nodes["originating_texts"] = connected_nodes["node"].apply(lambda x: list(set(concepts[concepts["node_1"] == x]["originating_text"].unique().tolist() + concepts[concepts["node_2"] == x]["originating_text"].unique().tolist())))
        connected_nodes["originating_texts"] = connected_nodes["originating_texts"].apply(lambda x: self.sentence_window_separator.join(x))

        self.logger.info(f"generating summaries for {connected_nodes.shape[0]} connected nodes ...")

        summaries_to_generate = [f"summarize the following texts around ideas/opinions related to {row['node']}:\n{row['originating_texts']}"
                                for _, row in connected_nodes.iterrows()]
        
        summaries = self.summarizer.batch_predict(summaries_to_generate)
        connected_nodes["summary"] = summaries
        
        return connected_nodes

    def _find_communities(self, concepts:List[Dict[str, Any]]) -> pd.DataFrame:

        concepts = pd.DataFrame(concepts)

        if concepts.empty:
            return concepts
        
        entities_only_communities = concepts["node_1"].unique().tolist() + concepts["node_2"].unique().tolist()
        entities_only_communities = list(set(entities_only_communities))
        self.logger.info(f'number of entities : {len(entities_only_communities)}')

        edge_communities = concepts["edge"].unique().tolist()
        self.logger.info(f'number of edges : {len(edge_communities)}')

        relation_only_communities = [f"{row['node_1']} {row['relation']} {row['node_2']}"
                                    for _, row in concepts.iterrows()]
        relation_only_communities = list(set(relation_only_communities))
        
        self.logger.info(f'number of relations : {len(relation_only_communities)}')

        entities_only_communities = simple_blitz(entities_only_communities,
                                                self.encoding_model,
                                                threshold=0.7)
        entities_only_communities["type"] = "similar_nodes"
        entities_only_communities["originating_texts"] = entities_only_communities["node"].apply(lambda x: list(set(concepts[concepts["node_1"] == x]["originating_text"].unique().tolist() + concepts[concepts["node_2"] == x]["originating_text"].unique().tolist())))
        entities_only_communities["originating_texts"] = entities_only_communities["originating_texts"].apply(lambda x: "\n".join(x))
        
        edge_communities = simple_blitz(edge_communities,
                                        self.encoding_model,
                                        threshold=0.85)
        edge_communities["type"] = "similar_edges"
        edge_communities["originating_texts"] = edge_communities["node"].apply(lambda x: concepts[concepts["edge"] == x]["originating_text"].unique().tolist())
        edge_communities["originating_texts"] = edge_communities["originating_texts"].apply(lambda x: "\n".join(x))

        relation_only_communities = simple_blitz(relation_only_communities,
                                                self.encoding_model,
                                                threshold=0.9)
        relation_only_communities["type"] = "similar_relations"
        relation_only_communities["originating_texts"] = relation_only_communities["node"]
            
        communities = pd.concat([entities_only_communities[entities_only_communities["cls"] != "-1"],
                                edge_communities[edge_communities["cls"] != "-1"],
                                relation_only_communities[relation_only_communities["cls"] != "-1"]
                                ],
                                axis=0, ignore_index=True)
            
        #get cls level summaries
        # unique_clusters = communities.drop_duplicates(subset=["cls"])
        # unique_clusters["group_summaries"] = unique_clusters["cls"].apply(lambda x : communities[communities["cls"] == x]["originating_texts"].tolist())

        # print(unique_clusters.head(), unique_clusters.shape)
        # communities_to_summarize = [f"summarize the following texts around ideas/opinions related to the keywords {communities[communities['cls'] == row['cls']]['node'].unique().tolist()}:\n\n{', '.join(row['group_summaries'])}"
        #                             for idx, row in unique_clusters.iterrows()]
        
        # cls_summaries = self.summarizer.batch_predict(communities_to_summarize)
        # unique_clusters["summary"] = cls_summaries

        # communities = pd.merge(communities,
        #                         unique_clusters[["cls", "summary"]],
        #                         on="cls",
        #                         how="left")
        
        # communities.reset_index(inplace=True, drop=True)
        communities["summary"] = communities["node"].tolist()

        return communities
    
    def search_nodes(self,
                    query:str,
                    graph_context:str,
                    node_subgraph:pd.DataFrame) -> List[Dict[str, Union[str, List[str]]]]:
        """
        'search' nodes that are frequently mentioned, reason which of the nodes have relevant information for the query, summarize information

        The query is expanded into entity/relation dictionaries by an LLM (plus the
        keywords extracted locally from the query); sub-graph nodes whose embedding has a
        cosine similarity of at least 0.7 with any query entity are returned.

        Args:
            - query : str : the query to search for
            - graph_context : str : the summary of the graph
            - node_subgraph : pd.DataFrame : the subgraph of the nodes (needs a "node" column)

        Returns:
            - relevant_to_query : List[Dict[str, Union[str, List[str]]] : the relevant nodes to the query,
              as records of `node_subgraph` with an added "type" == "node_search"; [] when the
              subgraph is empty or no entity could be extracted from the query
        """

        # nothing to search in -- avoid the LLM call and the similarity computation on empty input
        if node_subgraph.empty:
            return []

        extracted_entity_keywords = self._extract_keywords(query)[0]["keywords"]
        extracted_entity_keywords = [{"node_1" : entity, "relation" : query, "node_2" : entity, "edge" : query}
                                for entity in extracted_entity_keywords]
        self.logger.info(f"Extracted entities and relations from the query : {extracted_entity_keywords}\n")

        try:

            er_query = f"""you are an expert analyst working on a transcript. A summary of the transcript is as below:\n{graph_context}\n
                        Given this context, extract the entities and relationships between them as a list of dictionaries. each dictionary
                        should have the keys 'node_1', 'relation', 'node_2', 'edge'."""

            graph_query_builder = AsyncLLMPipelineBuilder(system_prompt=er_query,
                                                        model="gemini-1.5-flash")
            query_entity_relations = graph_query_builder.batch_predict([f"only return a list of dictionaries with the keys 'node_1', 'relation', 'node_2', 'edge' for the following query : {query}"])[0]
            # NOTE(review): removing every single quote means a reply written with
            # single-quoted keys/values no longer parses and ends up in the fallback
            # below; only double-quoted replies survive -- confirm this is intended.
            query_entity_relations = query_entity_relations.replace("\n", " ").replace("'", "").replace("`", "").replace("json", "").replace("python", "").strip()

            self.logger.info(f"Expanded query : {query_entity_relations}")
            query_entity_relations = ast.literal_eval(query_entity_relations)
            query_entity_relations += extracted_entity_keywords

        except Exception as error:
            # best effort: an unusable LLM reply falls back to the locally extracted keywords
            self.logger.warning(f"could not expand the query with the LLM, using extracted keywords only : {error}")
            query_entity_relations = extracted_entity_keywords

        # keep only well-formed relation records so the column lookups below cannot fail
        query_entity_relations = [relation for relation in query_entity_relations
                                  if isinstance(relation, dict) and "node_1" in relation and "node_2" in relation]

        if query_entity_relations == []:
            return []

        relations_table = pd.DataFrame(query_entity_relations)
        entities_in_query = list(set(relations_table["node_1"].tolist() + relations_table["node_2"].tolist()))

        #find entities that are in the node-subgraph that might be relevant to the query
        query_entity_embeddings = self.encoding_model.encode(entities_in_query)
        node_subgraph_embeddings = self.encoding_model.encode(node_subgraph["node"].tolist())
        query2node_sim_matrix = cosine_similarity(query_entity_embeddings, node_subgraph_embeddings)

        # positions of the sub-graph nodes that are similar enough to at least one query entity
        similar_positions = np.where((query2node_sim_matrix >= 0.7).any(axis=0))[0]
        similar_subgraph_nodes = node_subgraph.iloc[similar_positions]["node"].tolist()

        self.logger.info(f"found {len(similar_subgraph_nodes)} nodes in the subgraph that are similar to the query entities ...")
        self.logger.info(f"similar nodes : {similar_subgraph_nodes}\n")

        #get the originating texts for these nodes
        # assign() returns a new frame, so the caller's sub-graph is not written to
        relevant_to_query = node_subgraph[node_subgraph["node"].isin(similar_subgraph_nodes)].assign(type="node_search")

        if relevant_to_query.empty:
            self.logger.info(f"no relevant nodes found in the subgraph for the query")

        return relevant_to_query.to_dict('records')
    
    def search_communities(self,
                           query:str,
                           graph_context:str,
                           communities:pd.DataFrame) -> List[Dict[str, Union[str, List[str]]]]:
        """
        search the communities formed in the graph for relevant information.

        - get relevant keywords to 'answer' a query using the graph context
        - search the community nodes for relevant information
        - for every unique 'cls' of the community, retrieve all 'connected' nodes

        Args:
            - query : str : the query to search for
            - graph_context : str : the summary of the graph
            - communities : pd.DataFrame : the communities formed in the graph
              (needs the "node" and "cls" columns)

        Returns:
            - connected_nodes : List[Dict[str, Union[str, List[str]]] : the connected nodes in the communities,
              as records of `communities` with an added "score" of 0.8; [] when there are no
              communities or the query expansion yields no keywords
        """

        # nothing to search in -- avoid the LLM call and the similarity computation on empty input
        if communities.empty or "node" not in communities.columns:
            return []

        #simple query expansion -- ask an LLM what 'keywords' are required to answer the query
        query_expansion_prompt = f"you are an expert analyst working on a transcript. A summary of the transcript is as below:\n{graph_context}\n"
        with AsyncLLMPipelineBuilder(query_expansion_prompt, model="gemini-1.5-flash") as llm:
            expanded_query = llm.batch_predict([f"give keywords relevant to answering '{query}'. only return a single string of keywords, separated by commas"])[-1]

        if isinstance(expanded_query, str):
            cleaned_reply = expanded_query.replace("\n", " ").replace("'", "").replace("`", "").replace("json", "").replace("python", "").replace("indeterminate", "").strip()
            # trim each keyword and drop the empty pieces left by stray commas
            expanded_query = [keyword.strip() for keyword in cleaned_reply.split(",") if keyword.strip()]
        else:
            # the LLM did not return text; treat it as 'no keywords'
            expanded_query = []

        if not expanded_query:
            return []

        self.logger.info(f"expanded query keywords : {expanded_query}")

        #search the communities for relevant information
        community_nodes = communities["node"].tolist()
        query_embeddings = self.encoding_model.encode(expanded_query)
        community_embeddings = self.encoding_model.encode(community_nodes)
        query2community_sim_matrix = cosine_similarity(query_embeddings, community_embeddings)

        # community entries similar enough (>= 0.8) to at least one expanded keyword
        relevant_positions = np.where((query2community_sim_matrix >= 0.8).any(axis=0))[0]
        relevant_communities = [community_nodes[position] for position in relevant_positions]

        #for every connected node, get the cluster it belongs to
        matched_clusters = communities.loc[communities["node"].isin(relevant_communities), "cls"].unique().tolist()

        # assign()/drop_duplicates() return new frames, so the caller's frame is not written to
        connected_nodes = (communities[communities["cls"].isin(matched_clusters)]
                           .assign(score=0.8)
                           .drop_duplicates(subset=["node", "cls"]))

        return connected_nodes.to_dict('records')
    
    def _search_index(self, query:str):
        #simple semantic search in the index and community search
        pass

    def build_index(self, documents: List[str]) -> None:

        if self.index is None:

            self.logger.info(f"Building a graph-index using LLMs for {len(documents)} documents ...")

            data_chunks = LineChunker(docs=documents).get_output()
            self.logger.info(f"Chunked {len(documents)} documents into {len(data_chunks)} chunks ...\n")
            
            concept_extractor = ConceptExtractor(chunks=data_chunks, comprehensive=False)
            self.concepts = concept_extractor.get_output()

            self.logger.info(f"Extracted {len(self.concepts)} concepts from {len(documents)} documents ...\n")

            validator = ConceptValidator(concepts=self.concepts)
            self.valid_concepts = validator.validate()

            self.logger.info(f"Found {len(self.valid_concepts)} valid concepts from {len(documents)} documents ...\n")

            self.index = [{"index": idx,
                        "document": doc,
                        "keywords": self._get_keywords(doc, self.valid_concepts)}
                        for idx, doc in enumerate(documents)]
            
            self.logger.info(f"Built index for {len(documents)} documents | {len(self.index)} keyword pairs extracted")
            self.logger.info(f"Building inverse index ...")

            #build graph context -- this is a summary of the graph.
            self._build_graph_context(pd.DataFrame(self.valid_concepts))

            self.logger.info(f"Finding connected nodes ...")
            self.connected_nodes = self._find_connected_nodes(self.valid_concepts)

            self.logger.info(f"Finding communities ...")
            self.communities = self._find_communities(self.valid_concepts)

            self.inverse_index = self.build_inverse_index(self.index)
            self.logger.info(f"Built inverse index for {len(self.inverse_index)} keywords\n")
        
    def retrieve_topk(self, query: str, topk: int=5) -> List[Dict[str, Union[int, str, List[str]]]]:

        relevant_nodes, relevant_docs = [], []

        # relevant_nodes = self.search_nodes(query,
        #                                   graph_context=self.graph_context,
        #                                   node_subgraph=self.connected_nodes)
        
        relevant_nodes += self.search_communities(query,
                                                graph_context=self.graph_context,
                                                communities=self.communities)
        if relevant_nodes == []:
            return pd.DataFrame(columns=['index', 'document', 'node', 'type', 'keywords'])
        
        relevant_nodes = pd.merge(pd.DataFrame(relevant_nodes)[["originating_texts", "node", "type"]].rename(columns={"originating_texts" : "document"}),
                                pd.DataFrame(self.index)[["document", "keywords", "index"]],
                                on="document",
                                how="left").dropna(subset=["index"]).drop_duplicates(subset=["document", "index"])
        relevant_nodes['index'] = relevant_nodes['index'].astype(int)
        
        # summary_docs = [{'index' : 'aux_doc',
        #                  'document' : rel_document.get('summary', ''),
        #                  'keywords' : [],
        #                  'score' : 0.5}
        #                  for rel_document in relevant_nodes]
        # summary_docs = [doc for doc in summary_docs if doc.get('document') != '']
        
        #get all documents associated with the node -- overkill for number of documents retrieved

        # documents_for_nodes = [self.inverse_index.get(keyword['node'], [])
        #                        for keyword in relevant_nodes]
        # documents_for_nodes = [arr for sublist in documents_for_nodes for arr in sublist]
        # documents_for_nodes = [self.index[document_idx] for document_idx in documents_for_nodes]

        # documents_for_nodes += summary_docs

        # relevant_docs += self.search_communities(query, topk=topk)

        documents_for_nodes = relevant_nodes.to_dict('records')

        relevant_docs += documents_for_nodes
        
        return relevant_docs

In [4]:
# import pandas as pd

# input_data = pd.read_excel("../nova/Datasets/Western Wear/western_wear.xlsx")
# input_data["request_id"] = "western_wear"

# Load the transcript and tag every row with its request id and its position in the file.
input_data = (
    pd.read_excel("./Datasets/med_research_transcript.xlsx")
      .assign(request_id="med_research",
              sequence_number=lambda frame: range(len(frame)))
)

input_data

Unnamed: 0,text,text_id,request_id,sequence_number
0,MINI REVIEW ARTICLE\n,e951f600-99e5-499c-8e45-78b87ad76291,med_research,0
1,published: 09 November 2011\n,39e9fc72-cab9-48a2-8c92-fbca710d0e0a,med_research,1
2,doi: 10.3389/fphar.2011.00072\n,d4025a7c-18ba-46d0-8659-356942573265,med_research,2
3,Acetaminophen: beyond pain and fever-relieving\n,f9092f03-ba9f-47a9-9457-bc5a837dfa83,med_research,3
4,"Eric R. Blough 1,2,3,4,5 and MiaozongWu 1,2,3,...",de38d481-f0a4-44a7-8b88-864fc103ac4f,med_research,4
...,...,...,...,...
675,Acetaminophen normalizes glucose\n,e2adaa00-d883-4a70-8419-581aeaa74170,med_research,675
676,"G. L., Wehner, P., Mangiarua, E.\n",7dd20ebe-d9af-43e4-b61a-cb44712619f9,med_research,676
677,"S., Karkala, S. K., Rice, K. M., conditions ar...",937a9a4d-9a76-4e06-9a88-e01c72bf848a,med_research,677
678,Frontiers in Pharmacology | Experimental Pharm...,c570842d-77d7-43e4-83c9-0b8f8f1047bf,med_research,678


In [5]:
# Create a retriever with the default config/logger and build its graph index
# from the "text" column of the loaded transcript (one document per row).
graph_index_builder = GraphIndexRetriever()

# Long-running: build_index chunks the documents, extracts and validates concepts,
# and calls an LLM for the summaries.
graph_index_builder.build_index(input_data["text"].tolist())

2024-08-19 09:25:18,505 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: cuda
2024-08-19 09:25:18,506 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: paraphrase-MiniLM-L6-v2
2024-08-19 09:25:22,548 - sentence_transformers.SentenceTransformer - INFO - Use pytorch device_name: cuda
2024-08-19 09:25:22,549 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: paraphrase-MiniLM-L6-v2
2024-08-19 09:25:24,696 - root - INFO - Building a graph-index using LLMs for 680 documents ...
2024-08-19 09:25:24,702 - root - INFO - Chunked 680 documents into 680 chunks ...



Using None quantization ...

INFO 08-19 09:25:25 llm_engine.py:100] Initializing an LLM engine (v0.4.2) with config: model='microsoft/Phi-3-mini-4k-instruct', speculative_config=None, tokenizer='microsoft/Phi-3-mini-4k-instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.float16, max_seq_len=2048, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), seed=0, served_model_name=microsoft/Phi-3-mini-4k-instruct)


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


INFO 08-19 09:25:25 utils.py:660] Found nccl from library /home/aamir_syed/.config/vllm/nccl/cu12/libnccl.so.2.18.1
INFO 08-19 09:25:26 selector.py:69] Cannot use FlashAttention-2 backend for Volta and Turing GPUs.
INFO 08-19 09:25:26 selector.py:32] Using XFormers backend.
INFO 08-19 09:25:28 weight_utils.py:199] Using model weights format ['*.safetensors']
INFO 08-19 09:25:30 model_runner.py:175] Loading model weights took 7.1183 GB
INFO 08-19 09:25:31 gpu_executor.py:114] # GPU blocks: 329, # CPU blocks: 682


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Processed prompts:  38%|███▊      | 259/680 [03:29<03:25,  2.04it/s]

In [None]:
# Just to see what the documents talk about: the LLM-generated summary of the
# graph's edges, cached on the retriever by build_index (via _build_graph_context).
graph_index_builder.graph_context

In [None]:
from utils.custom_llm import CustomLLMBuilder

def answer_query(query:str, pipeline=graph_index_builder):
    """
    Answer `query` with an LLM, grounded on the documents retrieved from the graph index.

    Two prompts are sent: one with the retrieved documents and one with the retrieved
    nodes. The document-based answer is returned unless it is an empty string, in which
    case the node-based answer is returned.

    Args:
        - query : str : the question to answer
        - pipeline : retriever exposing `retrieve_topk` and `graph_context`; defaults to the
          `graph_index_builder` object that exists when this function is defined

    Returns:
        - the LLM response (document-based if non-empty, otherwise node-based)
    """

    print(query)

    relevant_docs = pipeline.retrieve_topk(query=query,
                                             topk=5)
    relevant_docs = pd.DataFrame(relevant_docs)

    # retrieve_topk may hand back an empty list, and pd.DataFrame([]) has no columns;
    # make sure the two columns read below exist so an empty retrieval does not raise
    for column in ("document", "node"):
        if column not in relevant_docs.columns:
            relevant_docs[column] = pd.Series(dtype="object")

    print(f"retrieved {len(relevant_docs)} documents relevant to the query")
    print(f"{relevant_docs.head()}\n")

    #get the reasoning for the query
    reasoner_query = f"""you are given a transcript, based on which you are required to reason and answer. here is a summary of the transcript:\n{pipeline.graph_context}\n
                        given this context, answer any of the questions that follow. your answers must be precise and concise."""

    with CustomLLMBuilder(system_prompt=reasoner_query,
                        model="gemini-1.5-flash") as reasoner:

        reasoner_response = reasoner.batch_predict([f"query : {query}\n you may use the following information to answer the question : {relevant_docs['document'].unique().tolist()}",
                                                    f"query : {query}\n you may use the following information to answer the question : {relevant_docs['node'].unique().tolist()}"])

    # first prompt -> answer from full documents, second prompt -> answer from node names
    full_response = reasoner_response[0]
    summary_response = reasoner_response[1]

    return summary_response if full_response == '' else full_response

In [None]:
# Example question answered against the index built above (uses the default pipeline).
answer_query("summarize effects at different dosages of apap")