In [1]:
pip install langchain-openai==0.1.0


Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install langchain-openai==0.1.0

Note: you may need to restart the kernel to use updated packages.


In [3]:
import os
from dotenv import load_dotenv, find_dotenv

from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.vectorstores import Chroma, FAISS
from langchain_pinecone import PineconeVectorStore
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_openai import AzureChatOpenAI
from langchain_experimental.text_splitter import SemanticChunker
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
from langchain.document_loaders.pdf import PyMuPDFLoader
from langchain.retrievers import MultiQueryRetriever
from typing import Dict, Any, List
import pinecone
from pinecone import ServerlessSpec

# Load environment variables from .env file
dotenv_path = find_dotenv()
if not dotenv_path:
    raise FileNotFoundError("Could not find .env file")
load_dotenv(dotenv_path)

# Initialize the SentenceTransformer model
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
pinecone_api_key = os.getenv('PINECONE_API_KEY')
index_name = "vladch"

# Azure GPT-4 model parameters
azure_api_key = os.getenv('AZURE_API_KEY')
azure_api_version = os.getenv('AZURE_API_VERSION')
azure_deployment = os.getenv('AZURE_DEPLOYMENT')
azure_endpoint = os.getenv('AZURE_ENDPOINT')

# Check if the environment variables are loaded correctly
if not all([pinecone_api_key, azure_api_key, azure_api_version, azure_deployment, azure_endpoint]):
    raise ValueError("One or more environment variables are missing.")

# Define an embedding function
class SentenceTransformerEmbeddings:
    def __init__(self, model):
        self.model = model

    def embed_documents(self, documents):
        return self.model.encode(documents, show_progress_bar=True).tolist()

    def embed_query(self, query):
        return self.model.encode(query, show_progress_bar=True).tolist()

    def __call__(self, text):
        return self.model.encode(text, show_progress_bar=True).tolist()

embedding_function = SentenceTransformerEmbeddings(model)

# Define the ChunkingHelper class
class ChunkingHelper:
    def __init__(self, file_path: str, chunking_method: str, embedding_function):
        self.file_path = file_path
        self.chunking_method = chunking_method
        self.embedding_function = embedding_function
        self.loader = PyMuPDFLoader(file_path=file_path)
        self.docs = self.loader.load()
        self.chunks = []

    def chunk_document(self):
        if self.chunking_method == 'standard_deviation':
            text_splitter = SemanticChunker(self.embedding_function, breakpoint_threshold_type='standard_deviation')
        elif self.chunking_method == 'recursive_character':
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        else:
            raise ValueError(f"Unknown chunking method: {self.chunking_method}")
        
        self.chunks = text_splitter.split_documents(self.docs)

    def get_chunks(self):
        if not self.chunks:
            self.chunk_document()
        return self.chunks

# Paths to the directories containing the PDF files
directories = {
    'chroma': r"E:\RepoFisiereMulte\FisiereTotal",
    'faiss': r"E:\RepoFisiereMulte\FisiereTotal",
    'pinecone': r"E:\RepoFisiereMulte\FisiereTotal"
}

all_texts = {key: [] for key in directories}
all_metadatas = {key: [] for key in directories}

def process_pdfs(directory_path, all_texts, all_metadatas, chunking_method):
    for filename in os.listdir(directory_path):
        if filename.endswith(".pdf"):
            file_path = os.path.join(directory_path, filename)
            chunking_helper = ChunkingHelper(file_path, chunking_method, embedding_function)
            splits = chunking_helper.get_chunks()
            for i, split in enumerate(splits):
                split.metadata['chunk_id'] = i
                split.metadata['file'] = filename
                all_texts.append(split)
                all_metadatas.append(split.metadata)

# Process PDFs with the specified chunking method
chunking_method = 'standard_deviation'  # or 'recursive_character'
for key, directory in directories.items():
    process_pdfs(directory, all_texts[key], all_metadatas[key], chunking_method)

# Define Azure GPT-4 model
llm = AzureChatOpenAI(
    openai_api_version=azure_api_version,
    azure_deployment=azure_deployment,
    api_key=azure_api_key,
    azure_endpoint=azure_endpoint
)

def create_vector_store(store_type, documents, metadatas, index_name=None):
    if store_type == 'chroma':
        return Chroma.from_texts(texts=[doc.page_content for doc in documents], embedding=embedding_function, metadatas=metadatas)
    elif store_type == 'faiss':
        return FAISS.from_texts(texts=[doc.page_content for doc in documents], embedding=embedding_function, metadatas=metadatas)
    elif store_type == 'pinecone':
        return PineconeVectorStore.from_documents(documents, index_name=index_name, embedding=embedding_function)

vector_stores = {key: create_vector_store(key, all_texts[key], all_metadatas[key], index_name) for key in directories}

def create_retriever(store, llm):
    return MultiQueryRetriever.from_llm(retriever=store.as_retriever(), llm=llm)

retrievers = {key: create_retriever(store, llm) for key, store in vector_stores.items()}

contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is."""
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

history_aware_retrievers = {key: create_history_aware_retriever(llm, retriever, contextualize_q_prompt)
                            for key, retriever in retrievers.items()}

qa_system_prompt = """You are an assistant for question-answering tasks. \
Use the following pieces of retrieved context to answer the question. \
If you don't know the answer, just say that you don't know. \
Use three sentences maximum and keep the answer concise.\

{context}"""
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

rag_chains = {key: create_retrieval_chain(history_aware_retriever, question_answer_chain)
              for key, history_aware_retriever in history_aware_retrievers.items()}

store = {}

def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]

def create_conversational_rag_chain(rag_chain):
    return RunnableWithMessageHistory(
        rag_chain,
        get_session_history,
        input_messages_key="input",
        history_messages_key="chat_history",
        output_messages_key="answer",
    )

conversational_rag_chains = {key: create_conversational_rag_chain(rag_chain)
                             for key, rag_chain in rag_chains.items()}

def get_aggregated_answer_and_sources(input_question, session_id="abc123"):
    def is_invalid_answer(answer):
        return "I don't" in answer or "I'm sorry" in answer

    def get_response_and_sources(response, all_texts):
        answer = response["answer"]
        context = response["context"]
        used_chunks = [doc for doc in all_texts if any(chunk.page_content in doc.page_content for chunk in context)]
        sources = {(chunk.metadata['file'], chunk.metadata.get('page', 'Unknown')) for chunk in used_chunks}
        return answer, context, sources

    all_responses = {}
    all_contexts = []
    all_sources = set()

    for key in ['chroma', 'faiss', 'pinecone']:
        response = conversational_rag_chains[key].invoke(
            {"input": input_question},
            config={"configurable": {"session_id": session_id}},
        )
        if not is_invalid_answer(response["answer"]):
            answer, context, sources = get_response_and_sources(response, all_texts[key])
            all_responses[key] = answer
            all_contexts.extend(context)
            all_sources.update(sources)

    if not all_responses:
        return "I don't know the answer.", {}, [], set()

    # Combine all the valid responses into one, avoiding duplicates
    unique_answer_parts = set()
    for answer in all_responses.values():
        unique_answer_parts.update(answer.split(". "))

    aggregated_answer = ". ".join(sorted(unique_answer_parts))

    return aggregated_answer, all_responses, all_contexts, all_sources

def format_context(context: List[Any]) -> str:
    formatted_context = "\n\n".join([
        f"File: {doc.metadata['file']}, Page {doc.metadata.get('page', 'Unknown')}: {doc.page_content}"
        for doc in context
    ])
    return formatted_context



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/3 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/3 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/3 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/6 [00:00<?, ?it/s]

Batches:   0%|          | 0/6 [00:00<?, ?it/s]

`embedding_function` is expected to be an Embeddings object, support for passing in a function will soon be removed.


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

In [4]:
# Usage Example
question = "What are the primary tasks performed by broadcast technicians during radio and television broadcasts?"
aggregated_answer, all_responses, context, sources = get_aggregated_answer_and_sources(question)

# Format the output in a readable way
formatted_context = "\n\n".join([f"File: {doc.metadata['file']}, Page {doc.metadata.get('page', 'Unknown')}: {doc.page_content}" for doc in context])

output = f"""
Final Aggregated Answer: {aggregated_answer}

Intermediate Answers:
Chroma: {all_responses.get('chroma', 'No valid answer found')}
FAISS: {all_responses.get('faiss', 'No valid answer found')}
Pinecone: {all_responses.get('pinecone', 'No valid answer found')}

Context used for answer:
{formatted_context}
"""

print(output)


Parent run 9d5528e8-7a7b-4479-931d-c118d8b22c4a not found for run 16635682-29c1-44f1-9bd1-e292b76d5d17. Treating as a root run.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Parent run 97841829-aebe-412b-8b66-860bb9974779 not found for run fa5dc2f1-1539-416e-bc69-f6186ada7494. Treating as a root run.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Parent run fe8e1176-40da-4fa1-9f27-e2fe86a8b698 not found for run fae4e070-40bb-4895-b5eb-efea16d94d7b. Treating as a root run.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Final Aggregated Answer: Broadcast technicians perform tasks such as controlling audio equipment to regulate volume and sound quality, monitoring the strength, clarity, and reliability of incoming and outgoing signals and adjusting equipment as necessary, regulating the fidelity, brightness, and contrast of video transmissions, and ensuring that programs are airing correctly by observing monitors and communicating with station personnel. During radio and television broadcasts, broadcast technicians primarily maintain programming logs as required by station management and the Federal Communications Commission, control audio equipment to regulate volume and sound quality, monitor the strength, clarity, and reliability of incoming and outgoing signals and adjust equipment to maintain quality broadcasts, regulate the fidelity, brightness, and contrast of video transmissions, observe monitors to ensure programs are airing, preview scheduled programs for readiness, select sources for progra

In [5]:
# Use the get_aggregated_answer_and_sources function with the specified question
question = "What are the specific authorities and responsibilities of the Inspector General within the Department of Justice?"
aggregated_answer, all_responses, context, sources = get_aggregated_answer_and_sources(question)

# Format the output in a readable way
formatted_context = "\n\n".join([f"File: {doc.metadata['file']}, Page {doc.metadata.get('page', 'Unknown')}: {doc.page_content}" for doc in context])

output = f"""
Final Aggregated Answer: {aggregated_answer}

Intermediate Answers:
Chroma: {all_responses.get('chroma', 'No valid answer found')}
FAISS: {all_responses.get('faiss', 'No valid answer found')}
Pinecone: {all_responses.get('pinecone', 'No valid answer found')}

Context used for answer:
{formatted_context}
"""

print(output)

Parent run e7e52187-e4fa-41df-b8d9-7770f3c2f461 not found for run f08dcda8-f168-4ac7-8bf3-52f479b70115. Treating as a root run.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Parent run c52bb340-accb-4609-ae55-ddb71aeeae89 not found for run a0056d74-5b57-4c2d-8893-d1d126b67ad7. Treating as a root run.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Parent run 78d5d08a-394e-43b2-a2c3-2bc7a3b47d99 not found for run 6e1c3e20-7fa8-42f3-b7e9-d856741eda6c. Treating as a root run.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Final Aggregated Answer: Conduct investigations and issue reports relating to criminal wrongdoing and administrative misconduct of DOJ employees, and administration of the department's programs and operations.
2. Conducting investigations and issuing reports relating to criminal wrongdoing and administrative misconduct of Department employees and the administration of the programs and operations of the DOJ.
2. Ensure that no reprisals are taken against any employee for making a complaint or disclosing information to the OIG, unless the complaint or information was disclosed with knowledge of its falsity or with willful disregard for its truth or falsity.

These authorities and responsibilities enable the Inspector General to act as an independent and objective unit within the DOJ to conduct audits, inspections, and investigations aimed at promoting efficiency, effectiveness, and accountability.. Entering into contracts and other arrangements for audits, studies, analyses, and other se

In [6]:
# Use the get_aggregated_answer_and_sources function with the specified question
question = "What were the forecasts for U.S. crude oil production in the fourth quarter of 2016?"
aggregated_answer, all_responses, context, sources = get_aggregated_answer_and_sources(question)

# Format the output in a readable way
formatted_context = "\n\n".join([f"File: {doc.metadata['file']}, Page {doc.metadata.get('page', 'Unknown')}: {doc.page_content}" for doc in context])

output = f"""
Final Aggregated Answer: {aggregated_answer}

Intermediate Answers:
Chroma: {all_responses.get('chroma', 'No valid answer found')}
FAISS: {all_responses.get('faiss', 'No valid answer found')}
Pinecone: {all_responses.get('pinecone', 'No valid answer found')}

Context used for answer:
{formatted_context}
"""

print(output)


Parent run f597be77-62f4-421b-bb68-1b7da7624962 not found for run 631b5f53-334d-421c-8baf-e7f1663dbafe. Treating as a root run.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Parent run 0fa71677-4d72-40d0-864a-fe7f06464cbb not found for run 59b0a21d-63ec-46e0-b695-adc29b93fe78. Treating as a root run.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Parent run 068da267-8020-44c5-8410-791b3a78b714 not found for run d05250ae-20a3-4900-80a6-27d25c4ef5a9. Treating as a root run.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Final Aggregated Answer: The forecasts for U.S. crude oil from the previous to the current forecast for the fourth quarter of 2016.. crude oil production from the previous forecast to the current forecast for the fourth quarter of 2016.. crude oil production in the fourth quarter of 2016 were as follows:

- According to the current forecast (as of December 6, 2016): 8.94 million barrels per day.
- According to the previous forecast (as of November 8, 2016): 8.87 million barrels per day.

This indicates a slight increase in the forecasted production from the previous to the current forecast for that period.. crude oil production in the fourth quarter of 2016 were:

- According to the current forecast (as of December 6, 2016): 8.94 million barrels per day.
- According to the previous forecast (as of November 8, 2016): 8.87 million barrels per day.

This shows a slight increase in the forecasted U.S. crude oil production in the fourth quarter of 2016 were:

- Current forecast: 8.94 milli

In [7]:
# Use the get_aggregated_answer_and_sources function with the specified question
question = "How did the number of Chapter 11 filings in Connecticut change between 1994 and 2004?"
aggregated_answer, all_responses, context, sources = get_aggregated_answer_and_sources(question)

# Format the output in a readable way
formatted_context = "\n\n".join([f"File: {doc.metadata['file']}, Page {doc.metadata.get('page', 'Unknown')}: {doc.page_content}" for doc in context])

output = f"""
Final Aggregated Answer: {aggregated_answer}

Intermediate Answers:
Chroma: {all_responses.get('chroma', 'No valid answer found')}
FAISS: {all_responses.get('faiss', 'No valid answer found')}
Pinecone: {all_responses.get('pinecone', 'No valid answer found')}

Context used for answer:
{formatted_context}
"""

print(output)


Parent run ef5074a1-1b63-4024-9e84-35a56e8ddde2 not found for run 8314b3c7-7745-48f7-8cd3-fc1ec28b7ef8. Treating as a root run.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Parent run 49492cba-3077-4419-83b7-79b87206ebd9 not found for run 1f2bac12-8775-43df-b690-551307b96cee. Treating as a root run.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Parent run af4e45ea-e4b1-407b-b5d4-96b9cdf2d106 not found for run a9d3cee9-03a0-48a2-a707-0fc1b9b4745f. Treating as a root run.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Final Aggregated Answer: Based on the retrieved context indicating Chapter 11 filings in Connecticut, there was a general downward trend in the number of filings from 1994 to 2004. Between 1994 and 2004, the number of Chapter 11 filings in Connecticut decreased. If you have access to the data or can provide the numbers for those years, I could assist in analyzing the change.. In 1994, there were 216 filings, and by 2004 the number had gone down to 84 filings. In 1994, there were 216 filings, and by 2004, the number had decreased to 84 filings. The retrieved context does not provide specific numerical data on the number of Chapter 11 filings in Connecticut between 1994 and 2004. Therefore, I am unable to determine the change in Chapter 11 filings in Connecticut during that period based on the information provided. This represents a decrease of 132 filings over the 10-year period.. This shows a significant reduction in Chapter 11 filings over that ten-year period.

Intermediate Answers:

In [8]:
# Use the get_aggregated_answer_and_sources function with the specified question
question = "What were the immediate actions taken by Nancy Powell and other volunteers following the Cosco Busan oil spill in San Francisco Bay?"
aggregated_answer, all_responses, context, sources = get_aggregated_answer_and_sources(question)

# Format the output in a readable way
formatted_context = "\n\n".join([f"File: {doc.metadata['file']}, Page {doc.metadata.get('page', 'Unknown')}: {doc.page_content}" for doc in context])

output = f"""
Final Aggregated Answer: {aggregated_answer}

Intermediate Answers:
Chroma: {all_responses.get('chroma', 'No valid answer found')}
FAISS: {all_responses.get('faiss', 'No valid answer found')}
Pinecone: {all_responses.get('pinecone', 'No valid answer found')}

Context used for answer:
{formatted_context}
"""

print(output)

Parent run 5893feab-ccec-4034-b4c8-9b43b0cf8e9e not found for run 59746d31-7d67-4054-97b3-edd717f82cdd. Treating as a root run.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Parent run 54eb2435-557d-437d-b04f-699954b5cd8f not found for run a6436cd4-abbf-406e-a1fe-88460572a721. Treating as a root run.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Parent run 0ab17935-3e1b-4fce-9537-386f8e58019e not found for run 7b588928-ad30-489b-b459-e43685b0997d. Treating as a root run.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Final Aggregated Answer: At Hoffman Marsh, they found oiled birds being chased by dogs with no official response, so they took charge of the situation by organizing bird-rescue networks, closing off the shoreline, and reporting oiled birds along with catching and transporting them.

Their grassroots response supplemented official efforts and showcased the community's dedication to wildlife affected by the spill.. At Hoffman Marsh, they took charge of the situation by organizing a network for bird rescue, including activities such as closing off the shoreline, reporting oiled birds, and catching and transporting birds.

These immediate and self-initiated actions by Powell and her fellow volunteers played a critical role in the response to the environmental crisis.. Expanding their efforts to Albany, where they rescued additional birds and transported birds caught by homeless people.
6. Following the Cosco Busan oil spill in San Francisco Bay, Nancy Powell and other volunteers took imme