In [None]:
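# Optional: uncomment the two statements below to set LOG_LEVEL=debug in the
# process environment before the imports in the next cell run.
# import os

# os.environ["LOG_LEVEL"] = "debug"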


In [None]:
from more_itertools import flatten

from tt_bot.retrievals import WebRetrieval
from tt_bot.search_engines import GoogleSearchEngine
from tt_bot.text_encoders import OpenAIEncoder
from tt_bot.llm_components import LgChainQA
from tt_bot.web_extractors import HTMLExtractor, WikiExtractor

from tt_bot.logger import get_logger
from tt_bot.utils.json_data import get_pretty


In [None]:
# Notebook-wide logger obtained from the project's `get_logger` helper,
# keyed by this module's `__name__`; used below to report the QA answer.
logger = get_logger(__name__)


In [None]:
# Question-answering component; called in a later cell to produce the answer.
openai_qa = LgChainQA()

# Collaborators handed to the retrieval object constructed below.
search_engine = GoogleSearchEngine()
text_encoder = OpenAIEncoder()

# Extractors keyed by a short source-type label.
extractors = dict(
    html=HTMLExtractor(),
    wikipedia=WikiExtractor(),
)

# Wire the search engine, encoder and extractors into one retrieval object.
web_retrieval = WebRetrieval(
    extractors=extractors,
    text_encoder=text_encoder,
    search_engine=search_engine,
)


In [None]:
query_text = "Quien es Lionel Messi?"
retrieval_responses = await web_retrieval.retrieve(query_text=query_text)

text_chunks = (rr.texts for rr in retrieval_responses)
text_chunks = list(flatten(text_chunks))

qa_answer = await openai_qa.async_generate(text_chunks, query_text)
logger.info(get_pretty(qa_answer.dict()))
