In [1]:
from config import Config
from utils.scrapper_wikipedia import ScrapperWikipedia
from utils.chunking_models import ChunkingModel
from utils.index import Index
from typing import List, Dict
from utils.typedefs import ArticleInfo, Article, ArticleChunk, QueryScoresVectors, SearchResult, SearchResultsGroupedByDoc

In [2]:
# Single shared Config instance for the notebook. The helper functions below
# read their tunables from it (e.g. SCRAPING_RESULTS_LIMIT, SEARCH_RESULTS_LIMIT)
# and delegate to its `wiki_search`, `chunking_model` and `index` attributes.
# NOTE: the helpers use `config.<NAME>` as default argument values, which Python
# evaluates once at `def` time -- re-run the definitions cell after changing
# this object if the new defaults should take effect.
config = Config()

In [3]:
def wikipedia_list_last_pt_articles(total_limit : int = config.SCRAPING_RESULTS_LIMIT) -> List[ArticleInfo]:
    """Return the article listing produced by ``config.wiki_search.list_last_pt_articles``.

    Args:
        total_limit: Forwarded unchanged as the ``total_limit`` keyword.
            Defaults to ``config.SCRAPING_RESULTS_LIMIT`` (captured when this
            function is defined).

    Returns:
        The value returned by the underlying call, annotated as a list of
        ``ArticleInfo``.
    """
    wiki = config.wiki_search
    listing = wiki.list_last_pt_articles(total_limit=total_limit)
    return listing

def wikipedia_get_last_pt_articles(total_limit : int = config.SCRAPING_RESULTS_LIMIT,
                                   requests_per_second : int = config.SCRAPING_REQUESTS_PER_SECOND,
                                   processing_type : str = config.SCRAPING_TYPE,
                                   verbose = config.VERBOSE) -> List[Article]:
    """Fetch articles through ``config.wiki_search.get_last_pt_articles``.

    Every argument is forwarded by keyword under the same name; the defaults
    come from the corresponding ``config`` constants (captured at definition
    time).

    Args:
        total_limit: Forwarded as ``total_limit``.
        requests_per_second: Forwarded as ``requests_per_second``.
        processing_type: Forwarded as ``processing_type``.
        verbose: Forwarded as ``verbose``.

    Returns:
        The value returned by the underlying call, annotated as a list of
        ``Article``.
    """
    fetch = config.wiki_search.get_last_pt_articles
    fetch_options = {
        "total_limit": total_limit,
        "requests_per_second": requests_per_second,
        "processing_type": processing_type,
        "verbose": verbose,
    }
    return fetch(**fetch_options)

def wikipedia_get_article_chunks(articles : List[Article]) -> List[ArticleChunk]:
    """Split articles into chunks using the configured chunking model.

    Delegates to ``config.wiki_search.get_articles_chunks`` with
    ``config.chunking_model`` as the chunking model.

    Args:
        articles: Articles to chunk; forwarded as ``articles``.

    Returns:
        The chunks returned by the underlying call. Annotated as a list of
        ``ArticleChunk`` to match how the callers in this notebook consume it
        (it is returned as ``List[ArticleChunk]`` and passed to ``index_fit``).
    """
    return config.wiki_search.get_articles_chunks(articles=articles, chunking_model=config.chunking_model)

def index_fit(docs : List[ArticleChunk]) -> Index:
    """Fit the shared ``config.index`` on the given chunks.

    Args:
        docs: Chunks forwarded as the ``docs`` keyword of ``config.index.fit``.

    Returns:
        Whatever ``config.index.fit`` returns (annotated as ``Index``).
    """
    shared_index = config.index
    fitted = shared_index.fit(docs=docs)
    return fitted

def index_search(query : str, num_results : int = config.SEARCH_RESULTS_LIMIT) -> List[SearchResult]:
    """Run ``config.index.search`` for ``query`` with an empty boost dict.

    Args:
        query: Query text, passed as the first positional argument.
        num_results: Forwarded as ``num_results``; defaults to
            ``config.SEARCH_RESULTS_LIMIT`` (captured at definition time).

    Returns:
        The value returned by the underlying search, annotated as a list of
        ``SearchResult``.
    """
    # A fresh empty dict is built on every call, so nothing is shared between calls.
    no_boosts = {}
    hits = config.index.search(query, boost_dict=no_boosts, num_results=num_results)
    return hits

def index_search_grouped_by_doc(query : str, num_results : int = config.SEARCH_RESULTS_LIMIT) -> List[SearchResultsGroupedByDoc]:
    """Run ``config.index.search_by_doc`` for ``query`` with an empty boost dict.

    Args:
        query: Query text, forwarded as ``query``.
        num_results: Forwarded as ``num_results``; defaults to
            ``config.SEARCH_RESULTS_LIMIT`` (captured at definition time).

    Returns:
        The value returned by the underlying call, annotated as a list of
        ``SearchResultsGroupedByDoc``.
    """
    search_by_doc = config.index.search_by_doc
    # A fresh empty dict is built on every call, so nothing is shared between calls.
    grouped_hits = search_by_doc(query=query, boost_dict={}, num_results=num_results)
    return grouped_hits

def index_refined_search(search_results : list[SearchResultsGroupedByDoc], positive : List[str], negative : List[str]) -> List[SearchResultsGroupedByDoc]:
    """Refine earlier search results via ``config.index.refine_search``.

    The ``alpha``/``beta``/``gamma`` arguments of the underlying call are taken
    from ``config.SEARCH_REFINED_ALPHA``, ``config.SEARCH_REFINED_BETA`` and
    ``config.SEARCH_REFINED_GAMMA`` each time this function is called.

    Args:
        search_results: Previous results, forwarded as ``search_results``.
        positive: Forwarded as ``positive``.
        negative: Forwarded as ``negative``.

    Returns:
        The value returned by the underlying call, annotated as a list of
        ``SearchResultsGroupedByDoc``.
    """
    refine = config.index.refine_search
    feedback = {
        "search_results": search_results,
        "positive": positive,
        "negative": negative,
    }
    weights = {
        "alpha": config.SEARCH_REFINED_ALPHA,
        "beta": config.SEARCH_REFINED_BETA,
        "gamma": config.SEARCH_REFINED_GAMMA,
    }
    return refine(**feedback, **weights)

def user_get_articles_chunks(query : str,
                           scrapping_total_limit : int = config.SCRAPING_RESULTS_LIMIT) -> List[ArticleChunk]:
    """Scrape articles and return them split into chunks.

    Calls ``wikipedia_get_last_pt_articles`` followed by
    ``wikipedia_get_article_chunks``.

    Args:
        query: Accepted but not used by this function; the scraping step does
            not depend on it.
        scrapping_total_limit: Passed as ``total_limit`` to the scraping step.

    Returns:
        The chunks produced by ``wikipedia_get_article_chunks``.
    """
    articles = wikipedia_get_last_pt_articles(total_limit=scrapping_total_limit)
    return wikipedia_get_article_chunks(articles)

def user_get_query_results(query : str,
                           top_k : int  = config.SEARCH_RESULTS_LIMIT,
                           scrapping_total_limit : int = config.SCRAPING_RESULTS_LIMIT) -> List[SearchResultsGroupedByDoc]:
    """Scrape, chunk, index and search in one call.

    Steps:
        1. Scrape and chunk articles via ``user_get_articles_chunks``.
        2. Fit the shared index on those chunks via ``index_fit``.
        3. Search the index for ``query`` via ``index_search_grouped_by_doc``.

    Every call repeats the scraping and fitting steps.

    Args:
        query: Query text to search for.
        top_k: Passed as ``num_results`` to the search step.
        scrapping_total_limit: Passed to the scraping step as its article limit.

    Returns:
        The grouped search results returned by ``index_search_grouped_by_doc``.
    """
    # Reuse the scrape+chunk helper instead of duplicating its two calls here.
    docs_chunks = user_get_articles_chunks(query, scrapping_total_limit=scrapping_total_limit)
    # The return value is intentionally not bound: the search below goes through
    # `config.index`, not through the object returned here.
    # NOTE(review): this assumes `config.index.fit` fits in place -- confirm.
    index_fit(docs_chunks)
    return index_search_grouped_by_doc(query, top_k)

In [None]:
# Demo query ("I want to know more about history"); the literal previously held
# mis-decoded bytes in place of the accented "ó".
user_get_query_results(query="Quero saber mais sobre história", top_k=10)