# RAG demo

In [2]:
%load_ext autoreload
%autoreload 2

import os
import sys
import pandas as pd

# Make project modules importable: os.path.abspath("") resolves to the current
# working directory, so this prepends "<cwd>/.." to the module search path.
src_path = os.path.join(os.path.abspath(""), "..")
sys.path.insert(0, src_path)

from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv()) # read local .env file
import weaviate
import weaviate.classes as wvc
import os
import requests
import json
from llm_components.prompt_templates import QueryMetadata
from langchain_core.utils.function_calling import convert_to_openai_function
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain.globals import set_debug
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
from datetime import datetime
from langchain_openai import ChatOpenAI
from rag.query_expansion import QueryExpansion
from rag.query_meta_extractor import QueryMetaExtractor

## 1. Query metadata extraction

Given a user query, extract its metadata using OpenAI function calling.

In [8]:
# Inputs for the prompt: the current date (YYYY-MM-DD) and the raw user question.
current_date = datetime.now().strftime("%Y-%m-%d")
query = "What is the price of btc today?"

# Turn off LangChain's global debug output for this demo.
set_debug(False)

# Describe the QueryMetadata schema as an OpenAI function definition.
openai_functions = [convert_to_openai_function(QueryMetadata)]

# The system message carries the date; the human message carries the query.
template = ChatPromptTemplate.from_messages(
    [
        ("system", "Today´s date is {current_date}."),
        ("human", "{user_query}"),
    ]
)

model = ChatOpenAI(temperature=0)
parser = JsonOutputFunctionsParser()

# Pipeline: prompt -> model bound to the function schema -> JSON output parser.
chain = template | model.bind(functions=openai_functions) | parser

prompt_inputs = {"current_date": current_date, "user_query": query}
filter_result = chain.invoke(prompt_inputs)

## 2. Query Augmentation

Generating different queries can be beneficial to extend the search space of the initial query and find more relevant results.

In [5]:
# Expand the original question into five alternative phrasings.
user_query = "Regulatory news"
query_expander = QueryExpansion()
generated_queries = query_expander.generate_response(user_query, to_expand_to_n=5)
generated_queries

['1. What are the latest updates in regulations?',
 '2. How can I stay informed about regulatory changes?',
 '3. Are there any recent developments in regulatory policies?',
 '4. What regulatory announcements have been made recently?',
 '5. Where can I find the most up-to-date regulatory news?']

The new queries generated above can be used to extend the search results. 

## 3. Filter + Vector Search with Weaviate

In [12]:
from weaviate.classes.query import Filter, MetadataQuery

# Connection settings are read from the environment (loaded from .env above).
wcs_cluster_url = os.getenv("WCS_CLUSTER_URL")
wcs_credentials = weaviate.auth.AuthApiKey(os.getenv("WCS_API_KEY"))
# NOTE(review): this header presumably lets Weaviate call OpenAI to vectorize
# queries on the server side -- confirm against the collection's configuration.
openai_headers = {"X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"]}

# Shared client used by every search cell below.
weaviate_client = weaviate.connect_to_wcs(
    cluster_url=wcs_cluster_url,
    auth_credentials=wcs_credentials,
    headers=openai_headers,
    skip_init_checks=True,
)

def query_vectordb(generated_query, date, k=5):
    """Run a near-text search against the "Articles" collection.

    Args:
        generated_query: Text to search for.
        date: Value that the `published_at` property must equal. An empty
            string or None disables the date filter.
        k: Maximum number of objects to request.

    Returns:
        A list of dicts, one per returned object, containing the object's
        properties plus "uuid" (as a string) and "distance". An empty list
        is returned when the search fails.
    """
    articles = weaviate_client.collections.get("Articles")

    try:
        search_kwargs = {
            "query": generated_query,
            "limit": k,
            "return_metadata": MetadataQuery(distance=True),
        }
        # Only constrain by publication date when one was actually supplied.
        if date:
            search_kwargs["filters"] = Filter.by_property("published_at").equal(date)

        response = articles.query.near_text(**search_kwargs)
    except Exception as e:
        # The shared client is deliberately left open: closing it here would
        # make every later query in the session fail as well.
        print("Unable to search", e)
        # Without a response there is nothing to unpack; report "no hits"
        # instead of failing on `None.objects`.
        return []

    return [
        {**item.properties, "uuid": str(item.uuid), "distance": item.metadata.distance}
        for item in response.objects
    ]

In [13]:
# Collect the hits of every expanded query into one flat list (no date filter).
results = []
for query in generated_queries:
    results.extend(query_vectordb(query, date="", k=5))

In [18]:
import utils

# Drop duplicate hits, order them with utils.sort_by_distance, keep the first five.
unique_hits = utils.remove_duplicates(results)
ranked_hits = utils.sort_by_distance(unique_hits)
hits = ranked_hits[:5]
hits[0]

{'title': ' Spot Ether ETFs are now officially legal in the US: Law Decoded  ',
 'published_at': '2024-05-27',
 'content': 'In a second landmark decision this year, the United States Securities and Exchange Commission has\xa0given the regulatory green light to spot Ether exchange-traded funds (ETFs) in the country. The SEC approved the 19b-4 filings from VanEck, BlackRock, Fidelity, Grayscale, Franklin Templeton, ARK 21Shares, Invesco Galaxy and Bitwise, approving the rule changes allowing spot Ether  ETH  $3,890  ETFs to be listed and traded on their respective exchanges.  ETH   $3,890  Unlike the spot Bitcoin  BTC  $68,460  ETFs approved via voting by a five-member committee including SEC Chair Gary Gensler, spot Ether ETFs were approved by the SEC’s Trading and Markets Division.  BTC   $68,460  Another major difference between the approval processes of the two crypto ETFs is that all 10 BTC ETFs started trading the day after their approval, as they also got S-1 form clearance. Spot 

## 4. Answer with context 

In [None]:
from rag.qa_context import QAContext

# Concatenate the retrieved article bodies, one per line. The separator must be
# the newline escape "\n"; "/n" would insert the literal characters '/' and 'n'
# between documents.
context = "\n".join(hit["content"] for hit in hits)

# Use the original query and provide the results of our search process.
result = QAContext().rag_query(user_query, context)
print("Answer", result)
print("Contex: \n", hits)