# RAG demo

In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys
import pandas as pd

# Put the parent of the current working directory at the front of sys.path
# so that modules living there can be imported by the cells below.
src_path = os.path.join(os.path.abspath(""), "..")
# Guard the insert: without it every re-run of this cell pushes another
# duplicate entry onto sys.path.
if src_path not in sys.path:
    sys.path.insert(0, src_path)

from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv()) # read local .env file
import weaviate
import weaviate.classes as wvc
import os
import requests
import json
from llm_components.prompt_templates import QueryMetadata
from langchain_core.utils.function_calling import convert_to_openai_function
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain.globals import set_debug
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
from datetime import datetime
from langchain_openai import ChatOpenAI
from rag.query_expansion import QueryExpansion
from rag.query_meta_extractor import QueryMetaExtractor

## 1. Query metadata extraction

In [None]:
# Assemble a prompt -> model -> parser chain and run it on one sample query.
# The QueryMetadata schema is converted with convert_to_openai_function and
# bound to the model as its only function.
openai_functions = [convert_to_openai_function(QueryMetadata)]
set_debug(False)

model = ChatOpenAI(temperature=0)
parser = JsonOutputFunctionsParser()

# NOTE(review): the system message spells "Today´s" with an acute accent
# (U+00B4) rather than an apostrophe; kept verbatim here because it is part
# of the prompt sent to the model.
template = ChatPromptTemplate.from_messages(
    [
        ("system", "Today´s date is {current_date}."),
        ("human", "{user_query}"),
    ]
)
chain = template | model.bind(functions=openai_functions) | parser

# Both template placeholders are filled at invocation time.
current_date = datetime.now().strftime("%Y-%m-%d")
query = "What is the price of btc today?"
chain.invoke({"current_date": current_date, "user_query": query})

## 2. Query Augmentation

In [3]:
from weaviate.classes.query import Filter

# Open the Weaviate client used by the cells below.
# Both API keys are read with os.environ[...] so a missing variable raises a
# KeyError right here, instead of handing None to AuthApiKey and failing
# later with a less obvious authentication error.
client = weaviate.connect_to_wcs(
    cluster_url="news-db-h6x724lk.weaviate.network",
    skip_init_checks=True,
    auth_credentials=weaviate.auth.AuthApiKey(os.environ["WCS_API_KEY"]),
    headers={
        # The OpenAI key is forwarded to the cluster as a request header.
        "X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"]
    }
)

In [14]:
from weaviate.classes.query import Filter, MetadataQuery

# Near-text search over the "Articles" collection, limited to 5 objects whose
# `published_at` property equals one fixed date; distances are requested in
# the returned metadata.
articles = client.collections.get("Articles")
user_query = "News about the bitcoin ETFs"

published_on = Filter.by_property("published_at").equal("2024-05-28")
response = articles.query.near_text(
    query=user_query,
    filters=published_on,
    return_metadata=MetadataQuery(distance=True),
    limit=5,
)

# Inspect the first object
print(response.objects[0].properties)

{'title': ' Grayscale’s Ethereum ETF could bleed $110M daily in first month: Kaiko ', 'published_at': '2024-05-28', 'content': 'Grayscale’s yet-to-launch spot Ether  ETH  $3,890  exchange-traded fund (ETF) could bleed an average of $110 million per day if it follows a similar pattern to its Grayscale Bitcoin Trust in the first month.\xa0  ETH   $3,890  The Grayscale Bitcoin Trust (GBTC) converted from a closed-end fund to an ETF on Jan. 11, which saw 23% of its assets under management at launch flow out in the first month — totaling $6.5 billion, Kaiko analysts wrote in a May 27 report. ETHE has an AUM of $11 billion. If it has “a similar magnitude of outflows” as GBTC, “this would amount to $110 million of average daily outflows or 30% of ETH’s average daily volume on Coinbase,” according to Kaiko. In the past three months, Grayscale’s ETHE traded at up to a 26% discount to its net asset value (NAV). Kaiko researchers noted that once it becomes a spot ETF, it’s “reasonable to expect” 

In [13]:
# Show the first returned object's uuid converted to a string.
first_hit = response.objects[0]
str(first_hit.uuid)

'ffd6a14c-9fa2-4a96-a58f-d01a221202f4'

In [17]:
# One flat dict per returned object: its properties plus the stringified
# uuid and the distance taken from the object's metadata.
bar = [
    dict(hit.properties, uuid=str(hit.uuid), distance=hit.metadata.distance)
    for hit in response.objects
]
bar

[{'title': ' Grayscale’s Ethereum ETF could bleed $110M daily in first month: Kaiko ',
  'published_at': '2024-05-28',
  'content': 'Grayscale’s yet-to-launch spot Ether  ETH  $3,890  exchange-traded fund (ETF) could bleed an average of $110 million per day if it follows a similar pattern to its Grayscale Bitcoin Trust in the first month.\xa0  ETH   $3,890  The Grayscale Bitcoin Trust (GBTC) converted from a closed-end fund to an ETF on Jan. 11, which saw 23% of its assets under management at launch flow out in the first month — totaling $6.5 billion, Kaiko analysts wrote in a May 27 report. ETHE has an AUM of $11 billion. If it has “a similar magnitude of outflows” as GBTC, “this would amount to $110 million of average daily outflows or 30% of ETH’s average daily volume on Coinbase,” according to Kaiko. In the past three months, Grayscale’s ETHE traded at up to a 26% discount to its net asset value (NAV). Kaiko researchers noted that once it becomes a spot ETF, it’s “reasonable to exp

In [None]:
from utils import flatten, sort_by_distance

# Two independently built lists of the same hits (properties + distance),
# used as sample input for the flatten / sort_by_distance helpers.
foo = [
    [dict(hit.properties, distance=hit.metadata.distance) for hit in response.objects]
    for _ in range(2)
]
bar = sort_by_distance(flatten(foo))
bar

## 3. Retriever class



In [25]:
from rag.retriever import DataRetriever

# Run the retrieval through DataRetriever with a fresh query.
set_debug(False)
user_query = "recent developments"

retriever = DataRetriever(client, user_query)
# NOTE(review): the meaning of the two positional arguments (4, 1) is not
# visible in this notebook — confirm against rag.retriever.
result, hits = retriever.retrieve_top_k(4, 1)

[2m2024-05-29 12:45:39[0m [[32m[1minfo     [0m] [1mSuccessfully generated queries for search.[0m [36mcls[0m=[35mrag.retriever[0m [36mnum_queries[0m=[35m3[0m
[2m2024-05-29 12:45:41[0m [[32m[1minfo     [0m] [1mUnable to extract metadata    [0m [36mcls[0m=[35mrag.retriever[0m
[2m2024-05-29 12:45:41[0m [[32m[1minfo     [0m] [1mSearching without date        [0m [36mcls[0m=[35mrag.retriever[0m
[2m2024-05-29 12:45:41[0m [[32m[1minfo     [0m] [1mSearching without date        [0m [36mcls[0m=[35mrag.retriever[0m
[2m2024-05-29 12:45:41[0m [[32m[1minfo     [0m] [1mSearching without date        [0m [36mcls[0m=[35mrag.retriever[0m
[2m2024-05-29 12:45:41[0m [[32m[1minfo     [0m] [1mAll documents retrieved successfully.[0m [36mcls[0m=[35mrag.retriever[0m [36mnum_documents[0m=[35m3[0m


In [22]:
# Display `result`, the first value unpacked from retrieve_top_k.
result

"Recent developments in the cryptocurrency space include the argument made by Nick van Eck, CEO of Agora, that stablecoin issuers offering yield-bearing elements are missing the core mission of stablecoins. He believes that focusing on utility, liquidity, and means of transaction is more important for reaching a wider audience. Van Eck also highlighted the potential issues with yield-bearing stablecoins being classified as security products in many countries, limiting customer reach and liquidity. Agora plans to launch the Agora digital dollar (AUSD) on Ethereum next month, aiming to focus solely on utility, liquidity, and means of transaction. The stablecoin market is currently dominated by USDT and USDC, but there is still room for newcomers like Agora to offer alternative models. Van Eck expects the stablecoin industry to expand to $3 trillion by 2030. Additionally, Grayscale's upcoming spot Ether ETF could see significant outflows if it follows a similar pattern to the Grayscale Bi

In [26]:
# Display `hits`, the second value unpacked from retrieve_top_k.
hits

[{'title': ' ‘Yield-bearing stables’ are not money or stablecoins: Agora’s van Eck ',
  'published_at': '2024-05-28',
  'content': "Stablecoin issuers that offer a yield-bearing element to give holders passive income are missing the point of a stablecoin’s core mission, argues Nick van Eck, CEO of stablecoin issuer firm Agora. Instead, these firms should focus on utility, liquidity and means of transaction in a way that reaches as many individuals and businesses as possible, the son of investment management maestro Jan van Eck explained in a May 27 Medium post. Yield-bearing stablecoins have offered a new dimension for decentralized finance users looking to earn interest, but van Eck says such products will likely be classed as security products in many countries and, therefore, restrict customer reach. “Not only does this deprive you of customers, it also deprives you of liquidity providers, vendors, and a higher utility ceiling. Your product is not freely tradeable,” said van Eck, ad