In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys
from dotenv import load_dotenv

# Make the parent of the current working directory importable so that
# `feature_pipeline` can be imported from this notebook.
# The outer abspath() collapses the trailing ".." into a clean absolute path.
src_path = os.path.abspath(os.path.join(os.path.abspath(""), ".."))
# Guard the insert: this cell gets re-run during interactive work, and an
# unconditional insert would prepend a duplicate entry on every run.
if src_path not in sys.path:
    sys.path.insert(0, src_path)

env_path = os.path.join(src_path, "feature_pipeline/.env")

# Take environment variables from the feature pipeline subfolder .env.
# Kept as the cell's last expression so the notebook displays load_dotenv's
# return value.
load_dotenv(env_path)

True

In [10]:
from feature_pipeline.llm_components.prompt_templates import QueryExpansionTemplate

In [10]:
from langchain_community.utils.openai_functions import (
    convert_pydantic_to_openai_function,
)
from langchain_core.utils.function_calling import convert_to_openai_function
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field, validator
from langchain_openai import ChatOpenAI

## Function calling

In [18]:
class QueryMetadata(BaseModel):
    """Information to extract from the user query. 
    Dates should be transformed to yyyy-mm-dd and be relative to the given current date"""

    # NOTE: the docstring above and the Field descriptions below are not only
    # documentation. As the cell output shows, they are copied verbatim into the
    # function schema ('description' entries), so editing them changes what the
    # model is told to extract.
    currency: str = Field(description="The cryptocurrency mentioned in the query.")
    date: str = Field(description="date from the text in the format yyyy-mm-dd")


# Convert the pydantic model into a function schema (wrapped in a list) and
# display it as the cell output for inspection.
openai_functions = [convert_to_openai_function(QueryMetadata)]
openai_functions

[{'name': 'QueryMetadata',
  'description': 'Information to extract from the user query. \nDates should be transformed to yyyy-mm-dd and be relative to the given current date',
  'parameters': {'type': 'object',
   'properties': {'currency': {'description': 'The cryptocurrency mentioned in the query.',
     'type': 'string'},
    'date': {'description': 'date from the text in the format yyyy-mm-dd',
     'type': 'string'}},
   'required': ['currency', 'date']}}]

In [None]:
from datetime import datetime

from langchain.globals import set_debug
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser

# Turn on LangChain's global debug mode while experimenting with the chain.
set_debug(True)

# Today's date as yyyy-mm-dd; it is injected into the system prompt so that
# relative dates in the user query ("today") can be resolved.
current_date = datetime.now().strftime("%Y-%m-%d")

template = ChatPromptTemplate.from_messages(
    [
        ("system", "Today´s date is {current_date}."),
        ("human", "{user_query}"),
    ]
)

# Pipeline: prompt -> chat model with the function schema bound -> parser for
# the function-call arguments.
model = ChatOpenAI(temperature=0)
parser = JsonOutputFunctionsParser()
chain = template | model.bind(functions=openai_functions) | parser

query = "What is the price of btc today?"
chain.invoke({"current_date": current_date, "user_query": query})

In [80]:
# Same extraction as the LangChain chain above, but calling the OpenAI SDK
# directly with the function schema.

from openai import OpenAI

client = OpenAI(
  api_key=os.environ.get("OPENAI_API_KEY")
)

# Test the prompts with a small sample.
# The date context is sent with the "system" role, matching the LangChain
# prompt above; with the "assistant" role the model would see the date as
# something it had said itself rather than as an instruction.
# `current_date` is defined in the LangChain chain cell, which must run first.
messages = [
    {
        "role": "system",
        "content": f"Today´s date is {current_date}"
    },
    {
        "role": "user",
        "content": "What date was 5 days ago?"
    }
]

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=messages,
    functions=openai_functions
)
# Display the returned message as the cell output.
response.choices[0].message

## Filtering with extracted metadata

In [3]:
from feature_pipeline.rag.query_expanison import QueryExpansion
from feature_pipeline.rag.query_meta_extractor import QueryMetaExtractor

Init feature_pipeline


In [10]:
# Try query expansion on a sample question and display the generated queries.
# NOTE(review): the positional 5 is presumably the number of expanded queries
# to generate (the output lists five) — confirm against generate_response.
QueryExpansion().generate_response("Good news about ethereum today", 5)

['1. What are the latest positive developments in Ethereum as of today?',
 '2. Are there any recent optimistic reports or updates on Ethereum?',
 '3. Can you find any current favorable information or events related to Ethereum?',
 "4. What's the current upbeat news regarding Ethereum's progress or updates?",
 '5. Is there any encouraging and recent news about the Ethereum blockchain today?']

In [6]:
from langchain.globals import set_debug
# Switch LangChain's global debug mode back off before calling the extractor.
set_debug(False)
# Try metadata extraction on a sample query; the cell output shows the
# extracted 'currency' and 'date' fields.
QueryMetaExtractor().generate_response("What is the price of btc today?")

{'currency': 'btc', 'date': '2024-05-22'}

In [20]:
from feature_pipeline.rag.retriever import VectorRetriever

# Retrieve documents for a sample question. Per the log output, the retriever
# generates 5 search queries and extracts a currency and a date from the query
# before retrieving documents.
# NOTE(review): the log reports num_documents=15 although k=10 was requested —
# confirm how k is applied across the expanded queries.
query = "Are there good news about bitcoin today?"
result = VectorRetriever(query).retrieve_top_k(
    k=10,
    to_expand_to_n_queries=5,
)

[2m2024-05-22 15:41:47[0m [[32m[1minfo     [0m] [1mSuccessfully generated queries for search.[0m [36mcls[0m=[35mfeature_pipeline.rag.retriever[0m [36mnum_queries[0m=[35m5[0m
[2m2024-05-22 15:41:48[0m [[32m[1minfo     [0m] [1mExtracted currency from query bitcoin[0m [36mcls[0m=[35mfeature_pipeline.rag.retriever[0m
[2m2024-05-22 15:41:48[0m [[32m[1minfo     [0m] [1mExtracted date from query 2024-05-22[0m [36mcls[0m=[35mfeature_pipeline.rag.retriever[0m
[2m2024-05-22 15:41:48[0m [[32m[1minfo     [0m] [1mAll documents retrieved successfully.[0m [36mcls[0m=[35mfeature_pipeline.rag.retriever[0m [36mnum_documents[0m=[35m15[0m


In [28]:
from feature_pipeline.rag.reranking import Reranker

# Pull the "content" entry out of each retrieved item's payload ...
hits = [doc.payload["content"] for doc in result]
# ... and have the reranker order the passages against the query, keeping at
# most 3 of them.
reranked_results = Reranker().generate_response(
    query=query,
    passages=hits,
    keep_top_k=3,
)



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are an AI language model assistant. Your task is to rerank passages related to a query
    based on their relevance. 
    The most relevant passages should be put at the beginning. 
    You should only pick at max 3 passages.
    The provided and reranked documents are separated by '
#next-document#
'.
    
    The following are passages related to this query: Are there good news about bitcoin today?.
    
    Passages: 
    the post top crypto events to watch next week : binance vs. nigeria, decision on ethereum etf, and crypto bill appeared first on coinpedia fintech newsas we approach the end of may, crypto traders are [ + 315 chars ]
#next-document#
ethereum news, bitcoin updates, and altcoin market. best crypto debit card [ url ] woofi... # bitcoin this story appeared on youtube. com,.
#next-document#
the post crypto market analysis : when will bitcoin and altcoins rally? appeared first on coinped

In [30]:
# Show the first reranked passage (the reranking prompt asks for the most
# relevant passages to be put first).
reranked_results[0]

'both bitcoin ( btc ) and ethereum appear to currently be the center of attention in the crypto market so far, with bitcoin recently hitting just above a 24 - hour high of $ 71, 650 mark, marking a 6. 4 % inc [ + 3493 chars ]'