In [5]:
import os
from dotenv import load_dotenv
load_dotenv()
import time

os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")
os.environ["HUGGINGFACE_API_KEY"] = os.getenv("HUGGINGFACE_API_KEY")
os.environ['TAVILY_API_KEY'] = os.getenv('TAVILY_API_KEY')

Web-scraper

In [6]:
import requests

def scrape_jina_ai(url: str):
  response = requests.get("https://r.jina.ai/" + url)
  return response.text

# Trial
# print(requests.get("https://s.jina.ai/https://www.w3schools.com/python/ref_string_format.asp").text)

Declare the web search

In [7]:
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_community.retrievers import TavilySearchAPIRetriever
web_search_tool = TavilySearchResults(k=1)

def search_function(question : str):
    # web_search_tool = TavilySearchAPIRetriever(k=5, search_depth="advanced", max_results=5)
    x = web_search_tool.invoke(question)
    url_response = ""
    for i in x:
        url_response += scrape_jina_ai(i['url'])
    return url_response

In [8]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq

prompt = ChatPromptTemplate.from_template(
    """Answer the question based only on the context provided. If the answer is not in the context,
    respond with 'I do not know'.
    You are forbidden from answering any question unrelated to Stock Exchange and the Stock Market.
    The context you are given is data scraped from the web about the user question related to stocks.

Context: {context}

Question: {question}

If date and time is required, consider the date and time as {dates} in %Y-%m-%d %H:%M:%S format."""
)

question = "What is Nvidia stock price today ?"
url_response = search_function(question)

In [9]:
import datetime

now = datetime.datetime.now()
dates = now.strftime("%Y-%m-%d %H:%M:%S")
dates

'2024-08-09 00:12:29'

In [7]:
# ### OPTIONAL TRIAL
# result = requests.get("https://s.jina.ai/" + question).text

In [10]:
url_response



In [11]:
## Data infestion

from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
## Data infestion

from langchain_community.vectorstores.utils import filter_complex_metadata
from langchain.schema.document import Document
from langchain_community.vectorstores import Chroma
def get_text_chunks_langchain(url_response):
    
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=200,separators="\n")
    docs = [Document(page_content=x) for x in text_splitter.split_text(url_response)]
    return docs

docs = get_text_chunks_langchain(url_response)
# docs = get_text_chunks_langchain(result)
documents = filter_complex_metadata(docs)
documents

 Document(page_content='[](https://www.nasdaq.com/ "Nasdaq Homepage Logo")\n\n*   [Nasdaq+](https://www.nasdaq.com/plus/dashboard)\n*   [Weekly Macro+](https://www.nasdaq.com/nasdaqmacroplus)\n*   [Scorecard](https://www.nasdaq.com/plus/scorecard)\n*   Market Activity\n    \n    *   [Market Activity \\->](https://www.nasdaq.com/market-activity)\n        \n        *   [Stocks](https://www.nasdaq.com/market-activity/stocks)\n        *   [Options](https://www.nasdaq.com/market-activity/quotes/option-chain)'),
 Document(page_content='*   [Stocks](https://www.nasdaq.com/market-activity/stocks)\n        *   [Options](https://www.nasdaq.com/market-activity/quotes/option-chain)\n        *   [ETFs](https://www.nasdaq.com/market-activity/etf)\n        *   [Mutual Funds](https://www.nasdaq.com/market-activity/mutual-fund)\n        *   [Indexes](https://www.nasdaq.com/market-activity/indexes)\n        *   [Commodities](https://www.nasdaq.com/market-activity/commodities)'),
 Document(page_content='

In [12]:
## Vector embeddings and Vector Store
from langchain_community.embeddings import HuggingFaceEmbeddings
ollama_emb = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

db = Chroma()
try:
    db._collection.delete(db.get()['ids'])
    db = Chroma.from_documents(documents, ollama_emb)
except:
    db = Chroma.from_documents(documents, ollama_emb)

  from tqdm.autonotebook import tqdm, trange


In [13]:
result = db.similarity_search(question)
result

[Document(page_content='On Thursday morning 08/08/2024 the NVIDIA Corp. share started trading at the price of $98.72. Compared to the closing price on Wednesday 08/07/2024 on BTT of $98.74, this is a drop of 0.02%. There are 24.60 B shares outstanding in NVIDIA Corp., which values the company at $2,556.82 B.'),
 Document(page_content='NVIDIA Stock Snapshot\n---------------------\n\n104.93\n\nBid\n\n52.00\n\nBid Size\n\n104.92\n\nAsk\n\n100.00\n\nAsk Size\n\n8/8/2024\n\nDate\n\n2:42 PM\n\nTime\n\n8.33 M\n\nVolume\n\n98.74\n\nPrev. Close\n\n98.72\n\nOpen\n\n2,563.85 B\n\nMarket Cap in USD\n\n24.60 B\n\nNumber of Shares\n\n97.33\n\nDay Low\n\n105.50\n\nDay High\n\n104.92\n\n39.23\n\n52 Week Low\n\n140.82\n\n52 Week High\n\n104.92\n\n0.02\n\nDividend in USD\n\n0.03\n\nDividend Yield\n\n51.14\n\nP/E Ratio\n\n96.02\n\nFree Float in %\n\n1.21\n\nEPS in USD\n\n1.74\n\nBook Value per Share in USD\n\n1.13'),
 Document(page_content='$403.40\n\nMSFT1.25%\n\nDirexion Daily Semiconductor Bull 3X Sha

In [14]:
chain = (
    prompt
    | ChatGroq(model="llama3-70b-8192", temperature=0)
    | StrOutputParser()
)

chain.invoke({"question": question, "context": result, "dates": dates})

'According to the context, the current price of NVIDIA stock is $98.72.'