# Installing libraries

In [None]:
%pip install langchain-text-splitters langchain-community langgraph

Note: you may need to restart the kernel to use updated packages.


# Langsmith initialization
Setting up Langsmith for application tracing

In [2]:
import getpass
import os

# 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
os.environ['LANGSMITH_TRACING'] = 'true'
os.environ['LANGSMITH_API_KEY'] = getpass.getpass('Enter your Langsmith API Key:')

# Initializing Chat Model
Here, I am using groq to call for the llm chatbots

## Installing groq library from langchain

In [3]:
%pip install -qU "langchain[groq]" langchain

Note: you may need to restart the kernel to use updated packages.


In [3]:
from langchain.chat_models import init_chat_model

# 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
if not os.environ.get('GROQ_API_KEY') :
    os.environ['GROQ_API_KEY'] = getpass.getpass('Enter you groq API Key: ')
    
llm = init_chat_model(model = 'llama3-8b-8192', model_provider = 'groq')

# Initializing Embeddings Model
Here I am going for the Embedding model by hugging face which is 'all-MiniLM-L6-v2'

## Installing the hugging face library from langchain

In [5]:
pip install -qU langchain-huggingface

Note: you may need to restart the kernel to use updated packages.


In [6]:
from langchain_huggingface import HuggingFaceEmbeddings

embeddings_model = HuggingFaceEmbeddings(model_name = 'all-MiniLM-L6-v2')

  from .autonotebook import tqdm as notebook_tqdm


# Initializing Vector Store
From langchain core, currently I will use the in memory Vector Store. Later on will choose one of FAISS, Pinecone, ChromaDB, PGVector, ...

In [7]:
pip install -qU langchain-core

Note: you may need to restart the kernel to use updated packages.


In [8]:
from langchain_core.vectorstores import InMemoryVectorStore

vector_store = InMemoryVectorStore(embeddings_model)

# Creating the RAG Pipeline

## Importing the libraries

In [9]:
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from typing import List, TypedDict

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [10]:
# Setting the USER_AGENT Environment Variable
if not os.environ.get('USER_AGENT'):
    os.environ['USER_AGENT'] = 'FinanceAgent-1.0'

## Web Scrapping the content

In [11]:
# Defining web scrapping content from the web
loader = WebBaseLoader(
    web_paths = (
        'https://www.cnbc.com/2025/02/27/stock-market-today-live-updates.html',
        'https://apnews.com/article/stocks-markets-rates-tariffs-ai-a80355efea6fd7454df04d68cf3a79a1'
    ),
    bs_kwargs = dict(
        parse_only = bs4.SoupStrainer(
            class_ = ('BrandPageWrapper-contentWrapper', 'Page-content')
        )
    ),
)

# Loading the scrapped content
docs = loader.load()
print(docs)

[Document(metadata={'source': 'https://www.cnbc.com/2025/02/27/stock-market-today-live-updates.html'}, page_content=''), Document(metadata={'source': 'https://apnews.com/article/stocks-markets-rates-tariffs-ai-a80355efea6fd7454df04d68cf3a79a1'}, page_content='\n\n\nBusiness\nStock market today: Wall Street rallies, and Dow jumps 600 to make a dreary February not so bad\n\n\n\n\n\nStock market today: Wall Street rallies, and Dow jumps 600 to make a dreary February not so bad\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n1 of 5\xa0|\xa0\nThe New York Stock Exchange is seen in New York, Wednesday, Feb. 26, 2025. (AP Photo/Seth Wenig)\nRead More\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n2 of 5\xa0|\xa0\nAn American flag is displayed on the outside of the New York Stock Exchange in New York, Wednesday, Feb. 26, 2025. (AP Photo/Seth Wenig)\nRead More\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n3 of 5\xa0|\xa0\nA person walks in front of an electronic stock board showin

## Splitting the loaded data from web scrapping

In [12]:
# Defining the splitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200)

# Splitting the documents
all_splits = text_splitter.split_documents(docs)

## Indexing/Storing the chunks

In [13]:
_ = vector_store.add_documents(documents = all_splits)

## Defining the Prompt for question Answering

In [14]:
# Defining the prompt for question-answering use case
prompt = hub.pull('rlm/rag-prompt')

print(prompt)

input_variables=['context', 'question'] input_types={} partial_variables={} metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'} messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})]


## Defining the State Class

In [15]:
# States class comprises of 3 attributes :
# 1. question (Input)
# 2. context (Input)
# 3. answer (Output)
class State(TypedDict):
    question: str
    context: List[Document]
    answer: str

## Defining application steps

In [16]:
def retrieve(state: State) -> dict:
    retrieved_docs = vector_store.similarity_search(state['question'])
    return {'context': retrieved_docs}

def generate(state: State) -> dict:
    docs_content = '\n\n'.join([doc.page_content for doc in state['context']])
    messages = prompt.invoke({'question': state['question'], 'context': docs_content})
    response = llm.invoke(messages)
    return {'answer': response.content}

## Compiling the RAG piepline

In [17]:
# Defining the state graph for the input question, retrieved documents and the answer
graph_builder = StateGraph(State).add_sequence([retrieve, generate])
# Adding an edge from the START state to the retrieve state
graph_builder.add_edge(START, "retrieve")
# Compiling the graph
graph = graph_builder.compile()

## Testing the RAG Model

In [20]:
print(type(graph))

<class 'langgraph.graph.state.CompiledStateGraph'>


In [18]:
question = "What is happening in the stock market today?"
response = graph.invoke({'question': question})

print(response['answer'])

According to the context, stocks rose following an economic report released in the morning, with the S&P 500 jumping 1.6% and the Dow Jones Industrial Average rising 601 points.


In [31]:
question = "What is the sentiment of the market in 1 word?"
response = graph.invoke({'question': question})

print(response['answer'])

Bear.


In [32]:
question = 'What was the sentiment before today in the market in 1 word?'
response = graph.invoke({'question': question})

print(response['answer'])

Bearish.


In [33]:
question = 'What is Football and how is it played?'

response = graph.invoke({'question': question})
print(response['answer'])

I don't know. The provided context does not mention football or any information about how it is played. It appears to be a news article about the stock market and economics.
