## Fetch Data

In [44]:

!pip install langchain langchain-community langchain-core transformers chromadb



In [45]:
%pip install --upgrade --quiet  langchain sentence_transformers

In [46]:
try:
    from urllib.request import urlopen
except ImportError:
    from urllib2 import urlopen

import certifi
import json
import pandas as pd


def get_jsonparsed_data(url, api_key, exchange):
  if exchange == "NSE":
    url = f"https://financialmodelingprep.com/api/v3/search?query={ticker}&exchange=NSE&apikey={api_key}"
  else:
    url = f"https://financialmodelingprep.com/api/v3/quote/{ticker}?apikey={api_key}"
  response = urlopen(url, cafile=certifi.where())
  data = response.read().decode("utf-8")
  return json.loads(data)

api_key="C1HRSweTniWdBuLmTTse9w8KpkoiouM5"
ticker = "MSFT"
exchange = "US"
eco_ind = pd.DataFrame(get_jsonparsed_data(ticker, api_key,exchange))
eco_ind


Unnamed: 0,symbol,name,price,changesPercentage,change,dayLow,dayHigh,yearHigh,yearLow,marketCap,...,exchange,volume,avgVolume,open,previousClose,eps,pe,earningsAnnouncement,sharesOutstanding,timestamp
0,MSFT,Microsoft Corporation,417,1.0003,4.13,411.06,417.4,468.35,362.9,3100344960000,...,NASDAQ,22745918,19543667,411.365,412.87,12.09,34.49,2025-01-29T12:00:00.000+0000,7434880000,1732309201


## Storing the Pre-Processed Data into CSV

In [47]:

def preprocess_economic_data(df):
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df['earningsAnnouncement'] = pd.to_datetime(df['earningsAnnouncement'])
    return df

preprocessed_economic_df = preprocess_economic_data(eco_ind)
preprocessed_economic_df

Unnamed: 0,symbol,name,price,changesPercentage,change,dayLow,dayHigh,yearHigh,yearLow,marketCap,...,exchange,volume,avgVolume,open,previousClose,eps,pe,earningsAnnouncement,sharesOutstanding,timestamp
0,MSFT,Microsoft Corporation,417,1.0003,4.13,411.06,417.4,468.35,362.9,3100344960000,...,NASDAQ,22745918,19543667,411.365,412.87,12.09,34.49,2025-01-29 12:00:00+00:00,7434880000,1970-01-01 00:00:01.732309201


In [48]:
preprocessed_economic_df.to_csv("eco_ind.csv")

In [49]:
from langchain_community.embeddings import HuggingFaceEmbeddings
hg_embeddings = HuggingFaceEmbeddings()

In [50]:

from langchain.document_loaders import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
loader_eco = CSVLoader('eco_ind.csv')
documents_eco = loader_eco.load()

# Get your splitter ready
text_splitter = RecursiveCharacterTextSplitter(chunk_size=50, chunk_overlap=5)

# Split your docs into texts
texts_eco = text_splitter.split_documents(documents_eco)

# Embeddings
embeddings = HuggingFaceEmbeddings()


## Building the Vector DB for RAG

In [51]:

from langchain.vectorstores import Chroma

persist_directory = 'docs/chroma_rag/'


In [52]:

economic_langchain_chroma = Chroma.from_documents(
    documents=texts_eco,
    collection_name="economic_data",
    embedding=hg_embeddings,
    persist_directory=persist_directory
)

In [53]:
question = "Microsoft(MSFT)"
docs_eco = economic_langchain_chroma.similarity_search(question,k=3)
print(docs_eco)

[Document(metadata={'row': 0, 'source': 'eco_ind.csv'}, page_content=': 0\nsymbol: MSFT\nname: Microsoft Corporation'), Document(metadata={'row': 0, 'source': 'eco_ind.csv'}, page_content=': 0\nsymbol: MSFT\nname: Microsoft Corporation'), Document(metadata={'row': 0, 'source': 'eco_ind.csv'}, page_content='yearLow: 362.9\nmarketCap: 3100344960000')]


## Building RAG Chain using Vector DB and LLM

In [54]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_community.llms import HuggingFaceHub
from IPython.display import display, Markdown
import os
import warnings
warnings.filterwarnings('ignore')

os.environ["HUGGINGFACEHUB_API_TOKEN"] = "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"

llm = HuggingFaceHub(
    repo_id="tiiuae/falcon-7b-instruct",
    model_kwargs={"temperature": 0.1},
)

retriever_eco = economic_langchain_chroma.as_retriever(search_kwargs={"k":2})
qs="Provide the latest financial report for Microsoft (MSFT), including key metrics, economic data, and market news."
template = """You are a Financial Market Expert specializing in analyzing economic data, market trends, and corporate performance. Your task is to retrieve the financial data and latest market insights for the specified company and provide a structured financial report.

- **Context:** {context}
  (Include any relevant market data, news, or parameters affecting the company or its sector.)

- **Query:** {question}
  (This is the specific query about the company for which the financial report is required.)

Deliver the output in JSON format, ensuring it includes the following components:
1. **Company Overview:** Basic details and market position.
2. **Key Financial Metrics:** Revenue, net income, EPS, market cap, P/E ratio, etc.
3. **Market Performance:** Stock price trends, recent movements, and analysis.
4. **Relevant News and Insights:** Headlines or significant events affecting the company.
5. **Economic Context:** Macro trends or industry-specific influences impacting the company.

Ensure the JSON structure is clean, concise, and directly answers the query with relevant data.



"""

PROMPT = PromptTemplate(input_variables=["context","question"], template=template)
qa_with_sources = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff",chain_type_kwargs = {"prompt": PROMPT}, retriever=retriever_eco, return_source_documents=True)
llm_response = qa_with_sources({"query": qs})

In [55]:
print(llm_response['result'])

You are a Financial Market Expert specializing in analyzing economic data, market trends, and corporate performance. Your task is to retrieve the financial data and latest market insights for the specified company and provide a structured financial report.

- **Context:** : 0
symbol: MSFT
name: Microsoft Corporation

: 0
symbol: MSFT
name: Microsoft Corporation  
  (Include any relevant market data, news, or parameters affecting the company or its sector.)  

- **Query:** Provide the latest financial report for Microsoft (MSFT), including key metrics, economic data, and market news.  
  (This is the specific query about the company for which the financial report is required.)

Deliver the output in JSON format, ensuring it includes the following components:
1. **Company Overview:** Basic details and market position.
2. **Key Financial Metrics:** Revenue, net income, EPS, market cap, P/E ratio, etc.
3. **Market Performance:** Stock price trends, recent movements, and analysis.
4. **Rele