In [1]:
import os
import pickle
import time
import streamlit as st
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain_community.document_loaders import UnstructuredURLLoader 
from langchain import OpenAI
from langchain.vectorstores import FAISS

In [2]:
loader=UnstructuredURLLoader(urls=[
    'https://www.cbsl.gov.lk/en/financial-system/financial-markets/government-securities-market',
    'https://www.sc.com/ke/investments/learn/understanding-bonds-for-beginners/',
    'https://www.researchgate.net/publication/275543195_Treasury_Bills_and_Central_Bank_Bills_for_Monetary_Policy'
])
data=loader.load()
len(data)

3

In [3]:
data[0]

Document(metadata={'source': 'https://www.cbsl.gov.lk/en/financial-system/financial-markets/government-securities-market'}, page_content="Skip to main content\n\nEnglish\n\nසිංහල\n\nதமிழ்\n\nNavigation\n\nABOUT\n\nAbout the Bank\n\nOverview\n\nHistory\n\nVision, Mission & Values\n\nObjectives\n\nFunctions\n\nOrganisational Structure\n\nCorporate Structure\n\nPrincipal Officers\n\nDepartments\n\nBank Premises\n\nBank Headquarters\n\nRegional Offices\n\nLibrary & Information Center\n\nCentre For Banking Studies\n\nThe Economic History Museum of Sri Lanka\n\nExter Report\n\nMONETARY POLICY\n\nAbout Monetary Policy\n\nOverview\n\nEconomic and Price Stability\n\nMonetary Policy Framework\n\nInstruments & Implementation\n\nPolicy Rates and Open Market Operations\n\nStatutory Reserve Requirement\n\nOther Policy Instruments\n\nMonetary Policy Committee\n\nStakeholder Engagement Committee\n\nMonetary Policy Communication\n\nOverview\n\nMonetary Policy Announcement Dates\n\nMonetary Policy Advan

In [4]:
text_splitter=RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)

docs=text_splitter.split_documents(data)
len(docs)

30

In [5]:
docs[1]

Document(metadata={'source': 'https://www.cbsl.gov.lk/en/financial-system/financial-markets/government-securities-market'}, page_content='Monetary Policy Review\n\nMonetary Policy Review - No. 5 of 2024\n\nMonetary Policy Review - No. 4 of 2024\n\nMonetary Policy Review - No. 3 of 2024\n\nMonetary Policy Review - No. 2 of 2024\n\nMonetary Policy Review - No. 1 of 2024\n\nmore\n\nFINANCIAL SYSTEM\n\nFinancial System Stability\n\nOverview\n\nMajor Functions\n\nBanking Sector\n\nNon Bank Finance and Leasing Sector\n\nPrimary Dealers\n\nMicrofinance Sector\n\nMoney Broking Industry\n\nMacroprudential Surveillance\n\nSustainable Finance\n\nFinancial Markets\n\nFinancial Markets Overview\n\nInterbank Call money Market\n\nDomestic Foreign Exchange Market\n\nGovernment Securities Market\n\nCorporate Debt Securities Market\n\nEquity Market\n\nDevelopment Finance and Access to Finance\n\nFinancial Infrastructure\n\nPayment and Settlement Systems\n\nCredit Information\n\nLaws and Regulations\n\nI

In [6]:
# Open a file in write mode
with open("documents_output.txt", "w", encoding="utf-8") as file:
    for i, doc in enumerate(docs):
        file.write(f"Document {i+1}:\n")
        file.write(doc.page_content)  # Writing the page content
        file.write("\n\n")  # Add some spacing between documents


In [7]:
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('all-MiniLM-L6-v2')

# Embed the documents 
embeddings = model.encode([d.page_content for d in docs])



  from tqdm.autonotebook import tqdm, trange





modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]



1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [8]:
import faiss
#get the dimension of the embeddings
d=embeddings.shape[1]
index = faiss.IndexFlatL2(d)
index.add(embeddings)
print(f"Total sentences indexed: {index.ntotal}")

Total sentences indexed: 30


In [9]:
# Define a query sentence
query_sentence = "How to invest treasury bonds?"
query_embedding = model.encode([query_sentence])

# Perform the search
k = 1  # Number of nearest neighbors to retrieve
distances, indices = index.search(query_embedding, k)

# Display the results
print(f"Query: {query_sentence}")

print("Most similar sentences:")
for i, idx in enumerate(indices[0]):
    print(f"{i + 1}: {docs[idx].page_content} (Distance: {distances[0][i]})")

Query: How to invest treasury bonds?
Most similar sentences:
1: Thank you for visiting www.sc.com/ke

Proceed

Cultivate an Understanding of Bonds – Beginners Guide

Managing your wealth well is like tending a beautiful formal garden – you need to start with good soil and a good set of tools. Just as good soil has the proper fertility to nourish a plant, having the right foundation in financial literacy should empower you to potentially cultivate a successful investment portfolio. Cultivate an Understanding of Bonds is part of our financial education series to help educate you on the fundamentals of investing as you tend your very own financial garden.

What is a Bond?

If you are looking to build up a well-diversified portfolio, you will usually be advised to include both stocks and bonds among your investments. While stocks may offer you the potential for capital appreciation, bonds may provide a steady stream of investment income, and play an important role of potentially lowering y

In [10]:
#%pip install weaviate-client tiktoken pypdf rapidocr-onnxruntime --user

In [11]:
# WEAVIATE_CLUSTER="https://3dffdprkqpwzmkaa1b0q.c0.asia-southeast1.gcp.weaviate.cloud"
# WEAVIATE_API_KEY="HtsEnlclbFq7D3Az18lnl6QOqehQHxV2T5fG"


In [12]:
# from langchain.vectorstores import Weaviate
# import weaviate

# client = weaviate.Client(WEAVIATE_CLUSTER,weaviate.AuthApiKey(WEAVIATE_API_KEY))

In [13]:
# import weaviate
# import os
# from weaviate.classes.init import AdditionalConfig, Timeout, Auth
# # Set these environment variables
# URL = "https://3dffdprkqpwzmkaa1b0q.c0.asia-southeast1.gcp.weaviate.cloud"
# APIKEY = "HtsEnlclbFq7D3Az18lnl6QOqehQHxV2T5fG"
  
# # Connect to a WCS instance
# client = weaviate.connect_to_weaviate_cloud(
#     cluster_url=URL,
#     auth_credentials=Auth.api_key(APIKEY),
#     additional_config=AdditionalConfig(timeout=Timeout(init=10)),
# )

# # Check connection
# client.is_ready()

# import weaviate
# import os

# # Instantiate the client with the auth config
# client = weaviate.Client(
#     url=WEAVIATE_CLUSTER,  # Replace with your Weaviate endpoint
#     auth_client_secret=weaviate.auth.AuthApiKey(api_key=WEAVIATE_API_KEY),  # Replace with your Weaviate instance API key
# )

In [14]:
docs[0].page_content

'Skip to main content\n\nEnglish\n\nසිංහල\n\nதமிழ்\n\nNavigation\n\nABOUT\n\nAbout the Bank\n\nOverview\n\nHistory\n\nVision, Mission & Values\n\nObjectives\n\nFunctions\n\nOrganisational Structure\n\nCorporate Structure\n\nPrincipal Officers\n\nDepartments\n\nBank Premises\n\nBank Headquarters\n\nRegional Offices\n\nLibrary & Information Center\n\nCentre For Banking Studies\n\nThe Economic History Museum of Sri Lanka\n\nExter Report\n\nMONETARY POLICY\n\nAbout Monetary Policy\n\nOverview\n\nEconomic and Price Stability\n\nMonetary Policy Framework\n\nInstruments & Implementation\n\nPolicy Rates and Open Market Operations\n\nStatutory Reserve Requirement\n\nOther Policy Instruments\n\nMonetary Policy Committee\n\nStakeholder Engagement Committee\n\nMonetary Policy Communication\n\nOverview\n\nMonetary Policy Announcement Dates\n\nMonetary Policy Advance Release Calendar\n\nMonetary Policy Review\n\nMonetary Policy Review - No. 5 of 2024\n\nMonetary Policy Review - No. 4 of 2024\n\nMone

In [15]:
# %pip install --upgrade weaviate-client


In [16]:
# schema = {
#     "class": "Document",
#     "description": "A document with an embedding vector",
#     "properties": [
#         {
#             "name": "text",
#             "dataType": ["text"],
#             "description": "The actual document or chunk of text"
#         },
#         {
#             "name": "embedding",
#             "dataType": ["number[]"],
#             "description": "The embedding vector for the text"
#         }
#     ]
# }

# # Add schema to Weaviate
# client.schema.create_class(schema)


In [17]:
# vector_db = Weaviate.from_documents(
#     documents=docs,
#     embedding=None,  # Since you already generated embeddings
#     client=client,
#     by_text=True  # Since you are not embedding text within the function
# )

In [18]:
# for i, text in enumerate([doc.page_content for doc in docs]):
#     client.data_object.create(
#         {
#             "text": text,
#             "embedding": embeddings[i].tolist(),
#         },
#         "YourClassNameHere"  # Replace with your Weaviate class name
#     )

In [19]:
# query_sentence="what is treasury bills"
# query_embedding = model.encode([query_sentence])[0]

# results = vector_db.similarity_search(
    
#     query=query_sentence,
#     k=2,
#     query_vector=query_embedding,
# )

In [20]:
from langchain.prompts import ChatPromptTemplate

template="""You are assistant for a financial institution.Use the following information to answer the questions.If you don't know the answer, just say that you don't know.You 10 sentences maximum to answer each question and keep the answer concise.
Question:{question}
Context:{context}
Answer:"""

In [21]:
prompt=ChatPromptTemplate.from_template(template)

In [22]:
prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are assistant for a financial institution.Use the following information to answer the questions.If you don't know the answer, just say that you don't know.You 10 sentences maximum to answer each question and keep the answer concise.\nQuestion:{question}\nContext:{context}\nAnswer:"), additional_kwargs={})])

In [23]:
from langchain import HuggingFaceHub
llm_model=HuggingFaceHub(
    huggingfacehub_api_token="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",
    repo_id='mistralai/Mistral-7B-Instruct-v0.1',
    model_kwargs={"temperature":1,'max_length': 180}
)

  llm_model=HuggingFaceHub(


In [24]:
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser

In [25]:
output_parser=StrOutputParser()

In [26]:
# Define a retriever function
# def retrieve_from_faiss(query_sentence, k=2):
#     query_embedding = model.encode([query_sentence])
#     distances, indices = index.search(query_embedding, k)
#     return [docs[idx].page_content for idx in indices[0]]

# # Retrieve context from FAISS
# retriever = retrieve_from_faiss("what is treasury bills")
# print(retriever)


In [27]:
from langchain_huggingface import HuggingFaceEmbeddings
db = FAISS.from_documents(docs, 
                          HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2'))


# Connect query to FAISS index using a retriever
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={'k': 4}
)



In [28]:
question = "Give me some advices to invest in treasury bills and bonds"
rag_chain = ({
    "context": retriever,
    "question": RunnablePassthrough()
} | prompt | llm_model | output_parser)

result=rag_chain.invoke(question)


In [29]:
print(result)

Human: You are assistant for a financial institution.Use the following information to answer the questions.If you don't know the answer, just say that you don't know.You 10 sentences maximum to answer each question and keep the answer concise.
Question:Give me some advices to invest in treasury bills and bonds
Context:[Document(metadata={'source': 'https://www.cbsl.gov.lk/en/financial-system/financial-markets/government-securities-market'}, page_content='Treasury bills, normally issued at a discount are repaid at face value at the maturity\n\nTreasury bonds carry bi-annual coupon payments and are repaid at face value at the maturity.\n\nTreasury bonds can be issued at discount, par or premium\n\nMarket determined yield rates prevail\n\nTradable in the secondary market\n\nIssued in scripless form\n\nA collateral for short-term fund raising\n\nBenefits of Investing in Treasury Bills and Bonds\n\nIt is considered as default-risk free, since issued by the sovereign government.\n\nSince T b

In [30]:
print(result.split('Answer:')[-1])



1. Treasury bills and bonds are considered as default-risk free, since issued by the sovereign government.
2. They are highly liquid money market instruments that are tradable in the secondary market.
3. All receipts of interest and maturity proceeds by foreign investors are fully repatriable.
4. It is possible to have joint investments.
5. Under the current law, Government Securities are not subject to withholding tax.


In [31]:
%pip install -qU langchain-huggingface

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [32]:
import pandas as pd

# Load the predicted future interest rates from a CSV file
predicted_interest_rates = pd.read_csv('predicted_future_interest_rates.csv')

# Display the first few rows of the dataframe
print(predicted_interest_rates.head())

# Format the data as a string to provide context to the LLM model
context = predicted_interest_rates.to_string(index=False)

# Define a new question for the LLM model
question = "Based on the predicted future interest rates, what is the best time for investors to invest in treasury bills and bonds? Please provide some advice."

# Create a new prompt with the context and question
new_prompt = ChatPromptTemplate.from_template(template)
formatted_prompt = new_prompt.format(context=context, question=question)

# Generate advice using the LLM model
advice_chain = ({
    "context": retriever,
    "question": RunnablePassthrough()
} | prompt | llm_model | output_parser)

advice_result = advice_chain.invoke(formatted_prompt)

# Print the generated advice
print(advice_result)

FileNotFoundError: [Errno 2] No such file or directory: 'predicted_future_interest_rates.csv'