In [1]:
import os
import pickle
import time
import streamlit as st
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain_community.document_loaders import UnstructuredURLLoader 
from langchain import OpenAI
from langchain.vectorstores import FAISS

In [2]:
loader=UnstructuredURLLoader(urls=[
    'https://www.cbsl.gov.lk/en/financial-system/financial-markets/government-securities-market',
    'https://www.sc.com/ke/investments/learn/understanding-bonds-for-beginners/'
])
data=loader.load()
len(data)

2

In [3]:
data[0]

Document(metadata={'source': 'https://www.cbsl.gov.lk/en/financial-system/financial-markets/government-securities-market'}, page_content="Skip to main content\n\nEnglish\n\nසිංහල\n\nதமிழ்\n\nNavigation\n\nABOUT\n\nAbout the Bank\n\nOverview\n\nHistory\n\nVision, Mission & Values\n\nObjectives\n\nFunctions\n\nOrganisational Structure\n\nCorporate Structure\n\nPrincipal Officers\n\nDepartments\n\nBank Premises\n\nBank Headquarters\n\nRegional Offices\n\nLibrary & Information Center\n\nCentre For Banking Studies\n\nThe Economic History Museum of Sri Lanka\n\nExter Report\n\nMONETARY POLICY\n\nAbout Monetary Policy\n\nOverview\n\nEconomic and Price Stability\n\nMonetary Policy Framework\n\nInstruments & Implementation\n\nPolicy Rates and Open Market Operations\n\nStatutory Reserve Requirement\n\nOther Policy Instruments\n\nMonetary Policy Committee\n\nStakeholder Engagement Committee\n\nMonetary Policy Communication\n\nOverview\n\nMonetary Policy Announcement Dates\n\nMonetary Policy Advan

In [4]:
text_splitter=RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)

docs=text_splitter.split_documents(data)
len(docs)

30

In [5]:
docs[1]

Document(metadata={'source': 'https://www.cbsl.gov.lk/en/financial-system/financial-markets/government-securities-market'}, page_content='Monetary Policy Review\n\nMonetary Policy Review - No. 5 of 2024\n\nMonetary Policy Review - No. 4 of 2024\n\nMonetary Policy Review - No. 3 of 2024\n\nMonetary Policy Review - No. 2 of 2024\n\nMonetary Policy Review - No. 1 of 2024\n\nmore\n\nFINANCIAL SYSTEM\n\nFinancial System Stability\n\nOverview\n\nMajor Functions\n\nBanking Sector\n\nNon Bank Finance and Leasing Sector\n\nPrimary Dealers\n\nMicrofinance Sector\n\nMoney Broking Industry\n\nMacroprudential Surveillance\n\nSustainable Finance\n\nFinancial Markets\n\nFinancial Markets Overview\n\nInterbank Call money Market\n\nDomestic Foreign Exchange Market\n\nGovernment Securities Market\n\nCorporate Debt Securities Market\n\nEquity Market\n\nDevelopment Finance and Access to Finance\n\nFinancial Infrastructure\n\nPayment and Settlement Systems\n\nCredit Information\n\nLaws and Regulations\n\nI

In [6]:
# Open a file in write mode
with open("documents_output.txt", "w", encoding="utf-8") as file:
    for i, doc in enumerate(docs):
        file.write(f"Document {i+1}:\n")
        file.write(doc.page_content)  # Writing the page content
        file.write("\n\n")  # Add some spacing between documents


In [7]:
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('all-MiniLM-L6-v2')

# Embed the documents 
embeddings = model.encode([d.page_content for d in docs])



  from tqdm.autonotebook import tqdm, trange







In [8]:
import faiss
#get the dimension of the embeddings
d=embeddings.shape[1]
index = faiss.IndexFlatL2(d)
index.add(embeddings)
print(f"Total sentences indexed: {index.ntotal}")

Total sentences indexed: 30


In [9]:
# Define a query sentence
query_sentence = "How to invest treasury bonds?"
query_embedding = model.encode([query_sentence])

# Perform the search
k = 1  # Number of nearest neighbors to retrieve
distances, indices = index.search(query_embedding, k)

# Display the results
print(f"Query: {query_sentence}")

print("Most similar sentences:")
for i, idx in enumerate(indices[0]):
    print(f"{i + 1}: {docs[idx].page_content} (Distance: {distances[0][i]})")

Query: How to invest treasury bonds?
Most similar sentences:
1: Thank you for visiting www.sc.com/ke

Proceed

Cultivate an Understanding of Bonds – Beginners Guide

Managing your wealth well is like tending a beautiful formal garden – you need to start with good soil and a good set of tools. Just as good soil has the proper fertility to nourish a plant, having the right foundation in financial literacy should empower you to potentially cultivate a successful investment portfolio. Cultivate an Understanding of Bonds is part of our financial education series to help educate you on the fundamentals of investing as you tend your very own financial garden.

What is a Bond?

If you are looking to build up a well-diversified portfolio, you will usually be advised to include both stocks and bonds among your investments. While stocks may offer you the potential for capital appreciation, bonds may provide a steady stream of investment income, and play an important role of potentially lowering y

In [10]:
#%pip install weaviate-client tiktoken pypdf rapidocr-onnxruntime --user

In [11]:
WEAVIATE_CLUSTER="https://3dffdprkqpwzmkaa1b0q.c0.asia-southeast1.gcp.weaviate.cloud"
WEAVIATE_API_KEY="HtsEnlclbFq7D3Az18lnl6QOqehQHxV2T5fG"


In [12]:
from langchain.vectorstores import Weaviate
import weaviate

client = weaviate.Client(WEAVIATE_CLUSTER,weaviate.AuthApiKey(WEAVIATE_API_KEY))

Python client v3 `weaviate.Client(...)` connections and methods are deprecated and will
            be removed by 2024-11-30.

            Upgrade your code to use Python client v4 `weaviate.WeaviateClient` connections and methods.
                - For Python Client v4 usage, see: https://weaviate.io/developers/weaviate/client-libraries/python
                - For code migration, see: https://weaviate.io/developers/weaviate/client-libraries/python/v3_v4_migration

            If you have to use v3 code, install the v3 client and pin the v3 dependency in your requirements file: `weaviate-client>=3.26.7;<4.0.0`
  client = weaviate.Client(WEAVIATE_CLUSTER,weaviate.AuthApiKey(WEAVIATE_API_KEY))
            be removed by 2024-11-30.

            Upgrade your code to use Python client v4 `weaviate.WeaviateClient` connections and methods.
                - For Python Client v4 usage, see: https://weaviate.io/developers/weaviate/client-libraries/python
                - For code migration, s

In [13]:
# import weaviate
# import os
# from weaviate.classes.init import AdditionalConfig, Timeout, Auth
# # Set these environment variables
# URL = "https://3dffdprkqpwzmkaa1b0q.c0.asia-southeast1.gcp.weaviate.cloud"
# APIKEY = "HtsEnlclbFq7D3Az18lnl6QOqehQHxV2T5fG"
  
# # Connect to a WCS instance
# client = weaviate.connect_to_weaviate_cloud(
#     cluster_url=URL,
#     auth_credentials=Auth.api_key(APIKEY),
#     additional_config=AdditionalConfig(timeout=Timeout(init=10)),
# )

# # Check connection
# client.is_ready()

import weaviate
import os

# Instantiate the client with the auth config
client = weaviate.Client(
    url=WEAVIATE_CLUSTER,  # Replace with your Weaviate endpoint
    auth_client_secret=weaviate.auth.AuthApiKey(api_key=WEAVIATE_API_KEY),  # Replace with your Weaviate instance API key
)

Python client v3 `weaviate.Client(...)` connections and methods are deprecated and will
            be removed by 2024-11-30.

            Upgrade your code to use Python client v4 `weaviate.WeaviateClient` connections and methods.
                - For Python Client v4 usage, see: https://weaviate.io/developers/weaviate/client-libraries/python
                - For code migration, see: https://weaviate.io/developers/weaviate/client-libraries/python/v3_v4_migration

            If you have to use v3 code, install the v3 client and pin the v3 dependency in your requirements file: `weaviate-client>=3.26.7;<4.0.0`
  client = weaviate.Client(


In [14]:
docs[0].page_content

'Skip to main content\n\nEnglish\n\nසිංහල\n\nதமிழ்\n\nNavigation\n\nABOUT\n\nAbout the Bank\n\nOverview\n\nHistory\n\nVision, Mission & Values\n\nObjectives\n\nFunctions\n\nOrganisational Structure\n\nCorporate Structure\n\nPrincipal Officers\n\nDepartments\n\nBank Premises\n\nBank Headquarters\n\nRegional Offices\n\nLibrary & Information Center\n\nCentre For Banking Studies\n\nThe Economic History Museum of Sri Lanka\n\nExter Report\n\nMONETARY POLICY\n\nAbout Monetary Policy\n\nOverview\n\nEconomic and Price Stability\n\nMonetary Policy Framework\n\nInstruments & Implementation\n\nPolicy Rates and Open Market Operations\n\nStatutory Reserve Requirement\n\nOther Policy Instruments\n\nMonetary Policy Committee\n\nStakeholder Engagement Committee\n\nMonetary Policy Communication\n\nOverview\n\nMonetary Policy Announcement Dates\n\nMonetary Policy Advance Release Calendar\n\nMonetary Policy Review\n\nMonetary Policy Review - No. 5 of 2024\n\nMonetary Policy Review - No. 4 of 2024\n\nMone

In [15]:
%pip install --upgrade weaviate-client


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip
  return process_handler(cmd, _system_body)
  return process_handler(cmd, _system_body)
  return process_handler(cmd, _system_body)


In [16]:
# schema = {
#     "class": "Document",
#     "description": "A document with an embedding vector",
#     "properties": [
#         {
#             "name": "text",
#             "dataType": ["text"],
#             "description": "The actual document or chunk of text"
#         },
#         {
#             "name": "embedding",
#             "dataType": ["number[]"],
#             "description": "The embedding vector for the text"
#         }
#     ]
# }

# # Add schema to Weaviate
# client.schema.create_class(schema)


In [17]:
vector_db = Weaviate.from_documents(
    documents=docs,
    embedding=None,  # Since you already generated embeddings
    client=client,
    by_text=True  # Since you are not embedding text within the function
)

In [18]:
for i, text in enumerate([doc.page_content for doc in docs]):
    client.data_object.create(
        {
            "text": text,
            "embedding": embeddings[i].tolist(),
        },
        "YourClassNameHere"  # Replace with your Weaviate class name
    )

In [19]:
query_sentence="what is treasury bills"
query_embedding = model.encode([query_sentence])[0]

results = vector_db.similarity_search(
    
    query=query_sentence,
    k=2,
    query_vector=query_embedding,
)

ValueError: Error during query: [{'locations': [{'column': 58, 'line': 1}], 'message': 'Unknown argument "nearText" on field "LangChain_f912cbf4ef5f4146959d5d45dcd25a2c" of type "GetObjectsObj". Did you mean "nearVector" or "nearObject"?', 'path': None}]

In [20]:
from langchain.prompts import ChatPromptTemplate

template="""You are assistant for a financial institution.Use the following information to answer the questions.If you don't know the answer, just say that you don't know.You 10 sentences maximum to answer each question and keep the answer concise.
Question:{question}
Context:{context}
Answer:"""

In [21]:
prompt=ChatPromptTemplate.from_template(template)

In [22]:
prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are assistant for a financial institution.Use the following information to answer the questions.If you don't know the answer, just say that you don't know.You 10 sentences maximum to answer each question and keep the answer concise.\nQuestion:{question}\nContext:{context}\nAnswer:"), additional_kwargs={})])

In [25]:
from langchain import HuggingFaceHub
model=HuggingFaceHub(
    huggingfacehub_api_token="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",
    repo_id='mistralai/Mistral-7B-Instruct-v0.1',
    model_kwargs={"temperature":1,'max_length': 180}
)