In [2]:
from langchain_community.vectorstores import FAISS
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
import os
from langchain_community.document_loaders.mongodb import MongodbLoader
from langchain_groq import ChatGroq
from dotenv import load_dotenv

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [3]:
# Pull variables from a local .env file (if present) into the environment.
load_dotenv()

# Fail fast with an actionable message: os.getenv returns None when the key is
# missing, and passing None on would only surface later as a client error.
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Export it or add it to your .env file."
    )

# Chat model used to generate answers further down the notebook.
# NOTE(review): confirm this model id is still served by Groq.
llm = ChatGroq(groq_api_key=groq_api_key, model_name="llama-3.1-70b-versatile")

llm

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x78fe54b479b0>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x78fe4805bc20>, model_name='llama-3.1-70b-versatile', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [4]:
# Required when running inside a Jupyter notebook (per the original note).
# NOTE(review): presumably needed because a later call starts its own asyncio
# event loop while the notebook's loop is already running — confirm which call.
import nest_asyncio

nest_asyncio.apply()

In [None]:
db_name = "loan_database"

# Connection string for the MongoDB deployment. It is read from the
# MONGODB_URI environment variable so that credentials never have to be typed
# into the notebook; the original placeholder literal is kept as the fallback,
# so behaviour is unchanged when the variable is not set.
mongodb_uri = os.getenv("MONGODB_URI", "mongodb+srv://")

# Define the collections and their respective field names
collections_fields = {
    "customers": ["customer_id", "first_name", "last_name", "date_of_birth", "address", "phone", "email", "national_id"],
    "loans": ["loan_id", "customer_id", "loan_type", "principal_amount", "interest_rate", "loan_term_months", "start_date", "end_date", "loan_status"],
    "repayments": ["repayment_id", "loan_id", "repayment_date", "amount_paid", "principal_paid", "interest_paid", "remaining_balance"],
    "collateral": ["collateral_id", "loan_id", "collateral_type", "description", "appraised_value", "date_of_valuation"],
    "employees": ["employee_id", "first_name", "last_name", "position", "branch_id", "email", "phone"],
    "branches": ["branch_id", "branch_name", "address", "phone"],
    "loan_applications": ["application_id", "customer_id", "loan_type", "requested_amount", "application_date", "application_status", "assigned_employee"],
    "payments_schedule": ["schedule_id", "loan_id", "due_date", "payment_due", "status"],
    # Add more collections and their fields as needed
}

# Accumulates the documents loaded from every collection above.
all_docs = []

# Load each collection with its own loader, restricted to the listed fields.
for collection_name, field_names in collections_fields.items():
    loader = MongodbLoader(
        connection_string=mongodb_uri,
        db_name=db_name,
        collection_name=collection_name,
        field_names=field_names,
    )
    docs = loader.load()
    all_docs.extend(docs)

# Now, all_docs contains documents from all specified collections


In [6]:
# Sanity check: total number of documents loaded across all collections.
len(all_docs)

160

In [7]:
# Preview only the first few documents: echoing the whole list floods the
# notebook output with every loaded record (names, emails, national IDs).
all_docs[:5]

[Document(metadata={'database': 'loan_database', 'collection': 'customers'}, page_content='CUST001 John Doe 1985-04-15 123 Main St, Cityville 555-1234 johndoe@example.com 123456789'),
 Document(metadata={'database': 'loan_database', 'collection': 'customers'}, page_content='CUST002 Jane Smith 1990-08-22 456 Oak St, Townsville 555-5678 janesmith@example.com 987654321'),
 Document(metadata={'database': 'loan_database', 'collection': 'customers'}, page_content='CUST003 Robert Brown 1982-11-30 789 Pine St, Cityville 555-7890 robert.brown@example.com 234567890'),
 Document(metadata={'database': 'loan_database', 'collection': 'customers'}, page_content='CUST004 Emily Davis 1975-06-14 321 Cedar St, Townsville 555-3456 emily.davis@example.com 345678901'),
 Document(metadata={'database': 'loan_database', 'collection': 'customers'}, page_content='CUST005 Michael Wilson 1992-10-21 654 Maple St, Cityville 555-4321 michael.wilson@example.com 456789012'),
 Document(metadata={'database': 'loan_databa

In [8]:
# Re-export HF_TOKEN only when it is actually configured. Assigning None into
# os.environ raises TypeError, so an unset token must simply be skipped.
hf_token = os.getenv("HF_TOKEN")
if hf_token is not None:
    os.environ["HF_TOKEN"] = hf_token
from langchain_huggingface import HuggingFaceEmbeddings

# Name of the embedding model used to vectorise the document chunks.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"

embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

  from tqdm.autonotebook import tqdm, trange


In [37]:
# Chunking parameters handed to the text splitter.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 100

# Break the loaded documents into overlapping chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
splits = text_splitter.split_documents(all_docs)

# Index the chunks in FAISS and expose the index as a retriever.
# (A Chroma-backed store was tried here previously and is no longer used.)
vectorstore = FAISS.from_documents(
    documents=splits,
    embedding=embeddings,
)
retriever = vectorstore.as_retriever()

In [30]:
# Number of chunks produced by the text splitter.
# NOTE(review): this cell's execution count (30) is lower than that of the
# cell that builds `splits` (37), so the recorded output may be stale —
# re-run to confirm.
len(splits)

232

In [None]:
## Prompt Template
# System instructions, one sentence per literal; the trailing {context}
# placeholder is where the retrieved documents are substituted.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n\n{context}"
)

# Two-message chat template: the system instructions followed by the user's
# question, which is supplied through the {input} variable.
chat_messages = [
    ("system", system_prompt),
    ("human", "{input}"),
]
prompt = ChatPromptTemplate.from_messages(chat_messages)

In [46]:
# Chain that passes the retrieved documents and the question to the LLM
# through the prompt template.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Full pipeline: retrieve documents for the question, then answer from them.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

In [None]:
# Query the RAG chain. The output recorded for this cell was produced with the
# input "cust001", so the cell uses that query; the greeting text that was here
# before does not match the saved result.
query = "cust001"
response = rag_chain.invoke({"input": query})
response

{'input': 'cust001',
 'context': [Document(metadata={'database': 'loan_database', 'collection': 'loan_applications'}, page_content='APP001 CUST001 auto 20000 2023-01-10 pending EMP001'),
  Document(metadata={'database': 'loan_database', 'collection': 'loan_applications'}, page_content='APP014 CUST014 personal 9000 2023-08-05 pending EMP014'),
  Document(metadata={'database': 'loan_database', 'collection': 'customers'}, page_content='CUST010 Anna Clark 1987-07-29 456 Redwood St, Townsville 555-6543 anna.clark@example.com 901234567'),
  Document(metadata={'database': 'loan_database', 'collection': 'loan_applications'}, page_content='APP018 CUST018 personal 15000 2023-10-01 approved EMP018')],
 'answer': "CUST001's information is not available in the provided data. However, their application details are available:\n\nAPP001 CUST001 auto 20000 2023-01-10 pending EMP001"}

In [None]:
# Show only the generated answer text from the chain's response dict.
response['answer']