In [4]:
# imports
import os
import glob
from dotenv import load_dotenv
import gradio as gr
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_chroma import Chroma
from langchain.memory import ConversationBufferMemory  # Updated import
from langchain.chains import ConversationalRetrievalChain
from langchain_community.chat_message_histories import ChatMessageHistory  # New import
from langchain_core.chat_history import BaseChatMessageHistory  # New import
from langchain_core.runnables.history import RunnableWithMessageHistory  # New import

# Load environment variables
load_dotenv(override=True)
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')

# Model and database setup
MODEL = "gpt-4o-mini"
db_name = "vector_db"

# Create knowledge-base directory and move RazorPay_Details.md
os.makedirs("knowledge-base/razorpay", exist_ok=True)
with open("knowledge-base/razorpay/RazorPay_Details.md", "w", encoding="utf-8") as f:
    f.write("""
# Razorpay Company Information

## Overview
Razorpay is an Indian fintech company founded in 2014 by Harshil Mathur (CEO) and Shashank Kumar (Managing Director). Headquartered in Bengaluru, Karnataka, it provides a full-stack financial solutions platform for businesses, focusing on payment gateways, banking, and lending services. Razorpay aims to simplify online payments for startups, SMEs, and enterprises, supporting over 100 payment modes, including credit/debit cards, UPI, net banking, and digital wallets.

## Key Services
- **Payment Gateway**: Enables businesses to accept and process payments seamlessly with features like digital onboarding, UPI support, and tokenization (TokenHQ).
- **RazorpayX**: Offers neobanking solutions, including current accounts, vendor payments, and payroll management.
- **Razorpay Capital**: Provides working capital loans and instant settlements.
- **POS Solutions**: Supports offline payments through devices like MultiQR and dynamic QR codes.
- **International Expansion**: Acquired Curlec (Malaysia) in 2022 to offer payment solutions globally, targeting 5,000 businesses by 2025.

## Business Metrics
- **Valuation**: $7.5 billion (December 2021, Series F funding of $375 million led by Lone Pine Capital, Alkeon Capital, and TCV).
- **Revenue**: Significant growth, with FY24 net profit at ₹35 crore (4.7x increase).
- **Total Payment Volume (TPV)**: Annualized TPV of $180 billion as of 2024.
- **Customers**: Serves over 200,000 businesses, including SMEs, startups, and enterprises.

## Innovations
- First Indian fintech to support UPI and Bharat QR.
- Launched India’s first multi-network tokenization solution (TokenHQ).
- Introduced credit card payments on UPI and acquired Ezetap to become India’s largest omnichannel payment gateway.

## Funding and Investors
Backed by Y Combinator, Sequoia Capital India, Tiger Global, GIC, Matrix Partners, and Mastercard, among others.

## Strategic Focus
- Democratizing payments for underserved markets (startups and SMEs).
- Enhancing AI-driven fraud detection and personalized payment experiences.
- Expanding offline POS solutions, with over 500,000 terminals deployed.

## Recent Developments
- Received RBI’s Payment Aggregator (PA) license in 2023.
- Initiated domicile shift to India from the US in 2023 for IPO plans.
- Launched AI-powered tools for KYC automation and transaction risk assessment.

## Mission
To revolutionize money management for businesses by offering developer-friendly APIs, seamless integration, and innovative financial products to drive India’s digital economy.

## Contact
- Website: https://razorpay.com
- Headquarters: Bengaluru, Karnataka, India
- Employee Count: ~3,000 (2024)
""")

# Define add_metadata function
def add_metadata(doc, doc_type):
    doc.metadata["doc_type"] = doc_type
    return doc

# Read in documents
folders = glob.glob("knowledge-base/*")
text_loader_kwargs = {'encoding': 'utf-8'}
documents = []
for folder in folders:
    doc_type = os.path.basename(folder)
    loader = DirectoryLoader(folder, glob="**/*.md", loader_cls=TextLoader, loader_kwargs=text_loader_kwargs)
    folder_docs = loader.load()
    documents.extend([add_metadata(doc, doc_type) for doc in folder_docs])

# Split documents into chunks
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_splitter.split_documents(documents)

print(f"Total number of chunks: {len(chunks)}")
print(f"Document types found: {set(doc.metadata['doc_type'] for doc in documents)}")

# Create vector store
embeddings = OpenAIEmbeddings()
if os.path.exists(db_name):
    Chroma(persist_directory=db_name, embedding_function=embeddings).delete_collection()
vectorstore = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory=db_name)
print(f"Vectorstore created with {vectorstore._collection.count()} documents")

# Set up LLM
llm = ChatOpenAI(temperature=0.7, model_name=MODEL)

# Set up conversation history
class CustomChatHistory(BaseChatMessageHistory):
    def __init__(self):
        self.messages = []

    def add_message(self, message):
        self.messages.append(message)

    def clear(self):
        self.messages = []

# Create a session-aware conversation chain
conversational_retrieval_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=vectorstore.as_retriever(search_kwargs={"k": 25})
)

# Wrap with message history
def get_session_history(session_id: str) -> CustomChatHistory:
    if f"history_{session_id}" not in get_session_history.store:
        get_session_history.store[f"history_{session_id}"] = CustomChatHistory()
    return get_session_history.store[f"history_{session_id}"]

get_session_history.store = {}

conversation_with_history = RunnableWithMessageHistory(
    conversational_retrieval_chain,
    get_session_history,
    input_messages_key="question",
    history_messages_key="chat_history",
    output_messages_key="answer"
)

# Chat function
def chat(question, history):
    response = conversation_with_history.invoke(
        {"question": question},
        config={"configurable": {"session_id": "user_1"}}  # Unique session ID
    )
    return response["answer"]

# Launch Gradio interface
view = gr.ChatInterface(chat, type="messages").launch(inbrowser=True)

Total number of chunks: 4
Document types found: {'razorpay'}
Vectorstore created with 4 documents
* Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.


Number of requested results 25 is greater than number of elements in index 4, updating n_results = 4
Number of requested results 25 is greater than number of elements in index 4, updating n_results = 4
