In [None]:
import os
from typing import List, Dict

from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import MongoDBAtlasVectorSearch
from langchain.chat_models import ChatOpenAI
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.chains import ConversationalRetrievalChain, SequentialChain, LLMChain
from langchain.memory import MongoDBChatMessageHistory, ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain.schema import Document

from pymongo import MongoClient



In [None]:
# Set up API keys and connection strings
# Credentials already exported in the environment take precedence; the
# placeholder strings are only fallbacks so the cell still runs top to bottom.
# Export OPENAI_API_KEY / MONGODB_ATLAS_CLUSTER_URI (or replace the
# placeholders locally) and never commit real secrets in the notebook.
os.environ.setdefault("OPENAI_API_KEY", "your-api-key-here")
MONGODB_ATLAS_CLUSTER_URI = os.environ.get(
    "MONGODB_ATLAS_CLUSTER_URI", "your-mongodb-atlas-connection-string"
)

# Set up MongoDB client and collections
client = MongoClient(MONGODB_ATLAS_CLUSTER_URI)
db = client["your_database_name"]
vector_collection = db["your_vector_collection_name"]  # holds embedded document chunks
chat_history_collection = db["chat_history"]  # holds persisted conversation turns

# Initialize language model (temperature=0 is passed to keep sampling as
# deterministic as the model allows)
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")

# Load and split documents
loader = TextLoader("path/to/your/document.txt")
documents = loader.load()
# Split into chunks of up to 1000 characters with 200 characters of overlap
# between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
split_docs = text_splitter.split_documents(documents)



In [None]:
# Embed the document chunks and store them in the Atlas vector collection.
# NOTE(review): from_documents inserts the chunks each time this cell runs,
# so re-running presumably duplicates them in the collection -- confirm.
embeddings = OpenAIEmbeddings()
vector_store = MongoDBAtlasVectorSearch.from_documents(
    documents=split_docs,
    embedding=embeddings,
    index_name="your_index_name",
    collection=vector_collection,
)

# Build the compressing retriever: a similarity search returning the top 5
# chunks, wrapped so an LLM-based extractor post-processes what is retrieved.
compressor = LLMChainExtractor.from_llm(llm)
base_retriever = vector_store.as_retriever(
    search_kwargs={"k": 5},
    search_type="similarity",
)
compression_retriever = ContextualCompressionRetriever(
    base_retriever=base_retriever,
    base_compressor=compressor,
)



In [None]:
# Set up chat history and memory
# Conversation turns are persisted in MongoDB, keyed by session_id. The
# database and collection names are taken from the handles created during
# setup so the two places cannot drift apart.
message_history = MongoDBChatMessageHistory(
    connection_string=MONGODB_ATLAS_CLUSTER_URI,
    database_name=db.name,
    collection_name=chat_history_collection.name,
    session_id="user_123"  # This could be a unique identifier for each user or conversation
)
memory = ConversationBufferMemory(
    memory_key="chat_history",
    # Must match the key the RAG chain is invoked with ({"question": ...});
    # a non-matching key raises KeyError when the turn is saved.
    input_key="question",
    # The chain's reply is read from result["answer"], so record that key.
    output_key="answer",
    # ConversationalRetrievalChain formats history from message objects (or
    # tuples); the default string buffer is rejected once history is non-empty.
    return_messages=True,
    chat_memory=message_history
)



In [None]:
# Query optimization pipeline: three LLM stages run in sequence, each writing
# its result under `output_key` so the next stage can read it as an input.

# Stage 1: chat_history -> history_summary
summarize_prompt = PromptTemplate(
    input_variables=["chat_history"],
    template=(
        "Summarize the chat history:\n\n"
        "{chat_history}\n\n"
        "Summary:"
    ),
)
summarize_chain = LLMChain(
    prompt=summarize_prompt,
    llm=llm,
    output_key="history_summary",
)

# Stage 2: history_summary + query -> key_concepts
concept_prompt = PromptTemplate(
    input_variables=["history_summary", "query"],
    template=(
        "Identify key concepts in this query and history summary:\n\n"
        "History: {history_summary}\n"
        "Query: {query}\n\n"
        "Key Concepts:"
    ),
)
concept_chain = LLMChain(
    prompt=concept_prompt,
    llm=llm,
    output_key="key_concepts",
)

# Stage 3: history_summary + key_concepts + query -> optimized_query
optimize_prompt = PromptTemplate(
    input_variables=["history_summary", "key_concepts", "query"],
    template=(
        "Optimize this query based on the history and concepts:\n\n"
        "History: {history_summary}\n"
        "Concepts: {key_concepts}\n"
        "Query: {query}\n\n"
        "Optimized Query:"
    ),
)
optimize_chain = LLMChain(
    prompt=optimize_prompt,
    llm=llm,
    output_key="optimized_query",
)

# Callers supply chat_history and query; only optimized_query is returned.
query_optimizer = SequentialChain(
    verbose=True,
    input_variables=["chat_history", "query"],
    output_variables=["optimized_query"],
    chains=[summarize_chain, concept_chain, optimize_chain],
)



In [None]:
# Set up RAG chain
# `condense_question_llm` must be a language model: `from_llm` wraps it in an
# LLMChain together with `condense_question_prompt`. `query_optimizer` is a
# SequentialChain (it expects `chat_history` and `query` inputs), not a model,
# so it cannot be passed there. The argument is omitted so that question
# condensing falls back to `llm`. To apply `query_optimizer`, invoke it
# explicitly and pass its `optimized_query` output in as the question.
rag_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=compression_retriever,
    memory=memory,
    # Prompt used to rewrite the follow-up question given the chat history.
    condense_question_prompt=PromptTemplate.from_template(
        "{chat_history}\n\nHuman: {question}\n\nAI: To better assist you, I'll rephrase your question. Here's the optimized query:"
    ),
)



In [None]:
# Example conversation: three questions, the later ones relying on context
# from the earlier turns.
queries = [
    "What is artificial intelligence?",
    "What are its main applications?",
    "Can you summarize what we've discussed about AI so far?",
]

for query in queries:
    # Each call sends one question through the RAG chain; the reply is read
    # from the "answer" key of the returned mapping.
    result = rag_chain({"question": query})
    print("Human: {}".format(query))
    print("AI: {}\n".format(result["answer"]))

# Display chat history as stored by the MongoDB-backed message history
print("Chat History:")
for message in message_history.messages:
    print("{}: {}".format(message.type, message.content))