In [1]:
import os
import csv
import json
import pickle
import numpy as np
import pandas as pd
from operator import itemgetter
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.chat_history import (
    BaseChatMessageHistory,
    InMemoryChatMessageHistory
)
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage

In [13]:
from langchain_huggingface import HuggingFaceEmbeddings
from uuid import uuid4
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_core.output_parsers import StrOutputParser
from langchain_chroma import Chroma
from langchain.storage import InMemoryStore
from langchain.retrievers import ParentDocumentRetriever
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [8]:
from getpass import getpass

# SECURITY: a live API key used to be hard-coded in this cell. Treat that key as
# compromised (revoke/rotate it) and never commit credentials to a notebook.

# Default to the proxy endpoint, but respect an endpoint already chosen in the
# environment so a key is never paired with a server the user did not pick.
os.environ.setdefault("OPENAI_BASE_URL", "https://api.bianxie.ai/v1")

# Use the key from the environment when present; otherwise ask for it
# interactively (getpass does not echo, so it stays out of the cell output).
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass(
        f"API key for {os.environ['OPENAI_BASE_URL']}: "
    )

In [35]:
# Chat model used as the generation step of the RAG chain.
# Replies are capped at 1024 tokens; sampling uses temperature 0.7 with top_p 1.
chat_model = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0.7,
    top_p=1,
    max_tokens=1024,
)

In [None]:
# Embedding function handed to the Chroma vector store below.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

In [15]:
def save_to_pickle(obj, filename):
    """Serialize ``obj`` to ``filename`` with the highest pickle protocol.

    Args:
        obj: Any picklable Python object.
        filename: Destination path; the file is created or overwritten.
    """
    with open(filename, mode="wb") as sink:
        pickle.dump(obj, sink, protocol=pickle.HIGHEST_PROTOCOL)

def load_from_pickle(filename):
    """Read ``filename`` and return the object pickled inside it.

    Unpickling can execute arbitrary code, so only pass files that this
    project wrote itself.

    Args:
        filename: Path of an existing pickle file.

    Returns:
        The deserialized object.
    """
    with open(filename, mode="rb") as source:
        loaded = pickle.load(source)
    return loaded

In [16]:
# Load the pickled docstore contents through the helper defined above.
# store_dict is later fed to InMemoryStore.mset() as (key, value) pairs.
store_dict = load_from_pickle("docstore.pkl")

In [21]:
# Rebuild the parent-document retriever from its two persisted halves:
#   * docstore.pkl - pickled dict whose items are loaded into an in-memory docstore
#   * ./parentDB   - Chroma collection "product" used as the vector store
#
# SECURITY: unpickling can run arbitrary code; only load a docstore.pkl that
# this project produced itself.
store_dict = load_from_pickle("docstore.pkl")

persist_directory = "./parentDB"
vector_store = Chroma(
    collection_name="product",
    embedding_function=embeddings,
    persist_directory=persist_directory,
)

# Deliberately NOT called `store`: the chat-history section further down binds
# `store` to the dict of session histories. Sharing the name meant that
# re-running this cell afterwards silently swapped that dict for an
# InMemoryStore and broke get_session_history / store["1"].
parent_docstore = InMemoryStore()
parent_docstore.mset(list(store_dict.items()))

child_splitter = RecursiveCharacterTextSplitter(chunk_size=600)
retriever = ParentDocumentRetriever(
    vectorstore=vector_store,
    docstore=parent_docstore,
    child_splitter=child_splitter,
    search_kwargs={"k": 10},  # passed through to the vector-store search
)

In [None]:
# Sanity check: number of entries under the 'documents' key returned by vector_store.get().
len(vector_store.get()['documents'])

In [None]:
# Smoke-test the retriever with a throwaway query and display what it returns.
retriever.invoke("hello")

### Create a chain for chat history

In [24]:
# System prompt for the RAG question-answering chain. ``{context}`` is the only
# template slot; it receives the retrieved documents.
#
# Built from adjacent string literals rather than a backslash-continued
# triple-quoted string: the old form leaked long runs of indentation into the
# prompt, collapsed the instruction list onto a single line, and numbered two
# different instructions "3.".
system_prompt = (
    "You are a Q&A agent that assists customers with inquiries. "
    "Your main objectives are to provide accurate information, "
    "address customer concerns, and enhance the shopping experience.\n"
    "In your response, please strictly follow the following instructions:\n"
    "1. Always remember previous interactions in the conversation history. "
    "Use this information to inform your responses and provide relevant answers.\n"
    "2. If you do not know the answer to a question, say that you don't know.\n"
    "3. Assess the content of the retrieved documents to determine their "
    "usefulness in answering the user's question. "
    "Not all documents will be relevant.\n"
    "4. Base your answers on your own knowledge and supplement your answers "
    "with information from relevant documents.\n"
    "5. DO NOT specifically mention the retrieved documents and provided "
    "information in your response.\n"
    "Retrieved Documents:\n"
    "{context}"
)

In [25]:
# Final chain step: parses the chat model's output with StrOutputParser.
output_parser = StrOutputParser()

# Prompt layout, in order:
#   1. system instructions (system_prompt, including its {context} slot)
#   2. earlier turns of the conversation, injected under the "history" key
#   3. the new user message, filled from the "question" key
qa_prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    MessagesPlaceholder(variable_name="history"),
    ("human", "{question}"),
])

In [26]:
def format_docs(docs):
    """Concatenate the ``page_content`` of each document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        One string with the contents joined by ``"\\n\\n"``; empty when ``docs`` is empty.
    """
    separator = "\n\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)

In [36]:
# Two ways of filling the prompt's "context" key from the incoming question.

# Variant 1: retrieved documents are flattened to text by format_docs.
context1 = itemgetter("question") | retriever | format_docs
first_step1 = RunnablePassthrough.assign(context=context1)
prompt1 = first_step1 | qa_prompt

# Variant 2: the retriever's output is passed to the prompt as-is.
context2 = itemgetter("question") | retriever
first_step2 = RunnablePassthrough.assign(context=context2)
prompt2 = first_step2 | qa_prompt

# Full pipeline: add context -> render prompt -> chat model -> parse output.
# NOTE(review): this is built on variant 2, so the prompt receives the
# retriever's raw output rather than the format_docs text - confirm that is
# intended and not a leftover from comparing the two variants.
rag_chain = prompt2 | chat_model | output_parser

In [37]:
# Sample customer query. NOTE(review): not referenced by any later cell visible in this notebook.
question = "Any recommendation for cameras"

In [38]:
# Chat histories for the running kernel, keyed by session id.
store = {}


def get_session_history(session_id: str) -> InMemoryChatMessageHistory:
    """Return the chat history for ``session_id``, creating it on first use.

    Args:
        session_id: Key identifying one conversation.

    Returns:
        The history object kept in ``store`` for that session; a new empty
        one is registered when the id has not been seen before.
    """
    try:
        return store[session_id]
    except KeyError:
        history = store[session_id] = InMemoryChatMessageHistory()
        return history

In [39]:
# Wrap the RAG chain with per-session message history: histories come from
# get_session_history, the user's message is read from the "question" key,
# and earlier messages are supplied to the chain under the "history" key.
with_message_history = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="question",
    history_messages_key="history",
)

In [None]:
# Turn 1 of session "1": the user introduces themselves.
with_message_history.invoke(
    {"question": "Hi, I'm Joe. Can you help me on finding the right products?"},
    config={"configurable": {"session_id": "1"}},
)

In [None]:
# Turn 2 of session "1": a product question.
with_message_history.invoke(
    {"question": "If I want to purchase a camera. Which aspect of camera should I focus on?"},
    config={"configurable": {"session_id": "1"}},
)

In [None]:
# Turn 3 of session "1": asks for the name given in turn 1, to check that history is used.
with_message_history.invoke(
    {"question": "What's my name?"},
    config={"configurable": {"session_id": "1"}},
)

In [None]:
# Display the messages recorded so far for session "1".
store['1'].messages

In [63]:
def save_conversation(conversation_history, filename):
    """Pickle ``conversation_history`` into ``filename``.

    Args:
        conversation_history: Picklable object holding the conversation
            (the notebook passes a session's list of messages).
        filename: Destination path; the file is created or overwritten.
    """
    with open(filename, mode="wb") as sink:
        pickle.dump(conversation_history, sink)

In [64]:
def load_conversation(filename):
    """Load a pickled conversation history, falling back to an empty list.

    Unpickling can execute arbitrary code, so only pass files written by
    ``save_conversation`` in a trusted environment.

    Args:
        filename: Path of the pickle file to read.

    Returns:
        The unpickled object, or ``[]`` when the file is missing or cannot
        be loaded (the reason is printed).
    """
    try:
        with open(filename, mode="rb") as source:
            return pickle.load(source)
    except FileNotFoundError:
        print(f"Error: {filename} not found.")
    except Exception as exc:
        print(f"Error loading conversation history: {exc}")
    # Reached only after one of the handlers above reported a failure.
    return []

In [50]:
# Persist the messages of session "1" to test_history.pkl.
save_conversation(store["1"].messages, "test_history.pkl")

In [None]:
# Read the saved history back and display it to confirm the round trip.
load_conversation("test_history.pkl")