**OpenAI Key**

In [1]:
import os
from getpass import getpass
from semantic_router.encoders import OpenAIEncoder

# Keep a key that is already exported; otherwise ask for one without echoing it.
if not os.getenv("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")


OpenAI API key:  ········


In [2]:
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.schema import SystemMessage, HumanMessage
from tqdm.auto import tqdm


**SAMPLE DATA**

In [3]:
import pandas as pd

# Synthetic chat log: 50 messages, one per minute, all in conversation 1.
# Odd ids are sent by the agent, even ids by the user.
message_ids = range(1, 51)
senders = ["agent" if msg_id % 2 else "user" for msg_id in message_ids]
timestamps = pd.date_range(start="2023-01-01", periods=len(message_ids), freq="min")
texts = [f"This is message {msg_id}" for msg_id in message_ids]

data = pd.DataFrame(
    {
        "id": message_ids,
        "sender": senders,
        "timestamp": timestamps,
        "conversation_id": [1] * len(message_ids),  # single conversation ID
        "message": texts,
    }
)

In [16]:
# Earliest and latest message timestamp for each conversation
# (one row per conversation_id, columns start_time / end_time).
conversation_durations = (
    data.groupby("conversation_id")["timestamp"]
    .agg(start_time="min", end_time="max")
)

In [18]:
# Show the per-conversation start_time / end_time table computed above
print(conversation_durations)

                start_time            end_time
conversation_id                               
1               2023-01-01 2023-01-01 00:49:00


In [4]:
# Preview only the first rows instead of rendering the whole frame
data.head(10)

Unnamed: 0,id,sender,timestamp,conversation_id,message
0,1,agent,2023-01-01 00:00:00,1,This is message 1
1,2,user,2023-01-01 00:01:00,1,This is message 2
2,3,agent,2023-01-01 00:02:00,1,This is message 3
3,4,user,2023-01-01 00:03:00,1,This is message 4
4,5,agent,2023-01-01 00:04:00,1,This is message 5
5,6,user,2023-01-01 00:05:00,1,This is message 6
6,7,agent,2023-01-01 00:06:00,1,This is message 7
7,8,user,2023-01-01 00:07:00,1,This is message 8
8,9,agent,2023-01-01 00:08:00,1,This is message 9
9,10,user,2023-01-01 00:09:00,1,This is message 10


**Storing messages in vector DB**

In [5]:
# Initialize embeddings and Chroma vector store.
# The store is given the embedding function, so add_texts() embeds the texts
# itself; embedding them separately beforehand would only duplicate the
# OpenAI calls (and cost) without the result being used.
embeddings = OpenAIEmbeddings()
vectorstore = Chroma(embedding_function=embeddings)

# Number of rows sent to the vector store per add_texts() call
batch_size = 512

# Process and add chat messages in batches
for i in tqdm(range(0, len(data), batch_size)):
    i_end = min(len(data), i + batch_size)
    # Positional slice of the frame, converted to one dict per row
    batch = data.iloc[i:i_end].to_dict(orient="records")

    # Metadata stored next to each message; values are stringified
    metadata = [{
        "sender": r["sender"],
        "timestamp": str(r["timestamp"]),
        "conversation_id": str(r["conversation_id"])
    } for r in batch]

    # Use the message id as the document id in the store
    ids = [str(r["id"]) for r in batch]

    # Texts to embed and store
    messages = [r["message"] for r in batch]

    # Add messages to Chroma with IDs and metadata (embedding happens inside)
    vectorstore.add_texts(
        texts=messages,
        metadatas=metadata,
        ids=ids
    )

  embeddings = OpenAIEmbeddings()
  vectorstore = Chroma(embedding_function=embeddings)


  0%|          | 0/1 [00:00<?, ?it/s]

In [20]:
# Inspect `metadata` as left behind by the ingestion loop: it holds the
# metadata dicts of the last batch processed, not of the whole dataset.
metadata

[{'sender': 'agent',
  'timestamp': '2023-01-01 00:00:00',
  'conversation_id': '1'},
 {'sender': 'user',
  'timestamp': '2023-01-01 00:01:00',
  'conversation_id': '1'},
 {'sender': 'agent',
  'timestamp': '2023-01-01 00:02:00',
  'conversation_id': '1'},
 {'sender': 'user',
  'timestamp': '2023-01-01 00:03:00',
  'conversation_id': '1'},
 {'sender': 'agent',
  'timestamp': '2023-01-01 00:04:00',
  'conversation_id': '1'},
 {'sender': 'user',
  'timestamp': '2023-01-01 00:05:00',
  'conversation_id': '1'},
 {'sender': 'agent',
  'timestamp': '2023-01-01 00:06:00',
  'conversation_id': '1'},
 {'sender': 'user',
  'timestamp': '2023-01-01 00:07:00',
  'conversation_id': '1'},
 {'sender': 'agent',
  'timestamp': '2023-01-01 00:08:00',
  'conversation_id': '1'},
 {'sender': 'user',
  'timestamp': '2023-01-01 00:09:00',
  'conversation_id': '1'},
 {'sender': 'agent',
  'timestamp': '2023-01-01 00:10:00',
  'conversation_id': '1'},
 {'sender': 'user',
  'timestamp': '2023-01-01 00:11:00',
 

In [14]:
# `messages` is the loop variable from the ingestion cell, so this is the size
# of the last batch (all 50 rows here, because batch_size exceeds the row count).
len(messages)

50

**Retrieving messages from the vector DB and summarizing a conversation (currently using a hardcoded test conversation)**

In [15]:
# --- Retrieve Messages and Summarize ---

# Chat model that writes the summary
llm = ChatOpenAI(model="gpt-3.5-turbo")

# Retrieve the stored messages from Chroma.
# NOTE: `results` is not consumed below while the hardcoded test conversation
# is in use; enable the commented-out block to summarize the retrieved messages.
query_message = " "  # Example query
results = vectorstore.similarity_search(query=query_message, k=len(messages))  # Adjust k as needed

# Format the retrieved data into the conversation structure
#conversation = [
#    {
#        "sender": result.metadata.get("sender", "unknown"),
#        "timestamp": result.metadata.get("timestamp", "unknown"),
#        "message": result.page_content
#    }
#    for result in results
#]

# Hardcoded test conversation used in place of the retrieved messages
conversation = [
    {"sender": "user", 'timestamp': '2023-01-01 00:13:00', "message": "Hi, What do you think about my drawing?"},
    {"sender": "agent", 'timestamp': '2023-01-01 00:13:15', "message": "Great! I might need more blue for the ocean water."},
    {"sender": "user", 'timestamp': '2023-01-01 00:13:20', "message": "Ok! Anything else that I need to add?"},
    {"sender": "agent", 'timestamp': '2023-01-01 00:13:25', "message": "To make it stand out, color the trees with different shades of green!"}
    # Add more messages as needed...
]


# Combine messages into a single conversation string for summarization,
# one "sender (timestamp): message" line per message
conversation_text = "\n".join(
    f"{msg['sender']} ({msg['timestamp']}): {msg['message']}" for msg in conversation
)

# Define a summarization prompt
prompt_template = """Summarize the following conversation between the user and agent in a summary report format:

Conversation:
{conversation}

Summary:"""

# Set up the prompt with LangChain's PromptTemplate
prompt = PromptTemplate(
    input_variables=["conversation"],
    template=prompt_template,
)

# Create the chain with the prompt and language model
summarization_chain = LLMChain(llm=llm, prompt=prompt)

# Generate the summary
summary = summarization_chain.run(conversation=conversation_text)

# A stray unclosed `print(` used to sit between these two calls and made the
# cell a SyntaxError; only the two intended prints remain.
print("Summary of the conversation:")
print(summary)

Summary of the conversation:
The user asked the agent for feedback on their drawing. The agent suggested adding more blue to the ocean water and coloring the trees with different shades of green to make it stand out. The user agreed to make the suggested changes.
