# Import Libraries

In [1]:
import os
from langchain_openai import OpenAIEmbeddings
import hashlib
from pinecone import Pinecone
from datetime import date
from pinecone import ServerlessSpec
import time
from langchain_pinecone import PineconeVectorStore
from langchain.schema import Document

  from tqdm.autonotebook import tqdm


# API Keys

In [2]:
# Credentials come from the process environment so that no secret is stored
# in the notebook itself. Each name is None when its variable is not set.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')

# Sample Transcript

In [None]:
# Sample meeting transcript that is passed to chunk_text() later in this notebook.
# Format: one utterance per line, written as "Speaker: text"; some lines also
# carry the speaker's role in parentheses, e.g. "Czech (Team Lead): ...".
# NOTE(review): the Team Lead's first long line addresses "Bob", but the reply
# comes from Shaundyl and no speaker named Bob appears - possibly a leftover
# name from another transcript; confirm before relying on speaker names.
sample_transcript = """Czech: Hello my name is Czech.     
Gian: Hello my name is Gian.
Shaundyl: Hello my name is Shaundyl.
Czech (Team Lead): Alright, everyone, thanks for joining today’s meeting. We have about 10 minutes to go over the final details before the product launch. Let's start with the progress update. Bob, how are we doing on the development front?
Shaundyl (Developer): Things are looking good. We’ve implemented all the major features. I’m currently working on the final round of bug fixes. I should be done with it by tomorrow, but I need the QA team to give it another pass afterward.
Czech: Great to hear that. Gian, how are we looking on the project timeline? Any changes or concerns?
Gian (Project Manager): We’re on track, but barely. The marketing materials were delayed by two days, but I managed to align the social media schedule to compensate for the delay. As long as the development and testing stay on track, we should meet the launch date. We might want to allocate some buffer time for any last-minute issues though.
Czech: Makes sense. Shaundyl, do you think we’ll have time for a final round of testing before we push live?
Shaundyl: We should. I’ll aim to finish bug fixes by midday tomorrow. If QA can start immediately after, we’ll have 24 hours for testing before the go-live. I’ll stay available for any hotfixes, just in case.
Czech: Perfect. Gian, can you make sure the QA team is on standby for tomorrow afternoon?
Gian: Absolutely. I’ll notify them as soon as we’re done here. I’ll also double-check the launch checklist to make sure nothing’s been missed.
Czech: Great. And one last thing – how are we handling customer support on launch day? Any special preparations?
Gian: We’ve set up a dedicated support channel for the product and briefed the customer support team on the common issues we’re anticipating. We’ll also monitor social media for any unexpected feedback.
Czech: Sounds like we’re in good shape. Thanks, everyone. Let’s aim to regroup tomorrow for a final status check. Anything else before we wrap up?
Shaundyl: Nothing from my side. I’ll update you if any blockers come up.
Gian: I’m all set. Let’s get this done!
Czech: Alright then, thanks again! Talk tomorrow.
"""

# Initialization

In [4]:
# Pinecone client: one shared handle, authenticated with the key read from the
# environment. Indexes are opened from it on demand further down.
PC = Pinecone(api_key=PINECONE_API_KEY)

# OpenAI embedding model used to vectorise every transcript chunk.
# NOTE(review): the index created later in this notebook is declared with
# dimension=1536, which must equal this model's output size - confirm if the
# model name is ever changed.
EMBEDDINGS = OpenAIEmbeddings(
    openai_api_key=OPENAI_API_KEY,
    model='text-embedding-3-small',
)

# Partitioning

### Chunking

In [6]:
def chunk_text(text, max_chunk_size=500) -> "list[Document]":
    """Group the lines of ``text`` into chunks of roughly ``max_chunk_size`` characters.

    The text is split on newlines; each line is stripped and appended to the
    current chunk. Before a line is appended, the chunk is closed if adding
    the line (plus a small 2-character margin for separators) would push it
    past ``max_chunk_size``. Lines are never split, so a single line longer
    than ``max_chunk_size`` still becomes one oversized chunk of its own.

    Args:
        text: Transcript text with one utterance per line.
        max_chunk_size: Target upper bound, in characters, for a chunk.

    Returns:
        A list of ``Document`` objects, one per non-empty chunk, in original
        order. Empty or whitespace-only input yields an empty list.
    """
    chunks = []
    current_chunk = ""

    for sentence in text.split("\n"):
        sentence = sentence.strip()
        # Compare against the length of THIS line (not the number of lines):
        # close the running chunk when the line would not fit any more.
        if current_chunk and len(current_chunk) + len(sentence) + 2 > max_chunk_size:
            finished = current_chunk.strip()
            if finished:  # a run of blank lines must not become an empty chunk
                chunks.append(finished)
            current_chunk = ""
        current_chunk += sentence + "\n"

    # Flush whatever is left, unless it is only whitespace.
    remainder = current_chunk.strip()
    if remainder:
        chunks.append(remainder)

    # Wrap each chunk so it can be handed to the vector store.
    return [Document(page_content=chunk) for chunk in chunks]

# Chunk the sample transcript with the default chunk size and show the result.
documents = chunk_text(sample_transcript)
print(documents)

[Document(metadata={}, page_content="[00:00:00] John: Good morning, everyone. Thank you for joining today's kickoff meeting for our new     software development project. We'll be discussing the project scope, timelines, and\nresponsibilities. Let's get started with a quick round of introductions. I'll go first. I'm John, the     project manager. I'll be overseeing the project and ensuring we stay on track. Alice, would you     like to go next?\n[00:00:20]\nAlice: Sure, thanks John. Hi, everyone. I'm Alice, the lead developer. I'll be responsible for the"), Document(metadata={}, page_content="overall architecture and development of the software. Looking forward to working with all of you.\n[00:00:35]\nBob: Hi, I'm Bob, the UI/UX designer. I'll be handling the design aspects of the software, making\nsure it's user-friendly and visually appealing.\n[00:00:45]\nSara: Hello, I'm Sara, the QA analyst. I'll be testing the software to ensure it meets our quality\nstandards and is free of bugs.

### Index

In [8]:
# Organization name; it is used directly as the name of the Pinecone index.
# Surrounding whitespace from the prompt is dropped before use.
index_name = input("Give organization name").strip()

# Create the organization's index the first time the name is seen.
if index_name not in PC.list_indexes().names():
    PC.create_index(
        name=index_name,
        dimension=1536,  # must match the embedding size of EMBEDDINGS
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1"
        ),
        deletion_protection="disabled"
    )
    # Poll once per second until Pinecone reports the new index as ready.
    while not PC.describe_index(index_name).status['ready']:
        time.sleep(1)
    print(index_name + " is created successfully.")
else:
    print("Organization already exists.")

# Bind the handle in BOTH branches: previously `index` was only assigned when
# the index had just been created, leaving it undefined for existing ones.
index = PC.Index(index_name)

Organization already exists.


### Upsert

In [9]:
# Meeting title; used as the Pinecone namespace that holds this meeting's chunks.
namespace = input("Give meeting title").strip()

# Deterministic vector IDs derived from each chunk's position and text.
# Without explicit IDs every run of this cell gets freshly generated ones, so
# re-running it stores the same chunks again as duplicates; with stable IDs a
# re-run overwrites the existing vectors instead.
chunk_ids = [
    hashlib.sha256(f"{position}:{doc.page_content}".encode("utf-8")).hexdigest()
    for position, doc in enumerate(documents)
]

# Embed the chunks and upsert them into the organization's index.
docsearch = PineconeVectorStore.from_documents(
    documents=documents,
    index_name=index_name,
    embedding=EMBEDDINGS,
    namespace=namespace,
    ids=chunk_ids,
)

# Fixed pause before the next cell reads from the index.
# NOTE(review): presumably gives Pinecone time to make the new vectors
# visible to list/query calls - confirm whether 5 seconds is sufficient.
time.sleep(5)

### Sample Query

In [10]:
# Index and namespace to inspect for this sample query.
index = PC.Index("scs")
namespace = "Kickoff Meeting for Software Development Project"

# `index.list` yields the namespace's vector IDs one page (list of IDs) at a
# time. For each page, use its first ID as the query vector and fetch the 4
# nearest stored vectors, including their values and metadata.
query = None  # stays None when the namespace holds no vectors
for ids in index.list(namespace=namespace):
    if not ids:
        continue
    query = index.query(
        id=ids[0],
        namespace=namespace,
        top_k=4,
        include_values=True,
        include_metadata=True
    )
    # Print inside the loop so no result is silently discarded; previously
    # only the last page's result was printed.
    print(query)

# Previously an empty namespace raised NameError at the final print.
if query is None:
    print("No vectors found in namespace: " + namespace)

{'matches': [{'id': '33657dd2-a642-4ae6-8e1a-6d7b0e0dce25',
              'metadata': {'text': 'customer information.\n'
                                   '[00:01:45]\n'
                                   "Bob: For the design, we'll focus on "
                                   "creating an intuitive user interface. I'll "
                                   'be working closely with\n'
                                   'Alice to ensure that the design is '
                                   'feasible from a development perspective. '
                                   "I'll also be\n"
                                   'conducting user research to understand the '
                                   'needs of our end-users better.\n'
                                   '[00:02:05]\n'
                                   "Sara: From a QA perspective, I'll be "
                                   'developing a comprehensive testing plan. '
                                   'This will includ