<a href="https://colab.research.google.com/github/ZIMKITH/Judgement-driven-sales-copilot/blob/main/Sales_Copilot_Architecture.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q openai pinecone

In [None]:
import os
from getpass import getpass
from openai import OpenAI
from pinecone import Pinecone

# --- SECURE CREDENTIAL INPUT ---
# Prompts appear in a fixed order: OpenAI key, Pinecone key, Pinecone index name.
print("üîê Please enter your credentials (input is hidden for security):")

# 1. OpenAI Setup
# getpass() hides what is typed or pasted. The value is stripped because a
# pasted key easily picks up a trailing space or newline.
OPENAI_API_KEY = getpass("Enter OpenAI API Key: ").strip()
client = OpenAI(api_key=OPENAI_API_KEY)

# 2. Pinecone Setup
PINECONE_API_KEY = getpass("Enter Pinecone API Key: ").strip()
# Stripped so stray whitespace cannot make the membership check below fail.
PINECONE_INDEX_NAME = input("Enter your Pinecone Index Name (e.g., sales-copilot): ").strip()

# Initialize Pinecone Client
pc = Pinecone(api_key=PINECONE_API_KEY)

# 3. Connection Verification
# Only Pinecone is contacted here. No OpenAI endpoint is called in this cell,
# so the messages below report on Pinecone alone and do not claim that the
# OpenAI key has been validated.
try:
    # The one call that can fail: list the indexes visible to this API key.
    existing_indexes = [idx['name'] for idx in pc.list_indexes()]
except Exception as e:
    # Broad on purpose: this is the notebook's top-level boundary and the goal
    # is a readable message instead of a traceback.
    print(f"\n‚ùå ERROR: Connection failed. Check your API Key. \nDetails: {e}")
else:
    if PINECONE_INDEX_NAME in existing_indexes:
        print(f"\n‚úÖ SUCCESS: Connected to Pinecone. Index '{PINECONE_INDEX_NAME}' found.")
    else:
        print(f"\n‚ö†Ô∏è WARNING: Connected to Pinecone, but Index '{PINECONE_INDEX_NAME}' was not found.")
        print(f"Available indexes: {existing_indexes}")

In [None]:
import re

# PHASE 2: THE "LAUNDROMAT" (INGESTION & CLEANING)

def clean_text(text):
    """
    Cleans unstructured text data for RAG ingestion.
    1. Removes Slack user mentions (<@U12345>, <@W12345>, <@U12345|label>)
    2. Redacts Email addresses
    3. Collapses every run of whitespace into a single space

    Args:
        text: Raw message text (str).

    Returns:
        The cleaned text. May be an empty string, e.g. when the message
        consisted only of a mention.
    """
    # Mention token: '<@', an ID starting with U or W, an optional '|label'
    # part, then '>'.
    mention = r'<@[UW][A-Z0-9]+(?:\|[^>]*)?>'

    # 1a. One or more mentions directly followed by punctuation: also drop the
    # whitespace in front, otherwise "Hey <@U1>, hi" becomes "Hey , hi".
    # This is limited to mentions on purpose; a general "space before
    # punctuation" rule would glue Slack emoji codes such as " :smile:" onto
    # the preceding word.
    text = re.sub(r'(?:\s*' + mention + r')+(?=[,.;:!?])', '', text)

    # 1b. Every remaining mention.
    text = re.sub(mention, '', text)

    # 2. Redact Emails
    # Logic: Look for standard email patterns and replace with placeholder
    text = re.sub(r'[\w\.-]+@[\w\.-]+\.\w+', '[EMAIL_REDACTED]', text)

    # 3. Clean up extra whitespace created by removals
    # Logic: Split by whitespace and rejoin with single spaces
    return " ".join(text.split())

# --- ARCHITECTURAL TEST ---
# Deliberately messy sample messages (mentions + e-mail addresses) used to
# eyeball the output of clean_text().
raw_slack_messages = [
    "Hey <@U025W>, did we send the contract to johndoe@acmecorp.com yet?",
    "Reviewing the terms with <@U999X>. send feedback to legal@internal.org ASAP.",
    "Deal is stuck. <@U1234> pls help.",
]

print("--- TESTING THE LAUNDROMAT ---")
for msg in raw_slack_messages:
    cleaned = clean_text(msg)
    # One print call, three output lines: raw text, cleaned text, separator.
    print(
        f"üî¥ RAW:   {msg}",
        f"üü¢ CLEAN: {cleaned}",
        "-" * 50,
        sep="\n",
    )

In [None]:
import time

# PHASE 3: VECTORIZATION & STORAGE

def get_embedding(text, model="text-embedding-3-small"):
    """
    Generates a vector embedding for `text` through the module-level OpenAI
    `client` (client.embeddings.create).

    Args:
        text: The text to embed.
        model: Embedding model name. Defaults to "text-embedding-3-small",
            the model this notebook was written against (its output was noted
            as a list of 1,536 floats).

    Returns:
        The embedding of the first item in the response (response.data[0]).

    Raises:
        ValueError: If `text` is an empty or whitespace-only string. Such
            input carries nothing to embed, so it is rejected before a
            request is sent.
    """
    # clean_text() can legitimately yield "" (e.g. a message that was only a
    # mention); fail with a clear message rather than an API error.
    if isinstance(text, str) and not text.strip():
        raise ValueError("Cannot embed empty or whitespace-only text.")

    response = client.embeddings.create(
        input=text,
        model=model
    )
    return response.data[0].embedding

# --- KNOWLEDGE BASE (Simulated Sales Data) ---
# Five made-up sales log entries that seed the vector index.
sales_data = [
    {"id": "msg_001", "text": "The deal with Acme Corp is worth $50k. <@U888> is the lead."},
    {"id": "msg_002", "text": "Beta Inc requires a 20% discount. Email approval to boss@company.com."},
    {"id": "msg_003", "text": "Gamma LLC signed the NDA yesterday. We start the pilot next week."},
    {"id": "msg_004", "text": "Competitor X is undercutting us on the Delta project by $5k."},
    {"id": "msg_005", "text": "Meeting with Omega Co canceled. Reschedule for Q4."},
]

# Handle to the Pinecone index whose name was entered during setup
index = pc.Index(PINECONE_INDEX_NAME)

print(f"üöÄ Starting Ingestion into Index: {PINECONE_INDEX_NAME}...")

# PROCESS LOOP: Clean -> Embed -> Upsert (one upsert call per record)
for item in sales_data:
    # Scrub mentions / e-mail addresses, then turn the result into a vector
    cleaned_text = clean_text(item['text'])
    vector = get_embedding(cleaned_text)

    # The cleaned text travels with the vector so it can be read back at query time
    metadata = dict(original_text=cleaned_text, source="slack")

    # Each vector is a (Unique ID, Vector List, Metadata Dictionary) tuple
    index.upsert(
        vectors=[
            (item['id'], vector, metadata),
        ]
    )

    print(f"‚úÖ Indexed: {item['id']}")

# Short fixed pause after the last upsert, before reporting completion
time.sleep(2)
print("\nüéâ PHASE 3 COMPLETE: Data is now stored in the Vector Database.")

In [None]:
# PHASE 4: RETRIEVAL AUGMENTED GENERATION (RAG)

def ask_sales_copilot(question, top_k=2, model="gpt-4o"):
    """
    1. Searches the Vector DB for relevant info.
    2. Sends context + question to the chat model (GPT-4o by default).

    Relies on the module-level `index` (Pinecone), `client` (OpenAI) and
    `get_embedding`.

    Args:
        question: The user's question.
        top_k: How many of the most similar records to retrieve (default 2).
        model: Chat model used to write the answer (default "gpt-4o").

    Returns:
        The model's answer text, or the fixed sentence
        "I don't have that information." when retrieval produced no usable
        context.
    """
    print(f"\n‚ùì QUESTION: {question}")

    # STEP A: Embed the Question
    query_vector = get_embedding(question)

    # STEP B: Retrieve relevant context from Pinecone
    search_response = index.query(
        vector=query_vector,
        top_k=top_k,
        include_metadata=True
    )

    # Extract the text from the search results. The index may hold vectors
    # that were not written by this notebook; a match without the expected
    # metadata is skipped instead of aborting the whole question.
    contexts = []
    for match in search_response['matches']:
        try:
            contexts.append(match['metadata']['original_text'])
        except (KeyError, TypeError, AttributeError):
            continue
    context_str = "\n".join(contexts)

    print(f"üîç FOUND CONTEXT: {contexts}")

    # With nothing retrieved, the prompt below permits exactly one answer,
    # so return it directly rather than spending a chat completion on it.
    if not contexts:
        return "I don't have that information."

    # STEP C: Generate Answer with the chat model
    system_prompt = f"""
    You are a Sales Operations Copilot.
    Answer the user's question based ONLY on the context provided below.
    If the answer is not in the context, say "I don't have that information."

    CONTEXT:
    {context_str}
    """

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": question}
        ],
        temperature=0  # original setting, kept unchanged
    )

    return response.choices[0].message.content

# --- TEST DRIVE ---
# Two sample questions whose answers live only in the indexed sales records.
acme_question = "How much is the Acme deal worth?"
beta_question = "What is happening with Beta Inc?"

# Ask, then print, one question at a time so each answer appears right after
# the retrieval output that ask_sales_copilot() prints for it.
answer1 = ask_sales_copilot(acme_question)
print(f"ü§ñ COPILOT ANSWER: {answer1}")

answer2 = ask_sales_copilot(beta_question)
print(f"ü§ñ COPILOT ANSWER: {answer2}")