<a href="https://colab.research.google.com/github/anjorisarabhai/OIBSIP/blob/main/initial_voice_bot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
# --- STEP 1: SETUP AND INSTALLATION (Necessary libraries will be reinstalled) ---
print("Installing necessary Python libraries...")
!pip install -q langchain pydantic langchain-core langchain-community requests

# Install and start the Ollama server
print("Setting up Ollama server...")
!curl -fsSL https://ollama.com/install.sh | sh -s

# --- Start Ollama server in the background and wait for readiness ---
import subprocess
import time
import requests
import json
from pydantic import BaseModel, Field
from typing import Literal
from langchain_community.chat_models import ChatOllama
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain.output_parsers import PydanticOutputParser
from datetime import datetime

# Start 'ollama serve' in the background
print("Starting Ollama service...")
subprocess.Popen(["ollama", "serve"])

# Wait for Ollama server to be ready
OLLAMA_URL = "http://127.0.0.1:11434"
print(f"Waiting for Ollama to be available at {OLLAMA_URL}...")
start_time = time.time()
while time.time() - start_time < 60:
    try:
        requests.get(f"{OLLAMA_URL}/api/tags", timeout=5)
        print("Ollama server is ready!")
        break
    except requests.exceptions.ConnectionError:
        time.sleep(5)
else:
    raise Exception("Ollama server failed to start within the time limit. Check system logs.")

# Pull the Llama 3 8B model (no -q flag)
print("Downloading Llama 3 model (8B)... This may take a few minutes.")
!ollama pull llama3

# --- STEP 2: PYTHON CODE FOR DATA EXTRACTION (KEY FIX IMPLEMENTED) ---
print("\nDefining Pydantic Schema and Extraction Function...")

# --- A. Define the Pydantic Schema ---
class VisitDetails(BaseModel):
    """Extracted details for scheduling a new visit in the CRM."""
    title: str = Field(description="A brief summary/title of the visit's purpose.")
    visit_type: Literal["OPERATION", "BUSINESS", "N/A"] = Field(description="Must be one of the allowed Visit Types: OPERATION or BUSINESS.")
    lead_name: str = Field(description="The full name of the lead/client.")
    date: str = Field(description="The date of the visit in YYYY-MM-DD format.")
    start_time: str = Field(description="The exact start time in 24-hour format (HH:MM).")
    end_time: str = Field(description="The exact end time in 24-hour format (HH:MM). If not specified, set to 'N/A'.")

# --- B. Define the Extraction Function (FIXED) ---
def extract_visit_data_final(transcript: str):
    """
    Uses the Prompt + Parser method with explicit JSON instructions and safe templating.
    """
    raw_output_string = "" # Initialize here to prevent UnboundLocalError
    try:
        current_date = datetime.now().strftime("%Y-%m-%d")

        # 1. Initialize the Pydantic Parser
        parser = PydanticOutputParser(pydantic_object=VisitDetails)

        # 2. Get the specific formatting instructions from the parser
        format_instructions = parser.get_format_instructions()

        # 3. Define the System Prompt and Template (CRITICAL FIX: Use PromptTemplate and concatenate instructions safely)

        # The prompt template only contains the variables we supply: transcript and custom instructions.
        template = """
        SYSTEM INSTRUCTIONS: You are an expert CRM data extraction agent. Your task is to analyze the user's voice transcript and extract the required information into a strict JSON object.
        Current Date: {current_date}. Infer dates and times relative to this date.
        If any field is missing, set its value to 'N/A'.

        {format_instructions}

        USER TRANSCRIPT: {transcript}

        RESPONSE:
        """

        # Create the PromptTemplate
        prompt = PromptTemplate(
            template=template,
            input_variables=["transcript", "current_date", "format_instructions"]
        )

        # 4. Initialize the Llama 3 Model with JSON format flag
        llm = ChatOllama(
            model="llama3",
            base_url=OLLAMA_URL,
            format="json",
            temperature=0
        )

        # 5. Create the Chain (Simple invoke)
        chain = prompt | llm

        # 6. Invoke the chain, passing the format_instructions separately
        raw_output_message = chain.invoke({
            "transcript": transcript,
            "current_date": current_date,
            "format_instructions": format_instructions # Pass the complex instructions here
        })

        # Get the content string
        raw_output_string = raw_output_message.content

        # 7. The parser attempts to convert the raw string into the Pydantic object
        result = parser.parse(raw_output_string)

        return result.dict()

    except Exception as e:
        # This will now safely print the raw output when a parsing error occurs
        print(f"\n! RAW OUTPUT THAT CAUSED ERROR: \n{raw_output_string}\n")
        print(f"An error occurred during final parsing. Error: {e}")
        return None

# --- STEP 3: EXECUTION ---
voice_input = "Schedule a BUSINESS visit with Anjori Sarabhai for 7:40 PM tomorrow to discuss interest rates and follow-up on the loan status. The meeting should end 5 minutes later."

print("\n\n--- RUNNING COMPATIBLE DATA EXTRACTION WITH LLAMA 3 ---")
extracted_data = extract_visit_data_final(voice_input)

if extracted_data:
    print("\n Successfully Extracted Structured JSON:")
    print(json.dumps(extracted_data, indent=4))
else:
    print("\n Failed to get structured data. See error details above.")

Installing necessary Python libraries...
Setting up Ollama server...
>>> Cleaning up old version at /usr/local/lib/ollama
>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
######################################################################## 100.0%
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.
Starting Ollama service...
Waiting for Ollama to be available at http://127.0.0.1:11434...
Ollama server is ready!
Downloading Llama 3 model (8B)... This may take a few minutes.
[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l

Defining Pydantic Sche

/tmp/ipython-input-563480001.py:116: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  return result.dict()
