In [1]:
import logging
import os
from typing import List, Optional

import gradio as gr
from groq import Groq
from openai import AzureOpenAI
from pinecone import Pinecone

# API Configuration
# Credentials are read from the environment so that no secret is stored in the
# notebook source or its saved outputs.
# NOTE(review): the keys that were previously hard-coded in this cell should be
# treated as leaked and rotated with each provider.
def _require_env(name: str) -> str:
    """Return the non-empty value of environment variable ``name``.

    Raises:
        RuntimeError: if the variable is unset or blank, so a missing
            credential fails at startup instead of on the first request.
    """
    value = os.environ.get(name, "").strip()
    if not value:
        raise RuntimeError(f"Environment variable {name} is required but not set.")
    return value


# NOTE(review): the default below is the full embeddings URL (deployment path
# plus api-version query). The openai SDK documents `azure_endpoint` as the
# resource base URL -- confirm and override via AZURE_OPENAI_ENDPOINT if needed.
AZURE_OPENAI_ENDPOINT = os.environ.get(
    "AZURE_OPENAI_ENDPOINT",
    "https://mobi-dev-openai.openai.azure.com/openai/deployments/insurance-text-embedding-3-small/embeddings?api-version=2023-05-15",
)
AZURE_OPENAI_API_KEY = _require_env("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_API_VERSION = os.environ.get("AZURE_OPENAI_API_VERSION", "2023-05-15")
PINECONE_API_KEY = _require_env("PINECONE_API_KEY")
GROQ_API_KEY = _require_env("GROQ_API_KEY")

# Initialize clients globally
groq_client = Groq(api_key=GROQ_API_KEY)
azure_client = AzureOpenAI(
    api_key=AZURE_OPENAI_API_KEY,
    api_version=AZURE_OPENAI_API_VERSION,
    azure_endpoint=AZURE_OPENAI_ENDPOINT
)
pc = Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index("gst-chat-agent")

def get_embedding(text: str) -> Optional[List[float]]:
    """Generate an embedding vector for ``text``.

    Args:
        text: The string to embed.

    Returns:
        The embedding of the first item in the Azure OpenAI response, or
        ``None`` when ``text`` is empty/whitespace or the request fails.
        Failures are logged and never raised, so callers must handle ``None``.
    """
    try:
        # Nothing to embed: skip the remote call, which could only fail.
        if not text or not text.strip():
            return None

        response = azure_client.embeddings.create(
            input=[text],
            model="insurance-text-embedding-3-small"
        )
        return response.data[0].embedding
    except Exception as e:
        # Best-effort by design: the chat still works without retrieved context.
        logging.error(f"Embedding error: {e}")
        return None

def get_context(query: str) -> str:
    """Retrieve context passages for ``query`` from the Pinecone index.

    The query is embedded with ``get_embedding`` and the top 3 matches are
    requested with metadata. The ``text`` metadata field of each match is
    stripped and the non-empty ones are joined with blank lines.

    Args:
        query: The user's question.

    Returns:
        The joined passages, or ``""`` when no embedding is available, no
        match carries usable text, or any step fails (the error is logged).
    """
    try:
        embedding = get_embedding(query)
        if not embedding:
            return ""

        results = index.query(
            vector=embedding,
            top_k=3,
            include_metadata=True
        )

        contexts = []
        for match in results.matches:
            # A match without metadata (or with a non-string 'text') is skipped
            # on its own instead of raising and discarding the other matches.
            metadata = getattr(match, "metadata", None) or {}
            raw_text = metadata.get('text')
            if not isinstance(raw_text, str):
                continue
            text = raw_text.strip()
            if text:
                contexts.append(text)

        return "\n\n".join(contexts)
    except Exception as e:
        logging.error(f"Context error: {e}")
        return ""

def get_chat_response(message: str, chat_history: list) -> str:
    """Get a response from Groq using retrieved context and recent history.

    Args:
        message: The current user message.
        chat_history: Sequence of ``(user_msg, assistant_msg)`` pairs; only
            the last 5 pairs are forwarded to the model.

    Returns:
        The model's reply text, or a generic error message if anything fails
        or the model returns no content. Errors are logged, never raised.
    """
    fallback = "I encountered an error. Please try again."
    try:
        # Context is looked up for every message; get_context returns ""
        # when nothing relevant is found or retrieval fails.
        context = get_context(message)

        # Build conversation history
        messages = [
            {
                "role": "system",
                "content": """You are Glacien's GST expert. Provide clear, accurate answers about Indian GST."""
            }
        ]

        # Add chat history (limited to last 5 exchanges). Either side of a
        # pair may be None or a non-text payload; such entries are skipped
        # because the chat API expects string content.
        for user_msg, assistant_msg in (chat_history or [])[-5:]:
            if isinstance(user_msg, str) and user_msg:
                messages.append({"role": "user", "content": user_msg})
            if isinstance(assistant_msg, str) and assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})

        # Add current message with context if available
        current_message = f"Context: {context}\nQuestion: {message}" if context else message
        messages.append({"role": "user", "content": current_message})

        completion = groq_client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=messages,
            temperature=0.7,
            max_tokens=500
        )

        # The content field can be None; keep the declared str return type.
        return completion.choices[0].message.content or fallback
    except Exception as e:
        logging.error(f"Response error: {e}")
        return fallback

def respond(message: str, chat_history: list) -> tuple:
    """Handle one chat submission.

    A blank (empty or whitespace-only) message leaves the history untouched.
    Otherwise the reply from ``get_chat_response`` is appended to
    ``chat_history`` in place as a ``(message, reply)`` pair.

    Returns:
        ``("", chat_history)`` -- the empty string clears the input box and
        the (possibly extended) history refreshes the chat display.
    """
    if message.strip():
        reply = get_chat_response(message, chat_history)
        chat_history.append((message, reply))

    return "", chat_history

# Configure logging: INFO and above on the root logger, each record rendered
# as "timestamp - level - message".
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Gradio interface setup
with gr.Blocks(
    theme=gr.themes.Soft(),
    css="""
    .user-message { background-color: #e3f2fd !important; }
    .assistant-message { background-color: #f5f5f5 !important; }
    .message { margin: 8px 0; }
    .chatbot { height: 450px; overflow-y: auto; }
    """
) as demo:
    # Header (the logo is optional and only shown when the file is present)
    with gr.Row():
        if os.path.exists("logo.png"):
            gr.Image("logo.png", show_label=False, height=40)
        gr.Markdown("# Glacien GST Assistant")

    gr.Markdown("Ask any questions about Indian GST regulations and compliance.")

    # Extra state slot; it is only reset by the Clear button. The conversation
    # itself is carried by the Chatbot component value.
    state = gr.State([])

    # Chat display; its value doubles as the history passed to `respond`.
    # NOTE(review): avatar_images is documented as file paths or URLs -- confirm
    # that emoji strings render as intended in the installed Gradio version.
    chatbot = gr.Chatbot(
        value=[],
        height=450,
        show_label=False,
        avatar_images=("👤", "🤖"),
        bubble_full_width=False,
        render_markdown=True,
        # elem_classes takes a string or list of strings (not a dict); this
        # attaches the `.chatbot` rule from the CSS above to the component.
        elem_classes=["chatbot"]
    )

    # Input area
    with gr.Row():
        txt = gr.Textbox(
            show_label=False,
            placeholder="Type your GST question here...",
            scale=8
        )
        send = gr.Button("Send", scale=1, variant="primary")

    # Clear button
    clear = gr.Button("Clear Chat")

    # Event handlers
    def clear_chat():
        """Reset the textbox, the chat display and the state slot."""
        # One value per output, in order [txt, chatbot, state]; the textbox
        # must receive a string, otherwise "[]" would be shown in it.
        return "", [], []

    def enable_input():
        """Make sure the textbox is interactive after a reply is rendered."""
        # Takes no arguments because the follow-up step is wired with no inputs.
        return gr.update(interactive=True)

    txt.submit(
        fn=respond,
        inputs=[txt, chatbot],
        outputs=[txt, chatbot],
        api_name="chat"
    ).then(
        fn=enable_input,
        inputs=None,
        outputs=[txt]
    )

    send.click(
        fn=respond,
        inputs=[txt, chatbot],
        outputs=[txt, chatbot]
    ).then(
        fn=enable_input,
        inputs=None,
        outputs=[txt]
    )

    clear.click(
        fn=clear_chat,
        inputs=None,
        outputs=[txt, chatbot, state]
    )

if __name__ == "__main__":
    # Serve on all network interfaces at port 7860 and request a public
    # share link.
    # NOTE(review): share=True publishes the app on a public URL with no
    # authentication -- confirm this exposure is intended.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
    }
    demo.launch(**launch_options)

  from .autonotebook import tqdm as notebook_tqdm
2025-02-13 19:30:59,491 - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
2025-02-13 19:30:59,503 - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"


* Running on local URL:  http://0.0.0.0:7860


2025-02-13 19:31:00,171 - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
2025-02-13 19:31:00,343 - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"


* Running on public URL: https://6b9b083f137c6acf25.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


2025-02-13 19:31:02,332 - INFO - HTTP Request: HEAD https://6b9b083f137c6acf25.gradio.live "HTTP/1.1 200 OK"


In [2]:
# Prototype: ask the model whether a query needs document retrieval.
# The system prompt instructs it to answer with a JSON object of the form
# {"fetch_documents": true|false}.
CLASSIFIER_SYSTEM_PROMPT = "Query Classifier System\nA document retrieval classification system that determines when to fetch relevant documents based on query content.\nOverview\nThis system analyzes user queries and returns a JSON response indicating whether document retrieval is required.\nCore Functionality\nThe system implements a binary classification:\n\nReturns { \"fetch_documents\": true } for GST-related queries\nReturns { \"fetch_documents\": false } for all other queries\n\nImplementation Rules\nResponse Format\n{ \"fetch_documents\": true }"


def build_classifier_messages(query: str) -> list:
    """Build the chat messages that ask the classifier about ``query``.

    The conversation ends on the user turn so the model itself produces the
    classification; a pre-filled assistant reply after the query would leave
    nothing for the model to decide.
    """
    return [
        {"role": "system", "content": CLASSIFIER_SYSTEM_PROMPT},
        {"role": "user", "content": query},
    ]


client = Groq(api_key=GROQ_API_KEY)
completion = client.chat.completions.create(
    model="llama-3.3-70b-versatile",
    messages=build_classifier_messages("gst task"),
    temperature=0,  # deterministic output for a classification task
    max_completion_tokens=1024,
    top_p=1,
    stream=False,
    response_format={"type": "json_object"},
    stop=None,
)

print(completion.choices[0].message)


2025-02-13 19:55:22,859 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


ChatCompletionMessage(content='{\n   "fetch_documents": true\n}', role='assistant', function_call=None, reasoning=None, tool_calls=None)
