In [3]:
# tinkering.ipynb or tinkering.py
"""
Initialize and test LLMService in notebook environment
"""

# Fix import paths for your current structure
import sys
import os

# Treat the parent of the current working directory as the project root and
# make it importable. The membership check keeps a re-run of this cell from
# appending the same entry to sys.path more than once.
project_root = os.path.dirname(os.path.abspath('.'))
if project_root not in sys.path:
    sys.path.append(project_root)

# Option 1: Simple settings without pydantic
class SimpleSettings:
    """Plain-Python settings holder for the Groq client (no pydantic needed).

    Every value can be overridden per instance; calling ``SimpleSettings()``
    with no arguments reproduces the original hard-coded configuration.

    Args:
        groq_api_key: Groq API key. When omitted (``None``), the
            ``GROQ_API_KEY`` environment variable is read at construction
            time, falling back to an empty string if it is unset.
        groq_model: Model identifier sent with each completion request.
        groq_temperature: Sampling temperature passed to the API.
        groq_max_tokens: Upper bound on generated tokens per response.
    """

    def __init__(
        self,
        groq_api_key=None,
        groq_model="llama3-8b-8192",
        groq_temperature=0.1,
        groq_max_tokens=1000,
    ):
        # The environment is consulted only when no explicit key was given, so
        # the value is a snapshot taken when the instance is created.
        if groq_api_key is None:
            groq_api_key = os.getenv("GROQ_API_KEY", "")
        self.groq_api_key = groq_api_key
        self.groq_model = groq_model
        self.groq_temperature = groq_temperature
        self.groq_max_tokens = groq_max_tokens

# Module-level settings instance; LLMService reads its API key, model,
# temperature and token limit from this object. The GROQ_API_KEY environment
# variable is read when this line runs, so set it before executing the cell.
settings = SimpleSettings()

# Imports and logging setup, followed by the LLMService definition
import logging
from typing import List, Dict, Any
from groq import Groq

# Set up logging for notebook: configure the root logger at INFO level so the
# service's log lines appear in the cell output.
logging.basicConfig(level=logging.INFO)
# Logger named after the current module ("__main__" when run in the notebook).
logger = logging.getLogger(__name__)

class LLMService:
    """Service for LLM operations using Groq.

    Wraps a ``Groq`` chat-completions client together with the model name,
    temperature and token limit taken from the module-level ``settings``.
    Both public methods are best-effort: API failures are logged and reported
    back as a string instead of being raised.
    """

    def __init__(self, api_key: str = None):
        """Initialize LLM service with Groq client.

        Args:
            api_key: Groq API key; falls back to ``settings.groq_api_key``
                when omitted or empty.

        Raises:
            ValueError: If no API key is available from either source.
        """
        self.api_key = api_key or settings.groq_api_key
        if not self.api_key:
            raise ValueError("Groq API key is required. Set GROQ_API_KEY environment variable.")

        self.client = Groq(api_key=self.api_key)
        self.model = settings.groq_model
        self.temperature = settings.groq_temperature
        self.max_tokens = settings.groq_max_tokens

        logger.info(f"LLM service initialized with model: {self.model}")

    def _complete(self, messages: List[Dict[str, str]], **request_options: Any) -> str:
        """Send ``messages`` to the chat-completions API and return the reply text.

        Args:
            messages: Chat messages as ``{"role": ..., "content": ...}`` dicts.
            **request_options: Extra keyword arguments forwarded unchanged to
                ``chat.completions.create`` (e.g. ``top_p``, ``stream``).

        Returns:
            The first choice's message content with surrounding whitespace
            removed; an empty string when the API returns no content.
        """
        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            temperature=self.temperature,
            max_tokens=self.max_tokens,
            **request_options,
        )
        # content can be None; guard so that case yields "" rather than an
        # AttributeError that would be reported to the caller as an "error".
        content = response.choices[0].message.content
        return (content or "").strip()

    def generate_answer(self, question: str, context_chunks: List[Dict[str, Any]] = None) -> str:
        """Generate answer using Groq LLM with optional context.

        Args:
            question: The user's question.
            context_chunks: Optional retrieved chunks, each a dict with a
                ``text`` entry and an optional ``metadata`` dict holding
                ``source``. When empty or omitted, the bare question is sent.

        Returns:
            The model's answer, or an apology string containing the error
            message if anything fails (errors are logged, never raised).
        """
        try:
            if context_chunks:
                context = self._format_context(context_chunks)
                prompt = self._create_prompt(question, context)
            else:
                prompt = question

            answer = self._complete(
                [
                    {
                        "role": "system",
                        "content": "You are a helpful AI assistant. Answer questions clearly and concisely."
                    },
                    {
                        "role": "user",
                        "content": prompt
                    },
                ],
                top_p=1,
                stream=False,
            )
            logger.info(f"Generated answer for question: {question[:50]}...")
            return answer

        except Exception as e:
            # exc_info keeps the traceback in the log; the caller only gets the text.
            logger.error(f"Error generating answer: {str(e)}", exc_info=True)
            return f"I apologize, but I encountered an error: {str(e)}"

    def _format_context(self, context_chunks: List[Dict[str, Any]]) -> str:
        """Format context chunks into a readable string.

        Each chunk becomes ``[Context i - Source: <source>]`` followed by its
        text; chunks are separated by a blank line. A chunk whose ``metadata``
        is missing or ``None`` is labelled with source ``Unknown``.
        """
        if not context_chunks:
            return "No relevant context found."

        formatted_context = []
        for i, chunk in enumerate(context_chunks, 1):
            # `or {}` also covers an explicit metadata=None, which a plain
            # .get('metadata', {}) would pass through and then crash on.
            metadata = chunk.get('metadata') or {}
            source = metadata.get('source', 'Unknown')
            text = chunk.get('text', '')
            formatted_context.append(f"[Context {i} - Source: {source}]\n{text}")

        return "\n\n".join(formatted_context)

    def _create_prompt(self, question: str, context: str) -> str:
        """Create a well-structured prompt for the LLM from context and question."""
        return f"""Based on the following context, please answer the question.

Context:
{context}

Question: {question}

Answer:"""

    def simple_chat(self, message: str) -> str:
        """Simple chat without context - useful for testing.

        Returns:
            The model's reply, or ``"Error: <message>"`` if the request fails
            (the failure is logged, never raised).
        """
        try:
            return self._complete([{"role": "user", "content": message}])
        except Exception as e:
            # Log like generate_answer does so failures are not silent.
            logger.error(f"Error in simple chat: {str(e)}", exc_info=True)
            return f"Error: {str(e)}"

# ============================================================================
# NOTEBOOK TESTING SECTION
# ============================================================================

def test_llm_service():
    """Smoke-test LLMService: initialization, a plain chat and a contextual answer.

    Progress is printed to stdout. LLMService reports API failures as strings
    rather than exceptions, so each reply is checked against those error
    prefixes before the run is declared successful.

    Returns:
        The initialized ``LLMService`` (even if a call reported an error, so it
        can be inspected), or ``None`` when the API key is missing or an
        exception is raised.
    """

    print("=== Testing LLM Service ===\n")

    # Check if API key is set
    api_key = os.getenv("GROQ_API_KEY")
    if not api_key:
        print("❌ GROQ_API_KEY not found in environment variables")
        print("Please set it with: os.environ['GROQ_API_KEY'] = 'your-api-key'")
        return None

    # Prefixes LLMService.simple_chat / generate_answer use when they swallow an error.
    error_prefixes = ("Error: ", "I apologize, but I encountered an error: ")
    failures = 0

    try:
        # Initialize LLM service
        print("🚀 Initializing LLM service...")
        # Pass the key read just now: the module-level settings object captured
        # the environment when it was created and may predate the variable
        # being set via os.environ as suggested above.
        llm_service = LLMService(api_key=api_key)
        print("✅ LLM service initialized successfully!")
    except Exception as e:
        print(f"❌ Error initializing LLM service: {str(e)}")
        return None

    try:
        # Test simple chat
        print("\n📝 Testing simple chat...")
        test_message = "Hello! Can you tell me what 2+2 equals?"
        response = llm_service.simple_chat(test_message)
        print(f"Question: {test_message}")
        print(f"Answer: {response}")
        if response.startswith(error_prefixes):
            failures += 1

        # Test with context
        print("\n📚 Testing with context...")
        sample_context = [
            {
                "text": "Machine learning is a subset of artificial intelligence that focuses on algorithms that can learn from data.",
                "metadata": {"source": "ai_textbook.pdf"}
            }
        ]

        context_question = "What is machine learning?"
        context_answer = llm_service.generate_answer(context_question, sample_context)
        print(f"Question: {context_question}")
        print(f"Answer: {context_answer}")
        if context_answer.startswith(error_prefixes):
            failures += 1

    except Exception as e:
        print(f"❌ Error while testing LLM service: {str(e)}")
        return None

    if failures:
        print(f"\n❌ {failures} test(s) failed - see the error text above.")
    else:
        print("\n✅ All tests passed!")
    return llm_service

# ============================================================================
# RUN TESTS
# ============================================================================

# Runs when executed as a script or whenever the ipykernel module is loaded
# (presumably: inside a Jupyter kernel). LLMService() raises ValueError when no
# API key is configured, so the cell fails fast in that case.
if __name__ == "__main__" or "ipykernel" in sys.modules:
    llm = LLMService()
    print(llm.generate_answer("What is the capital of France?"))
    

INFO:__main__:LLM service initialized with model: llama3-8b-8192
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:__main__:Generated answer for question: What is the capital of France?...


The capital of France is Paris.


In [6]:
import os
import sqlite3
import hashlib
import jwt
from datetime import datetime, timedelta
from typing import Optional, List
import time
import uuid

from fastapi import FastAPI, HTTPException, Depends, UploadFile, File, Form
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from pydantic import BaseModel
import chromadb
from groq import Groq
import docling
from docling.document_converter import DocumentConverter

# Configuration
# Secrets come from the environment so real values never live in the notebook.
# The literal fallbacks are placeholders kept for backward compatibility only
# and MUST be overridden: SECRET_KEY signs every JWT this app issues.
SECRET_KEY = os.getenv("SECRET_KEY") or "your-secret-key-change-this"
GROQ_API_KEY = os.getenv("GROQ_API_KEY") or "your-groq-api-key"
ALGORITHM = "HS256"  # JWT signing algorithm
ACCESS_TOKEN_EXPIRE_MINUTES = 30  # lifetime of issued access tokens

# Initialize
app = FastAPI(title="QA API", version="1.0.0")
# Bearer-token scheme; get_user_from_token depends on it to obtain credentials.
security = HTTPBearer()
# Groq client used by the /ask endpoint.
groq_client = Groq(api_key=GROQ_API_KEY)

# Initialize ChromaDB
# Persistent store under ./chroma_db (relative to the working directory); all
# uploaded chunks go into the single "documents" collection.
chroma_client = chromadb.PersistentClient(path="./chroma_db")
collection = chroma_client.get_or_create_collection(name="documents")

# Document converter
# Shared docling converter used by the /upload endpoint.
converter = DocumentConverter()

# Models
# Request body for POST /register.
class UserCreate(BaseModel):
    email: str  # inserted into users.email (UNIQUE)
    password: str  # as sent by the client; only its hash is stored

# Request body for POST /login.
class UserLogin(BaseModel):
    email: str  # looked up in users.email
    password: str  # checked against the stored hash via verify_password

# Request body for POST /ask.
class Question(BaseModel):
    question: str  # used both as the retrieval query and in the LLM prompt

# Response model for /register and /login.
class Token(BaseModel):
    access_token: str  # JWT produced by create_access_token
    token_type: str  # both endpoints return "bearer"

# Database setup
def init_db(db_path: str = 'qa_app.db') -> None:
    """Create the SQLite schema if it does not exist yet.

    Creates the ``users`` table (accounts) and the ``query_logs`` table (one
    row per answered question). Both statements use ``IF NOT EXISTS``, so the
    function is safe to call on every startup.

    Args:
        db_path: Path of the SQLite database file. Defaults to the file used
            throughout this application.
    """
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        # Users table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS users (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                email TEXT UNIQUE NOT NULL,
                password_hash TEXT NOT NULL,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')

        # Logs table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS query_logs (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                user_id INTEGER,
                question TEXT NOT NULL,
                response TEXT NOT NULL,
                response_time REAL NOT NULL,
                timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (user_id) REFERENCES users (id)
            )
        ''')

        conn.commit()
    finally:
        # Close even when a statement fails so the connection is not leaked.
        conn.close()

# Utility functions
# Default PBKDF2-HMAC-SHA256 work factor. The count is stored inside every
# hash, so it can be raised later without invalidating existing passwords.
_PBKDF2_ITERATIONS = 600_000

def hash_password(password: str, iterations: int = _PBKDF2_ITERATIONS) -> str:
    """Hash a password with salted PBKDF2-HMAC-SHA256.

    A single unsalted SHA-256 round is fast to brute-force and maps equal
    passwords to equal hashes; a random per-password salt and a high iteration
    count address both.

    Args:
        password: Plain-text password.
        iterations: PBKDF2 iteration count to use for this hash.

    Returns:
        A string of the form ``pbkdf2_sha256$<iterations>$<salt hex>$<digest hex>``.
    """
    salt = os.urandom(16)
    digest = hashlib.pbkdf2_hmac("sha256", password.encode(), salt, iterations)
    return f"pbkdf2_sha256${iterations}${salt.hex()}${digest.hex()}"

def verify_password(password: str, hashed: str) -> bool:
    """Check a plain-text password against a stored hash.

    Accepts hashes produced by ``hash_password`` and, for accounts created
    before the switch to PBKDF2, bare SHA-256 hex digests. Comparisons are
    constant-time. A malformed stored hash yields ``False`` rather than an
    exception.
    """
    import hmac  # local import: this is the only user in the file

    if hashed.startswith("pbkdf2_sha256$"):
        try:
            _, iterations, salt_hex, digest_hex = hashed.split("$")
            expected = bytes.fromhex(digest_hex)
            candidate = hashlib.pbkdf2_hmac(
                "sha256", password.encode(), bytes.fromhex(salt_hex), int(iterations)
            )
        except (ValueError, OverflowError):
            # Wrong field count, non-hex data or an unusable iteration count.
            return False
        return hmac.compare_digest(candidate, expected)

    # Legacy format: unsalted SHA-256 hex digest.
    legacy = hashlib.sha256(password.encode()).hexdigest()
    return hmac.compare_digest(legacy.encode(), hashed.encode())

def create_access_token(data: dict):
    """Return a signed JWT carrying ``data`` plus an ``exp`` claim.

    Args:
        data: Claims to embed (the endpoints pass ``{"sub": <email>}``). The
            dict is copied, so the caller's object is not modified.

    Returns:
        The token as produced by ``jwt.encode``, signed with ``SECRET_KEY``
        using ``ALGORITHM`` and expiring ``ACCESS_TOKEN_EXPIRE_MINUTES`` from now.
    """
    # Local import: the file-level import only brings in datetime and timedelta.
    from datetime import timezone

    to_encode = data.copy()
    # Timezone-aware "now": datetime.utcnow() is deprecated since Python 3.12.
    expire = datetime.now(timezone.utc) + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
    to_encode.update({"exp": expire})
    return jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)

def get_user_from_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """FastAPI dependency: validate the bearer token and return the user's email.

    Args:
        credentials: Bearer credentials supplied by the ``security`` scheme.

    Returns:
        The ``sub`` claim of the token (the user's email).

    Raises:
        HTTPException: 401 if the token is expired, cannot be decoded or
            verified, or has no ``sub`` claim.
    """
    try:
        payload = jwt.decode(credentials.credentials, SECRET_KEY, algorithms=[ALGORITHM])
    except jwt.ExpiredSignatureError:
        raise HTTPException(status_code=401, detail="Token expired")
    except jwt.InvalidTokenError:
        # PyJWT's base class for decode/validation failures. `jwt.JWTError`
        # does not exist in PyJWT, so naming it in an except clause raised
        # AttributeError (HTTP 500) for every non-expired failure.
        raise HTTPException(status_code=401, detail="Invalid token")

    email = payload.get("sub")
    if email is None:
        raise HTTPException(status_code=401, detail="Invalid token")
    return email

def get_user_id(email: str, db_path: str = 'qa_app.db') -> Optional[int]:
    """Look up the primary key of the user with the given email.

    Args:
        email: Email address to search for in the ``users`` table.
        db_path: Path of the SQLite database file.

    Returns:
        The user's ``id``, or ``None`` when no such user exists.
    """
    conn = sqlite3.connect(db_path)
    try:
        row = conn.execute("SELECT id FROM users WHERE email = ?", (email,)).fetchone()
    finally:
        # Close even if the query fails so the connection is not leaked.
        conn.close()
    return row[0] if row else None

def chunk_text(text: str, chunk_size: int = 1000, overlap: int = 200) -> List[str]:
    """Split ``text`` into fixed-size character chunks with overlap.

    Consecutive chunks share ``overlap`` characters. The last chunk may be
    shorter than ``chunk_size``; an empty ``text`` yields an empty list.

    Args:
        text: Text to split.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Number of characters repeated at the start of the next
            chunk; must satisfy ``0 <= overlap < chunk_size``.

    Raises:
        ValueError: If ``chunk_size`` or ``overlap`` is out of range. Without
            this check the window would never advance and the loop would not
            terminate.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    chunks = []
    start = 0
    while start < len(text):
        end = start + chunk_size
        chunks.append(text[start:end])
        if end >= len(text):
            # This chunk reached the end; stop instead of emitting a trailing
            # chunk that would consist only of already-seen overlap.
            break
        start = end - overlap
    return chunks

# API Endpoints
@app.on_event("startup")
async def startup_event():
    """Create the SQLite tables (via init_db) when the application starts."""
    # NOTE(review): FastAPI reports on_event as deprecated in favour of
    # lifespan handlers; migrating means changing how `app` is constructed.
    init_db()

@app.post("/register", response_model=Token)
async def register(user: UserCreate):
    """Create a new account and return a bearer token for it.

    Raises:
        HTTPException: 400 if the email is already registered.
    """
    conn = sqlite3.connect('qa_app.db')
    try:
        cursor = conn.cursor()

        # Check if user exists
        cursor.execute("SELECT id FROM users WHERE email = ?", (user.email,))
        if cursor.fetchone():
            raise HTTPException(status_code=400, detail="Email already registered")

        # Create user
        password_hash = hash_password(user.password)
        try:
            cursor.execute(
                "INSERT INTO users (email, password_hash) VALUES (?, ?)",
                (user.email, password_hash)
            )
        except sqlite3.IntegrityError:
            # A concurrent request inserted the same email between the check
            # above and this insert; the UNIQUE constraint caught it.
            raise HTTPException(status_code=400, detail="Email already registered")
        conn.commit()
    finally:
        # Single exit point for the connection, including error paths.
        conn.close()

    # Create token
    access_token = create_access_token(data={"sub": user.email})
    return {"access_token": access_token, "token_type": "bearer"}

@app.post("/login", response_model=Token)
async def login(user: UserLogin):
    """Verify credentials and return a bearer token.

    Raises:
        HTTPException: 401 if the email is unknown or the password does not
            match (the same response is used for both cases).
    """
    conn = sqlite3.connect('qa_app.db')
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT password_hash FROM users WHERE email = ?", (user.email,))
        result = cursor.fetchone()
    finally:
        # Close even if the query fails so the connection is not leaked.
        conn.close()

    if not result or not verify_password(user.password, result[0]):
        raise HTTPException(status_code=401, detail="Invalid credentials")

    access_token = create_access_token(data={"sub": user.email})
    return {"access_token": access_token, "token_type": "bearer"}

@app.post("/upload")
async def upload_document(
    file: UploadFile = File(...),
    current_user: str = Depends(get_user_from_token)
):
    """Convert an uploaded .txt/.pdf file to text, chunk it and index it.

    The file is written to a temporary path, converted with docling, split
    with ``chunk_text`` and stored in the ChromaDB collection.

    Raises:
        HTTPException: 400 if the file exceeds 10MB, has an unsupported
            extension, or yields no text; 500 for any other processing error.
    """
    max_bytes = 10 * 1024 * 1024

    # Check file size (10MB limit). The reported size may be absent (None), so
    # the actual content length is checked again after reading.
    if file.size is not None and file.size > max_bytes:
        raise HTTPException(status_code=400, detail="File too large. Max size: 10MB")

    # The client controls the filename: keep only its final component so it
    # cannot steer the temporary path into another directory.
    filename = os.path.basename(file.filename or "")

    # Check file type (case-insensitive, so "REPORT.PDF" is accepted too)
    if not filename.lower().endswith(('.txt', '.pdf')):
        raise HTTPException(status_code=400, detail="Only .txt and .pdf files are supported")

    temp_path = f"temp_{uuid.uuid4()}_{filename}"
    try:
        content = await file.read()
        if len(content) > max_bytes:
            raise HTTPException(status_code=400, detail="File too large. Max size: 10MB")

        # Save uploaded file temporarily
        with open(temp_path, "wb") as f:
            f.write(content)

        # Convert document using docling
        result = converter.convert(temp_path)
        text_content = result.document.export_to_markdown()

        # Chunk the text
        chunks = chunk_text(text_content)
        if not chunks:
            # Nothing to index; report it as a client error instead of letting
            # the vector store fail on an empty batch.
            raise HTTPException(status_code=400, detail="Document contains no extractable text")

        # Store in ChromaDB
        # NOTE(review): ids derive from the filename only, so uploading
        # another file with the same name reuses the same ids — confirm the
        # intended behaviour for re-uploads.
        chunk_ids = [f"{filename}_{i}" for i in range(len(chunks))]
        metadatas = [{"filename": filename, "chunk_index": i} for i in range(len(chunks))]

        collection.add(
            documents=chunks,
            ids=chunk_ids,
            metadatas=metadatas
        )

        return {
            "message": f"Document '{filename}' uploaded successfully",
            "chunks_created": len(chunks)
        }

    except HTTPException:
        # Deliberate client errors raised above must keep their status code.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing document: {str(e)}")
    finally:
        # Clean up temp file if it exists (success and failure paths alike)
        if os.path.exists(temp_path):
            os.remove(temp_path)

@app.post("/ask")
async def ask_question(
    question: Question,
    current_user: str = Depends(get_user_from_token)
):
    """Answer a question from the indexed documents and log the exchange.

    Retrieves up to three chunks from ChromaDB, asks the Groq model to answer
    from that context, and records question, answer and latency in
    ``query_logs``.

    Raises:
        HTTPException: 404 if retrieval returns no documents; 500 for any
            other failure.
    """
    start_time = time.time()

    try:
        # Search for relevant chunks
        results = collection.query(
            query_texts=[question.question],
            n_results=3
        )

        documents = results['documents'][0]
        if not documents:
            raise HTTPException(status_code=404, detail="No relevant documents found")

        # Prepare context from retrieved chunks
        context = "\n\n".join(documents)

        # Create prompt for Groq
        prompt = f"""Based on the following context, answer the user's question. If the answer cannot be found in the context, say so clearly.

Context:
{context}

Question: {question.question}

Answer:"""

        # Get response from Groq
        response = groq_client.chat.completions.create(
            model="llama3-8b-8192",  # or another available model
            messages=[
                {"role": "user", "content": prompt}
            ],
            max_tokens=500,
            temperature=0.1
        )

        # Coerce a missing completion to "" so the NOT NULL `response` column
        # in query_logs does not reject the log row.
        answer = response.choices[0].message.content or ""
        response_time = time.time() - start_time

        # Log the query
        user_id = get_user_id(current_user)
        conn = sqlite3.connect('qa_app.db')
        try:
            conn.execute(
                "INSERT INTO query_logs (user_id, question, response, response_time) VALUES (?, ?, ?, ?)",
                (user_id, question.question, answer, response_time)
            )
            conn.commit()
        finally:
            # Close even if the insert fails so the connection is not leaked.
            conn.close()

        return {
            "question": question.question,
            "answer": answer,
            "response_time": response_time,
            "sources": len(documents)
        }

    except HTTPException:
        # Keep the intended status (e.g. the 404 above); the generic handler
        # below would otherwise rewrap it as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing question: {str(e)}")

@app.get("/health")
async def health_check():
    # Liveness probe: always answers with a fixed payload and touches no
    # database or external service.
    return dict(status="healthy")

if __name__ == "__main__":
    import uvicorn
    import asyncio
    import sys

    # Check if we're in a Jupyter environment or already have an event loop:
    # get_running_loop() raises RuntimeError when no loop is running.
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No event loop running, safe to use uvicorn.run().
        # reload is not requested here: uvicorn supports it only when the app
        # is given as an import string (e.g. "main:app"); with an app object
        # and reload=True it logs a warning and exits without serving.
        uvicorn.run(app, host="0.0.0.0", port=8000)
    else:
        print("Running in an existing event loop (like Jupyter)")
        print("Please run the server manually with: uvicorn main:app --reload --host 0.0.0.0 --port 8000")

ERROR:chromadb.telemetry.product.posthog:Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
ERROR:chromadb.telemetry.product.posthog:Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


Running in an existing event loop (like Jupyter)
Please run the server manually with: uvicorn main:app --reload --host 0.0.0.0 --port 8000


        on_event is deprecated, use lifespan event handlers instead.

        Read more about it in the
        [FastAPI docs for Lifespan Events](https://fastapi.tiangolo.com/advanced/events/).
        
  @app.on_event("startup")


In [7]:
#!/usr/bin/env python3
"""
Simple server runner to avoid asyncio conflicts
"""

# NOTE(review): "main:app" presumably refers to the API code above saved as
# main.py — confirm that module is importable from the working directory.
if __name__ == "__main__":
    import uvicorn
    
    # Run the server
    uvicorn.run(
        "main:app",  # app given as an import string, not as an object
        host="0.0.0.0",
        port=8000,
        reload=True,
        log_level="info"
    )

INFO:     Will watch for changes in these directories: ['c:\\Q&A\\tests']
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)
INFO:     Started reloader process [27860] using WatchFiles
INFO:     Stopping reloader process [27860]
