# 🚀 AssistBuddy Pro - OCR + Voice (Fully Fixed)

**Works with ANY invoice/CCTV format**

- 📝 Flexible OCR parsing (handles $ or no $, spaces, etc.)
- 🛡️ Robust error handling
- 🗣️ Reliable voice output

In [None]:
# Step 1: Install
!pip install -q easyocr edge-tts nest-asyncio pillow
print("‚úì Ready!")

In [None]:
# Step 2: Setup
import easyocr, nest_asyncio, numpy as np, re
from PIL import Image, ImageDraw

# Notebooks already run an event loop; patching asyncio lets later cells call
# run_until_complete() from inside it.
nest_asyncio.apply()
print("Loading OCR...")
# English-only reader. With gpu=True EasyOCR uses CUDA when it is available
# and falls back to the CPU otherwise.
reader = easyocr.Reader(['en'], gpu=True)
print("✓ OCR ready!")

In [None]:
# Step 3: Parsing (FIXED - Flexible Patterns)
def extract_text(image):
    """Run OCR on *image* and return all recognized text as one string.

    Args:
        image: A PIL image, or any other value ``reader.readtext`` accepts
            (non-PIL values are passed through unchanged).

    Returns:
        The recognized text fragments joined by single spaces, in the order
        the reader returned them; an empty string when nothing was detected.
    """
    if isinstance(image, Image.Image):
        # Only RGB and grayscale images map directly onto pixel arrays.
        # Palette ("P"), bilevel ("1"), "LA", CMYK, ... would become palette
        # indices, booleans or oddly shaped arrays, so normalize them first.
        if image.mode not in ("RGB", "L"):
            image = image.convert("RGB")
        image = np.array(image)
    detections = reader.readtext(image)
    # The second item of each detection is the recognized string.
    return " ".join(item[1] for item in detections)

def parse_invoice(text):
    """Extract the amount, vendor and payment status from OCR'd invoice text.

    Args:
        text: Raw text recognized on the invoice.

    Returns:
        A dict holding only the fields that were found:
        ``'amount'`` (the number prefixed with ``$``, e.g. ``'$1,299.00'``),
        ``'vendor'`` (the first word following "INVOICE") and
        ``'status'`` (``'PAID'``, ``'UNPAID'`` or ``'PENDING'``).
    """
    info = {}

    # A number with exactly two decimals; it must start with a digit so stray
    # punctuation such as ",.50" is not captured.
    number = r'(\d[\d,]*\.\d{2})'

    # Tried in order; the first pattern that matches anywhere wins. Labelled
    # values come first so that a line-item price earlier in the text cannot
    # shadow the figure explicitly marked as the amount/total.
    amount_patterns = [
        r'amount[:\s]+\$?\s*' + number,                  # Amount: $999.00 / AMOUNT 999.00
        r'(?<!sub[ -])\btotal\b[:\s]+\$?\s*' + number,   # Total: $999.00, but not Subtotal / Sub-total
        r'\$\s*' + number,                               # $1,299.00 or $ 1,299.00
        number,                                          # Just the number with decimals
    ]
    for pattern in amount_patterns:
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            info['amount'] = f"${match.group(1)}"
            break

    # Vendor: first alphabetic word after "INVOICE" (separators skipped).
    vendor_match = re.search(r'INVOICE[-:\s]*([A-Za-z]+)', text, re.IGNORECASE)
    if vendor_match:
        info['vendor'] = vendor_match.group(1)

    # Status: "UNPAID" contains "PAID", so it has to be ruled out first.
    upper_text = text.upper()
    if 'UNPAID' in upper_text:
        info['status'] = 'UNPAID'
    elif 'PAID' in upper_text:
        info['status'] = 'PAID'
    elif 'PENDING' in upper_text:
        info['status'] = 'PENDING'

    return info

def answer_question(text, question):
    """Answer a question about OCR'd invoice/CCTV text by keyword matching.

    The lower-cased question is searched (substring match) for keywords of
    four topics: vendor, amount, payment status and person count.

    * One topic requested: its bare answer is returned (e.g. ``"$999.00"``).
    * Several topics requested (e.g. "What company and amount?"): every
      requested topic is answered, labelled and joined with ``". "``.
    * No topic recognized: a summary of every parsed invoice field, or the
      first 100 characters of the text when nothing could be parsed.

    Args:
        text: Text recognized in the image.
        question: The user's question; ``None`` is treated as empty.

    Returns:
        The answer as a human-readable string.
    """
    if not text or not text.strip():
        return "No text detected in this image. Please upload an invoice or CCTV screenshot with visible text."

    q_lower = (question or "").lower()
    info = parse_invoice(text)

    def asked(keywords):
        """Return True when the question mentions any of *keywords*."""
        return any(word in q_lower for word in keywords)

    # (label, key in `info`, question keywords) for each invoice field.
    invoice_topics = (
        ('Vendor', 'vendor', ('vendor', 'company', 'who', 'issued')),
        ('Amount', 'amount', ('amount', 'total', 'cost', 'price')),
        ('Status', 'status', ('status', 'paid')),
    )

    # One (bare answer, labelled answer) pair per topic the question touches.
    answers = []
    for label, key, keywords in invoice_topics:
        if not asked(keywords):
            continue
        if key in info:
            answers.append((info[key], f"{label}: {info[key]}"))
        else:
            missing = f"{label} not found in text"
            answers.append((missing, missing))

    if asked(('person', 'people', 'many')):
        # CCTV overlays such as "2 PERSONS DETECTED" or "3 PEOPLE".
        match = re.search(r'(\d+)\s*(?:PERSON|PEOPLE)', text.upper())
        if match:
            count = match.group(1)
            noun = "person" if int(count) == 1 else "persons"
            found = f"{count} {noun}"
        else:
            found = "No person count found"
        answers.append((found, found))

    if len(answers) == 1:
        return answers[0][0]
    if answers:
        return ". ".join(labelled for _, labelled in answers)

    # Default: return all info
    parts = [
        f"{label}: {info[key]}"
        for label, key, _ in invoice_topics
        if key in info
    ]
    return ". ".join(parts) if parts else f"I found: {text[:100]}"

def analyze_image(image, question):
    """OCR *image*, echo the recognized text, and answer *question* about it.

    Prints an ``Extracted: ...`` line (showing ``(none)`` when no text was
    recognized) and returns the string produced by ``answer_question``.
    """
    recognized = extract_text(image)
    shown = recognized if recognized else '(none)'
    print(f"Extracted: {shown}")
    reply = answer_question(recognized, question)
    return reply

# Signal that this cell finished running.
print("✓ Parsing ready!")

In [None]:
# Step 4: Test
# Render a synthetic invoice onto a white canvas, then ask for its amount.
img = Image.new('RGB', (800, 600), 'white')
d = ImageDraw.Draw(img)
for _y, _caption, _color in (
    (50, "INVOICE - Amazon", 'black'),
    (100, "Amount: $1,299.00", 'black'),
    (150, "Status: PAID", 'green'),
):
    d.text((50, _y), _caption, fill=_color)

answer = analyze_image(img, "What is the amount?")
print("A:", answer)

In [None]:
# Step 5: Voice (Error-Proof)
import asyncio, edge_tts
from IPython.display import Audio, display

async def text_to_speech(text, output_file="response.mp3"):
    """Synthesize *text* to an audio file with Edge TTS.

    Args:
        text: The sentence to speak. Empty, whitespace-only or very short
            (fewer than 3 characters) input is replaced by a stock phrase.
        output_file: Path the audio is saved to.

    Returns:
        ``output_file``, once the audio has been written.
    """
    # Strip first so that blank or padded strings cannot slip past the
    # length check and reach the synthesizer.
    text = (text or "").strip()
    if len(text) < 3:
        text = "No response available"
    communicate = edge_tts.Communicate(text, "en-US-AriaNeural")
    await communicate.save(output_file)
    return output_file

def ask_with_voice(image, question):
    """Answer *question* about *image*, print the exchange, and speak the answer.

    Speech is best-effort: any failure while synthesizing or displaying the
    audio is reported on one line and the text answer is still returned.
    """
    reply = analyze_image(image, question)
    print(f"\nQ: {question}\nA: {reply}")

    try:
        event_loop = asyncio.get_event_loop()
        mp3_path = event_loop.run_until_complete(text_to_speech(reply))
        player = Audio(mp3_path, autoplay=True)
        display(player)
    except Exception as err:
        # Only the first 50 characters of the error are shown.
        reason = str(err)[:50]
        print(f"(Voice failed: {reason})")

    return reply

# Signal that this cell finished running.
print("✓ Voice ready!")

In [None]:
# Step 6: Voice Demo - Invoice
# Banner, then a synthetic invoice that is answered aloud.
print("=" * 40, "VOICE DEMO: Invoice", "=" * 40, sep="\n")

img = Image.new('RGB', (800, 600), 'white')
d = ImageDraw.Draw(img)
for _y, _caption, _color in (
    (50, "INVOICE - TechCorp", 'black'),
    (100, "Item: Laptop", 'black'),
    (150, "Amount: $999.00", 'black'),
    (200, "Status: PAID", 'green'),
):
    d.text((50, _y), _caption, fill=_color)

ask_with_voice(img, "What company and amount?")

In [None]:
# Step 7: Voice Demo - CCTV
# Banner, then a synthetic camera frame with overlay text, answered aloud.
print("=" * 40, "VOICE DEMO: CCTV", "=" * 40, sep="\n")

img = Image.new('RGB', (640, 480), 'black')
d = ImageDraw.Draw(img)
for _position, _caption, _color in (
    ((10, 10), "CAM-05 | ENTRANCE", 'white'),
    ((10, 450), "2 PERSONS DETECTED", 'red'),
):
    d.text(_position, _caption, fill=_color)

ask_with_voice(img, "How many persons?")

In [None]:
# Step 8: Interactive
from google.colab import files
import io

print("Upload invoice/CCTV screenshot:")
uploaded = files.upload()
if uploaded:
    try:
        # Only the first uploaded file is used. load() forces decoding now so
        # a corrupt or non-image upload is reported here, not deep inside OCR.
        img = Image.open(io.BytesIO(next(iter(uploaded.values()))))
        img.load()
    except OSError as e:
        print(f"Could not read the uploaded file as an image: {e}")
    else:
        q = input("Question: ")
        ask_with_voice(img, q)