In [1]:
# @title
# Google Colab PDF XML Embedder for Odoo 16
# Run this in Google Colab

# Step 1: Install required packages
!pip install PyPDF2

# Step 2: Import libraries and upload functionality
import os
import xml.etree.ElementTree as ET
from PyPDF2 import PdfReader, PdfWriter
from io import BytesIO
from datetime import datetime
from google.colab import files
import zipfile

class PDFXMLEmbedder:
    """Build an invoice XML document and embed it in, or read it back from, a PDF.

    The XML is stored twice in the output PDF: as the custom ``/XMLData``
    entry of the document information dictionary and as an attached file
    named ``invoice_data.xml``.
    """

    # Child tags of <header> that come from the payload, in output order.
    _HEADER_TAGS = ('number', 'invoice_date', 'due_date', 'terms', 'currency')
    # Child tags of <supplier>; <customer> uses the first six only.
    _SUPPLIER_TAGS = (
        'name', 'npwp', 'address', 'city', 'postal_code', 'country',
        'email', 'website',
    )
    _CUSTOMER_TAGS = _SUPPLIER_TAGS[:6]
    # Child tags of <line> written before and after the derived <tax_rate>.
    _LINE_TAGS_BEFORE_TAX = (
        'description', 'service_start', 'service_end', 'quantity', 'rate',
    )
    _BANK_TAGS = (
        'account_name', 'account_idr', 'account_usd', 'cif_no',
        'bank_name', 'bank_address', 'bank_code', 'swift_code',
    )
    _TOTAL_TAGS = (
        'subtotal', 'discount', 'tax_amount', 'total_amount',
        'payment', 'balance_due',
    )

    def __init__(self):
        # Extensions the tool is meant for; no method of this class reads it.
        self.supported_formats = ['.pdf']

    @staticmethod
    def _sample_invoice_data():
        """Return a fresh copy of the built-in INV/INDO/2024/00222 payload."""
        service_period = {
            'service_start': '2024-01-01',
            'service_end': '2024-01-31',
        }
        # (description, quantity, rate, amount); every line is taxed at 11%.
        sample_lines = (
            ('Additional Monthly Whatsapp number Jan 2024(CDU Project)',
             '1', '2250000.00', '2497500.00'),
            ('Additional Monthly one agent access Jan 2024(CDU Project)',
             '1', '400000.00', '444000.00'),
            ('FB- Marketing Conversation Jan 2024',
             '136465', '603.92', '91479476.51'),
            ('FB- Utility Conversation Jan 2024',
             '65', '293.88', '21203.44'),
            ('FB- Authentication Jan 2024',
             '0', '440.809', '0.00'),
            ('Service conversation Jan 2024',
             '3841', '270.48', '1153194.18'),
        )
        return {
            'invoice_number': 'INV/INDO/2024/00222',
            'invoice_date': '2024-08-02',
            'due_date': '2024-09-03',
            'terms': 'Net 30',
            'supplier': {
                'name': 'PT. Bitonic Teknologi labs',
                'npwp': '95.436.189.5-011.000',
                'address': 'GEDUNG MAYAPADA TOWER II LT 11 SUITE M31 1 BY MARQUEE EXECUTIVE OFFICE,JL JENDERAL SUDIRMAN RT.004 RW.002 KARET KUNINGAN SETIABUDI',
                'city': 'Jakarta Selatan Jakarta',
                'postal_code': '12920',
                'country': 'Indonesia',
                'email': 'ar@yellow.ai',
                'website': 'https://yellow.ai/',
            },
            'customer': {
                'name': 'PT. BFI Finance Indonesia',
                'npwp': '01.316.111.2-091.000',
                'address': 'BFI Tower, Sunburst CBD Lot 1.2, Jalan Kapten Soebijanto Djojohadikusumo, BSD City, Tangerang, Banten',
                'city': 'Tangerang Jakarta Raya',
                'postal_code': '15322',
                'country': 'Indonesia',
            },
            'currency': 'IDR',
            'line_items': [
                {
                    'description': description,
                    **service_period,
                    'quantity': quantity,
                    'rate': rate,
                    'tax_rate': '11',
                    'amount': amount,
                }
                for description, quantity, rate, amount in sample_lines
            ],
            'bank_details': {
                'account_name': 'PT BITONIC TEKNOLOGI LABS',
                'account_idr': '050-299031-068',
                'account_usd': '050-299031-115',
                'cif_no': '050-299031',
                'bank_name': 'PT BANK HSBC INDONESIA',
                'bank_address': 'WTC1, JL. JEND SUDIRMAN KAV.29-31, JAKARTA 12920',
                'bank_code': '0870010',
                'swift_code': 'HSBCIDJA',
            },
            'subtotal': '86121958.68',
            'discount': '0.00',
            'tax_amount': '9473415.45',
            'total_amount': '95595374.13',
            'payment': '0.00',
            'balance_due': '95595374.13',
        }

    @staticmethod
    def _as_text(value):
        """Return *value* as element text: '' for None, ``str(value)`` otherwise."""
        return '' if value is None else str(value)

    @classmethod
    def _append_fields(cls, parent, data, tags, renames=None):
        """Append one child per tag to *parent*, reading ``data[tag]``.

        *renames* maps an XML tag to the payload key it is read from when
        the two differ. Missing keys produce an empty element.
        """
        renames = renames or {}
        for tag in tags:
            value = data.get(renames.get(tag, tag))
            ET.SubElement(parent, tag).text = cls._as_text(value)

    def create_sample_xml(self, invoice_data=None):
        """
        Create XML structure matching Indonesian tax invoice format
        Based on INV-INDO-2024-00222 example

        Args:
            invoice_data: Optional dict with the keys of the built-in sample
                (``invoice_number``, ``supplier``, ``line_items``, ...).
                Values may be strings or numbers; missing keys yield empty
                elements. When None, the built-in sample is used.

        Returns:
            The serialized ``<tax_invoice>`` document as a ``str`` (no XML
            declaration).
        """
        if invoice_data is None:
            invoice_data = PDFXMLEmbedder._sample_invoice_data()

        # The namespace is written as a plain attribute so that child tags
        # serialize without a prefix.
        root = ET.Element('tax_invoice')
        root.set('xmlns', 'http://www.odoo.com/invoice')
        root.set('type', 'out_invoice')
        root.set('country', 'ID')
        root.set('document_type', 'tax_invoice')

        # Invoice header: payload fields followed by two fixed flags.
        header = ET.SubElement(root, 'header')
        self._append_fields(
            header, invoice_data, self._HEADER_TAGS,
            renames={'number': 'invoice_number'},
        )
        ET.SubElement(header, 'state').text = 'posted'
        ET.SubElement(header, 'system_generated').text = 'true'

        # Supplier and customer
        parties = ET.SubElement(root, 'parties')
        self._append_fields(
            ET.SubElement(parties, 'supplier'),
            invoice_data.get('supplier') or {},
            self._SUPPLIER_TAGS,
        )
        self._append_fields(
            ET.SubElement(parties, 'customer'),
            invoice_data.get('customer') or {},
            self._CUSTOMER_TAGS,
        )

        # Invoice line items
        lines = ET.SubElement(root, 'invoice_lines')
        for item in invoice_data.get('line_items') or []:
            line = ET.SubElement(lines, 'line')
            self._append_fields(line, item, self._LINE_TAGS_BEFORE_TAX)
            # The payload stores the bare percentage; the XML carries "N%".
            tax_rate = self._as_text(item.get('tax_rate'))
            ET.SubElement(line, 'tax_rate').text = f"{tax_rate}%" if tax_rate else ''
            self._append_fields(line, item, ('amount',))

        # Tax information: a single VAT entry with a fixed 11% label.
        taxes = ET.SubElement(root, 'taxes')
        tax_line = ET.SubElement(taxes, 'tax')
        ET.SubElement(tax_line, 'name').text = 'VAT@11%'
        ET.SubElement(tax_line, 'rate').text = '11%'
        self._append_fields(
            tax_line, invoice_data, ('net_amount', 'tax_amount'),
            renames={'net_amount': 'subtotal'},
        )

        # Bank details
        self._append_fields(
            ET.SubElement(root, 'banking'),
            invoice_data.get('bank_details') or {},
            self._BANK_TAGS,
        )

        # Totals
        self._append_fields(
            ET.SubElement(root, 'totals'), invoice_data, self._TOTAL_TAGS,
        )

        return ET.tostring(root, encoding='unicode')

    def embed_xml_in_pdf(self, pdf_path, xml_data, output_path=None):
        """
        Embed XML data into PDF metadata for Odoo compatibility

        Args:
            pdf_path: Path of the PDF to read.
            xml_data: XML document as a ``str``.
            output_path: Destination path; defaults to
                ``<pdf_path stem>_with_xml<ext>``.

        Returns:
            ``(output_path, "Success")`` on success, or
            ``(None, "Error: <message>")`` if anything fails.
        """
        if output_path is None:
            name, ext = os.path.splitext(pdf_path)
            output_path = f"{name}_with_xml{ext}"

        try:
            with open(pdf_path, 'rb') as file:
                reader = PdfReader(file)
                writer = PdfWriter()

                # Copy all pages
                for page in reader.pages:
                    writer.add_page(page)

                # Reuse the source's title/author/subject when it has them.
                metadata = reader.metadata or {}

                writer.add_metadata({
                    '/Title': metadata.get('/Title', 'Indonesian Tax Invoice'),
                    '/Author': metadata.get('/Author', 'System'),
                    '/Subject': metadata.get('/Subject', 'Electronic Tax Invoice'),
                    '/Creator': 'PDF-XML-Embedder-Colab',
                    '/Producer': 'Python Script for Odoo 16',
                    '/XMLData': xml_data,
                    '/OdooCompatible': 'true',
                    '/InvoiceType': 'tax_invoice',
                    '/Country': 'Indonesia'
                })

                # Also embed as file attachment
                xml_bytes = xml_data.encode('utf-8')
                writer.add_attachment('invoice_data.xml', xml_bytes)

                # Written while the source file is still open, since the
                # copied pages come from the reader bound to that handle.
                with open(output_path, 'wb') as output_file:
                    writer.write(output_file)

            return output_path, "Success"

        except Exception as e:
            return None, f"Error: {str(e)}"

    def extract_xml_from_pdf(self, pdf_path):
        """
        Extract embedded XML data from PDF (for verification)

        The ``/XMLData`` metadata entry is preferred; attached ``*.xml``
        files are consulted only when the reader exposes ``attachments``.

        Returns:
            ``(xml, description)`` when XML is found,
            ``(None, "No XML data found")`` when it is not, or
            ``(None, "Error reading PDF: <message>")`` on failure.
        """
        try:
            with open(pdf_path, 'rb') as file:
                reader = PdfReader(file)

                # Check metadata for XML
                if reader.metadata and '/XMLData' in reader.metadata:
                    return reader.metadata['/XMLData'], "Found in metadata"

                # Check attachments
                if hasattr(reader, 'attachments'):
                    for name, data in reader.attachments.items():
                        if not name.endswith('.xml'):
                            continue
                        # NOTE(review): pypdf documents this mapping as
                        # name -> list of byte strings; accept both shapes.
                        if isinstance(data, (list, tuple)):
                            if not data:
                                continue
                            data = data[0]
                        return data.decode('utf-8'), f"Found as attachment: {name}"

                return None, "No XML data found"

        except Exception as e:
            return None, f"Error reading PDF: {str(e)}"

# Step 3: Initialize the embedder
print("🚀 PDF XML Embedder for Odoo 16 - Google Colab Version")
print("=" * 55)

embedder = PDFXMLEmbedder()

# Step 4: Upload your PDF file
print("\n📁 Upload your PDF file:")
uploaded = files.upload()

# Process uploaded files: embed the sample XML, verify it, download results.
for filename in uploaded:
    print(f"\n✅ Processing: {filename}")

    # Generate XML data
    xml_content = embedder.create_sample_xml()

    # Embed XML into PDF
    output_file, status = embedder.embed_xml_in_pdf(filename, xml_content)

    print(f"📊 Status: {status}")

    # embed_xml_in_pdf returns None as the path when it failed.
    if not output_file:
        continue

    print(f"🎯 Output created: {output_file}")

    # Verify the embedding by reading the XML back from the new file
    extracted_xml, extract_status = embedder.extract_xml_from_pdf(output_file)
    print(f"🔍 Verification: {extract_status}")

    # Show XML preview
    if extracted_xml:
        print(f"\n📋 XML Preview (first 300 chars):")
        print("-" * 40)
        if len(extracted_xml) > 300:
            print(extracted_xml[:300] + "...")
        else:
            print(extracted_xml)

    # Download the result
    print(f"\n⬇️ Downloading {output_file}...")
    files.download(output_file)

    # Also save XML separately for inspection. The name is built from the
    # stem rather than str.replace('.pdf', ...): replace leaves names such as
    # "X.PDF" unchanged, which made this write overwrite the uploaded PDF.
    xml_filename = os.path.splitext(filename)[0] + '_extracted.xml'
    with open(xml_filename, 'w', encoding='utf-8') as f:
        f.write(xml_content)

    print(f"⬇️ Downloading {xml_filename}...")
    files.download(xml_filename)

print("\n" + "=" * 55)
print("✨ Complete! Your Odoo 16 compatible PDF has been generated!")
print("🎯 Upload the *_with_xml.pdf file to Odoo 16")
print("📄 The separate .xml file shows what data was embedded")

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1
🚀 PDF XML Embedder for Odoo 16 - Google Colab Version

📁 Upload your PDF file:


Saving INV-INDO-2024-00222.pdf to INV-INDO-2024-00222.pdf

✅ Processing: INV-INDO-2024-00222.pdf
📊 Status: Success
🎯 Output created: INV-INDO-2024-00222_with_xml.pdf
🔍 Verification: Found in metadata

📋 XML Preview (first 300 chars):
----------------------------------------
<tax_invoice xmlns="http://www.odoo.com/invoice" type="out_invoice" country="ID" document_type="tax_invoice"><header><number>INV/INDO/2024/00222</number><invoice_date>2024-08-02</invoice_date><due_date>2024-09-03</due_date><terms>Net 30</terms><currency>IDR</currency><state>posted</state><system_gen...

⬇️ Downloading INV-INDO-2024-00222_with_xml.pdf...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

⬇️ Downloading INV-INDO-2024-00222_extracted.xml...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


✨ Complete! Your Odoo 16 compatible PDF has been generated!
🎯 Upload the *_with_xml.pdf file to Odoo 16
📄 The separate .xml file shows what data was embedded


In [6]:
# @title
# Google Colab PDF XML Embedder - SIMPLE AUTO-DOWNLOAD VERSION
# This version forces automatic downloads without connection issues

# Step 1: Install required packages
!pip install PyPDF2 pdfplumber

# Step 2: Import libraries
import os
import xml.etree.ElementTree as ET
from PyPDF2 import PdfReader, PdfWriter
import pdfplumber
import re
import json
from google.colab import files

# Banner for the second cell: title line followed by a 50-character rule.
print("🚀 PDF XML Embedder - Real Data Extraction", "=" * 50, sep="\n")

def extract_pdf_text(pdf_path):
    """Extract text content from PDF.

    Args:
        pdf_path: Path of the PDF to open with pdfplumber.

    Returns:
        The text of every page, each followed by a newline, or None if the
        file could not be opened or read (the error is printed).
    """
    try:
        with pdfplumber.open(pdf_path) as pdf:
            # NOTE(review): extract_text() can yield None for a page without
            # text in some pdfplumber versions; treat that as an empty page
            # so one such page does not discard the whole document.
            return "".join(
                f"{page.extract_text() or ''}\n" for page in pdf.pages
            )
    except Exception as e:
        print(f"Error extracting text: {e}")
        return None

def _mdy_to_iso(date_str):
    """Convert ``M/D/YYYY`` to ``YYYY-MM-DD``; return other shapes unchanged."""
    parts = date_str.split('/')
    if len(parts) != 3:
        return date_str
    month, day, year = parts
    return f"{year}-{month.zfill(2)}-{day.zfill(2)}"


def _leading_number(text):
    """Return the first digit-led number in *text* without commas, or None."""
    match = re.search(r'\d[\d,\.]*', text)
    return match.group().replace(',', '') if match else None


def parse_invoice_data(pdf_text):
    """Parse Indonesian tax invoice data from extracted PDF text.

    The parser is line-oriented and keyword-driven: it looks for labels such
    as ``INVOICE NO``, ``DUE DATE``, ``INVOICE TO:``, ``SUBTOTAL:`` and for
    ``Rp``-prefixed amounts. Dates are read as month/day/year.

    Args:
        pdf_text: Text of the invoice, lines separated by ``\\n``. None or an
            empty string yields the default (empty) result.

    Returns:
        A dict with the keys ``invoice_number``, ``invoice_date``,
        ``due_date``, ``terms``, ``supplier``, ``customer``, ``currency``,
        ``line_items``, ``bank_details``, ``subtotal``, ``discount``,
        ``tax_amount``, ``total_amount``, ``payment`` and ``balance_due``.
        Fields that are not found keep their defaults.
    """
    invoice_data = {
        'invoice_number': '',
        'invoice_date': '',
        'due_date': '',
        'terms': '',
        'supplier': {},
        'customer': {},
        'currency': 'IDR',
        'line_items': [],
        'bank_details': {},
        'subtotal': '0.00',
        'discount': '0.00',
        'tax_amount': '0.00',
        'total_amount': '0.00',
        'payment': '0.00',
        'balance_due': '0.00'
    }

    lines = (pdf_text or '').split('\n')

    # Extract invoice number (there is no early exit, so the last match wins)
    for line in lines:
        if 'INVOICE NO' in line or 'INV/' in line:
            match = re.search(r'INV[/\-\w]+/\d{4}/\d+', line)
            if match:
                invoice_data['invoice_number'] = match.group()

    # Extract dates: a "DUE DATE" line feeds due_date, any other line that
    # mentions "DATE" feeds invoice_date.
    date_pattern = r'\d{1,2}/\d{1,2}/\d{4}'
    for line in lines:
        if 'DATE' not in line:
            continue
        date_match = re.search(date_pattern, line)
        if not date_match:
            continue
        key = 'due_date' if 'DUE DATE' in line else 'invoice_date'
        invoice_data[key] = _mdy_to_iso(date_match.group())

    # Extract terms
    for line in lines:
        if 'TERMS' in line and 'Net 30' in line:
            invoice_data['terms'] = 'Net 30'

    # Extract supplier and customer info
    supplier_started = False
    customer_started = False

    for i, line in enumerate(lines):
        line = line.strip()

        # Supplier information: the first "PT." line seen before any
        # "INVOICE TO:" marker is taken as the supplier name.
        if 'PT.' in line and not supplier_started and not customer_started:
            invoice_data['supplier']['name'] = line
            supplier_started = True

            # Look for NPWP and other details in the following nine lines
            for j in range(i+1, min(i+10, len(lines))):
                if 'NPWP' in lines[j]:
                    # Must start on a digit so that stray "." or "-" after
                    # the label is not taken as the number.
                    npwp_match = re.search(r'\d[\d\.\-]*', lines[j])
                    if npwp_match:
                        invoice_data['supplier']['npwp'] = npwp_match.group()

                if any(keyword in lines[j].upper() for keyword in ['GEDUNG', 'TOWER', 'JL', 'JALAN']):
                    if 'address' not in invoice_data['supplier']:
                        invoice_data['supplier']['address'] = lines[j].strip()
                    else:
                        invoice_data['supplier']['address'] += ' ' + lines[j].strip()

                if '@' in lines[j]:
                    invoice_data['supplier']['email'] = lines[j].strip()
                if 'http' in lines[j]:
                    invoice_data['supplier']['website'] = lines[j].strip()

        # Customer information: the line after "INVOICE TO:" is the name.
        if 'INVOICE TO:' in line:
            customer_started = True
            if i+1 < len(lines):
                invoice_data['customer']['name'] = lines[i+1].strip()

                for j in range(i+2, min(i+15, len(lines))):
                    # The NPWP value sits on the line after its label.
                    if 'NPWP NUMBER:' in lines[j] and j+1 < len(lines):
                        invoice_data['customer']['npwp'] = lines[j+1].strip()

                    if any(keyword in lines[j].upper() for keyword in ['TOWER', 'JL', 'JALAN', 'BSD']):
                        if 'address' not in invoice_data['customer']:
                            invoice_data['customer']['address'] = lines[j].strip()
                        else:
                            invoice_data['customer']['address'] += ' ' + lines[j].strip()

    # Extract line items: everything after the table header row is scanned.
    in_items_section = False
    for line in lines:
        if 'ACTIVITY' in line and 'SERVICE START' in line:
            in_items_section = True
            continue

        if not (in_items_section and line.strip()):
            continue
        if not any(keyword in line for keyword in ['Monthly', 'FB-', 'Service', 'Additional']):
            continue

        # A row needs at least a rate and an amount; they are the last two
        # "Rp" figures on the line.
        amounts = re.findall(r'Rp[\d,\.]+', line)
        if len(amounts) < 2:
            continue

        item = {
            'description': '',
            'service_start': '',
            'service_end': '',
            'quantity': '1',
            'rate': amounts[-2].replace('Rp', '').replace(',', ''),
            'tax_rate': '11',
            'amount': amounts[-1].replace('Rp', '').replace(',', '')
        }

        # Description: leading words up to the first token that starts with
        # a digit or contains "Rp" or "/".
        desc_parts = []
        for part in line.split():
            if re.match(r'\d', part) or 'Rp' in part or '/' in part:
                break
            desc_parts.append(part)
        item['description'] = ' '.join(desc_parts)

        # Service period: the first two dates on the line
        dates = re.findall(date_pattern, line)
        if len(dates) >= 2:
            item['service_start'] = _mdy_to_iso(dates[0])
            item['service_end'] = _mdy_to_iso(dates[1])

        # Quantity: the number right before an "Rp" figure. Any number of
        # thousands groups is accepted, so "1,136,465" is not cut down to
        # "136,465".
        qty_match = re.search(r'(\d+(?:,\d+)*)\s+Rp', line)
        if qty_match:
            item['quantity'] = qty_match.group(1).replace(',', '')

        if item['description']:
            invoice_data['line_items'].append(item)

    # Extract totals; each line feeds at most one field, checked in this order.
    for line in lines:
        if 'SUBTOTAL:' in line:
            label, key = 'SUBTOTAL:', 'subtotal'
        elif 'TAX:' in line and 'RATE' not in line:
            label, key = 'TAX:', 'tax_amount'
        elif 'TOTAL:' in line and 'SUBTOTAL' not in line:
            label, key = 'TOTAL:', 'total_amount'
        elif 'BALANCE DUE:' in line:
            label, key = 'BALANCE DUE:', 'balance_due'
        else:
            continue

        amount = _leading_number(line.split(label)[1])
        if amount is not None:
            invoice_data[key] = amount

    # Extract bank details; each line feeds at most one field.
    bank_labels = (
        ('Account Name:', 'account_name'),
        ('Account Number for IDR:', 'account_idr'),
        ('Bank Name:', 'bank_name'),
        ('Swift Code:', 'swift_code'),
    )
    for line in lines:
        for label, key in bank_labels:
            if label in line:
                invoice_data['bank_details'][key] = line.split(label)[1].strip()
                break

    return invoice_data

def create_xml_from_extracted_data(invoice_data):
    """Serialize an extracted invoice dict as a ``<tax_invoice>`` XML string.

    Sections are written in the order header, parties, invoice_lines,
    totals. Missing keys fall back to '' (header and party fields), 'IDR'
    (currency), '1' (quantity) or '0' (rate, amount, totals).
    """
    root = ET.Element('tax_invoice')
    for attr, value in (
        ('xmlns', 'http://www.odoo.com/invoice'),
        ('type', 'out_invoice'),
        ('country', 'ID'),
    ):
        root.set(attr, value)

    # Header: (tag, source key, fallback); values are written as given.
    header_node = ET.SubElement(root, 'header')
    for tag, key, fallback in (
        ('number', 'invoice_number', ''),
        ('invoice_date', 'invoice_date', ''),
        ('due_date', 'due_date', ''),
        ('terms', 'terms', ''),
        ('currency', 'currency', 'IDR'),
    ):
        ET.SubElement(header_node, tag).text = invoice_data.get(key, fallback)

    # Parties: supplier first, then customer, same three fields each.
    parties_node = ET.SubElement(root, 'parties')
    for role in ('supplier', 'customer'):
        party_node = ET.SubElement(parties_node, role)
        party = invoice_data.get(role, {})
        for field in ('name', 'npwp', 'address'):
            ET.SubElement(party_node, field).text = party.get(field, '')

    # Line items: the numeric fields are stringified, the description is not.
    lines_node = ET.SubElement(root, 'invoice_lines')
    for entry in invoice_data.get('line_items', []):
        line_node = ET.SubElement(lines_node, 'line')
        ET.SubElement(line_node, 'description').text = entry.get('description', '')
        for field, fallback in (('quantity', '1'), ('rate', '0'), ('amount', '0')):
            ET.SubElement(line_node, field).text = str(entry.get(field, fallback))

    # Totals
    totals_node = ET.SubElement(root, 'totals')
    for field in ('subtotal', 'tax_amount', 'total_amount', 'balance_due'):
        ET.SubElement(totals_node, field).text = str(invoice_data.get(field, '0'))

    return ET.tostring(root, encoding='unicode')

def embed_xml_in_pdf(pdf_path, xml_data):
    """Write a copy of *pdf_path* that carries *xml_data*.

    The copy is named ``<stem>_with_xml<ext>``. The XML is stored as the
    ``/XMLData`` metadata entry and as the attachment ``invoice_data.xml``.

    Returns:
        The output path, or None if anything failed (the error is printed).
    """
    stem, suffix = os.path.splitext(pdf_path)
    output_path = f"{stem}_with_xml{suffix}"

    document_info = {
        '/Title': 'Indonesian Tax Invoice',
        '/Creator': 'PDF-XML-Embedder',
        '/XMLData': xml_data,
        '/OdooCompatible': 'true'
    }

    try:
        with open(pdf_path, 'rb') as source:
            reader = PdfReader(source)
            writer = PdfWriter()

            for page in reader.pages:
                writer.add_page(page)

            writer.add_metadata(document_info)
            writer.add_attachment('invoice_data.xml', xml_data.encode('utf-8'))

            # Written while the source handle is still open.
            with open(output_path, 'wb') as target:
                writer.write(target)
    except Exception as e:
        print(f"Error: {e}")
        return None

    return output_path

# Main execution
print("\n📁 Upload your PDF file:")
uploaded = files.upload()

for filename in uploaded.keys():
    print(f"\n✅ Processing: {filename}")

    # Extract and parse
    pdf_text = extract_pdf_text(filename)
    if not pdf_text:
        print("❌ Could not extract text")
        continue

    invoice_data = parse_invoice_data(pdf_text)

    # Show extracted data
    print(f"\n📋 EXTRACTED DATA:")
    print(f"Invoice: {invoice_data['invoice_number']}")
    print(f"Supplier: {invoice_data['supplier'].get('name', 'Not found')}")
    print(f"Customer: {invoice_data['customer'].get('name', 'Not found')}")
    print(f"Items: {len(invoice_data['line_items'])}")
    print(f"Total: {invoice_data['total_amount']}")

    # Create XML and embed
    xml_content = create_xml_from_extracted_data(invoice_data)
    output_file = embed_xml_in_pdf(filename, xml_content)

    if output_file:
        print(f"\n🎯 Created: {output_file}")

        # Save additional files
        json_file = filename.replace('.pdf', '_data.json')
        xml_file = filename.replace('.pdf', '_content.xml')

        with open(json_file, 'w') as f:
            json.dump(invoice_data, f, indent=2)

        with open(xml_file, 'w') as f:
            f.write(xml_content)

        print(f"💾 Also created: {json_file}, {xml_file}")

        # Multiple download methods
        print(f"\n⬇️ Trying auto-download...")

        # Method 1: Standard download
        download_success = 0
        for file_to_download in [output_file, json_file, xml_file]:
            try:
                files.download(file_to_download)
                print(f"✅ Downloaded: {file_to_download}")
                download_success += 1
            except Exception as e:
                print(f"⚠️ Standard download failed for {file_to_download}: {e}")

        # Method 2: JavaScript-based download
        if download_success < 3:
            print(f"\n🔄 Trying JavaScript download method...")
            from IPython.display import Javascript, display

            for file_to_download in [output_file, json_file, xml_file]:
                try:
                    js_code = f"""
                    const filename = '{file_to_download}';
                    const element = document.createElement('a');
                    element.setAttribute('href', '/files/' + filename);
                    element.setAttribute('download', filename);
                    element.style.display = 'none';
                    document.body.appendChild(element);
                    element.click();
                    document.body.removeChild(element);
                    console.log('Downloaded: ' + filename);
                    """
                    display(Javascript(js_code))
                    print(f"📥 JS Download triggered: {file_to_download}")
                except Exception as e:
                    print(f"JS download failed: {e}")

        # Method 3: Create proper download buttons that force download
        print(f"\n🖱️ Creating download buttons (no localhost needed):")
        from IPython.display import HTML, display
        import base64

        # Read files and encode them for direct download
        try:
            with open(output_file, 'rb') as f:
                pdf_data = base64.b64encode(f.read()).decode()

            with open(json_file, 'r') as f:
                json_data = base64.b64encode(f.read().encode()).decode()

            with open(xml_file, 'r') as f:
                xml_data = base64.b64encode(f.read().encode()).decode()

            button_html = f"""
            <div style="margin: 10px 0;">
                <h4>📥 Download Your Files (Click to Download):</h4>

                <a href="data:application/pdf;base64,{pdf_data}"
                   download="{output_file}"
                   style="display: inline-block; background: #4CAF50; color: white; padding: 12px 20px; margin: 5px;
                          text-decoration: none; border-radius: 5px; font-weight: bold;">
                    📄 Download {output_file} (Main File for Odoo)
                </a><br>

                <a href="data:application/json;base64,{json_data}"
                   download="{json_file}"
                   style="display: inline-block; background: #2196F3; color: white; padding: 12px 20px; margin: 5px;
                          text-decoration: none; border-radius: 5px; font-weight: bold;">
                    📊 Download {json_file} (Extracted Data)
                </a><br>

                <a href="data:application/xml;base64,{xml_data}"
                   download="{xml_file}"
                   style="display: inline-block; background: #FF9800; color: white; padding: 12px 20px; margin: 5px;
                          text-decoration: none; border-radius: 5px; font-weight: bold;">
                    📄 Download {xml_file} (XML Content)
                </a>
            </div>

            <div style="margin-top: 15px; padding: 15px; background: #e8f5e8; border-radius: 5px; border-left: 4px solid #4CAF50;">
                <strong>✅ Files Ready!</strong><br>
                📄 <strong>{output_file}</strong> - Upload this to Odoo 16<br>
                📊 <strong>{json_file}</strong> - Your extracted data<br>
                📄 <strong>{xml_file}</strong> - The embedded XML content
            </div>

            <div style="margin-top: 10px; padding: 10px; background: #fff3cd; border-radius: 5px;">
                <strong>💡 Alternative Method:</strong><br>
                If buttons don't work: Click the 📁 folder icon in left sidebar → Right-click each file → Download
            </div>
            """

            display(HTML(button_html))
            print("✅ Download buttons created with direct file data!")

        except Exception as e:
            print(f"❌ Could not create direct download buttons: {e}")

            # Fallback: Simple download buttons
            simple_html = f"""
            <div style="margin: 10px 0;">
                <h4>📁 Files Created Successfully:</h4>
                <p>🎯 <strong>{output_file}</strong> - Main file for Odoo</p>
                <p>📊 <strong>{json_file}</strong> - Extracted data</p>
                <p>📄 <strong>{xml_file}</strong> - XML content</p>

                <div style="background: #f8f9fa; padding: 15px; border-radius: 5px; margin-top: 10px;">
                    <strong>📥 To Download:</strong><br>
                    1. Click the 📁 folder icon on the left sidebar<br>
                    2. Find your files in the list<br>
                    3. Right-click each file → "Download"<br>
                    4. Or double-click to preview first
                </div>
            </div>
            """
            display(HTML(simple_html))

        # Method 4: Force download with different approach
        print(f"\n🔧 Alternative download commands:")
        print(f"Run these in separate cells if needed:")
        print(f"files.download('{output_file}')")
        print(f"files.download('{json_file}')")
        print(f"files.download('{xml_file}')")

print(f"\n✨ Complete! Check your downloads folder.")
print(f"🎯 Upload the *_with_xml.pdf file to Odoo 16!")

🚀 PDF XML Embedder - Real Data Extraction

📁 Upload your PDF file:


Saving INV-INDO-2024-00222.pdf to INV-INDO-2024-00222 (5).pdf

✅ Processing: INV-INDO-2024-00222 (5).pdf

📋 EXTRACTED DATA:
Invoice: 
Supplier: PT. Bitonic Teknologi labs ORIGINAL FOR RECIPIENT
Customer: INDO/2024/0022
Items: 6
Total: 0.00

🎯 Created: INV-INDO-2024-00222 (5)_with_xml.pdf
💾 Also created: INV-INDO-2024-00222 (5)_data.json, INV-INDO-2024-00222 (5)_content.xml

⬇️ Trying auto-download...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ Downloaded: INV-INDO-2024-00222 (5)_with_xml.pdf


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ Downloaded: INV-INDO-2024-00222 (5)_data.json


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ Downloaded: INV-INDO-2024-00222 (5)_content.xml

🖱️ Creating download buttons (no localhost needed):


✅ Download buttons created with direct file data!

🔧 Alternative download commands:
Run these in separate cells if needed:
files.download('INV-INDO-2024-00222 (5)_with_xml.pdf')
files.download('INV-INDO-2024-00222 (5)_data.json')
files.download('INV-INDO-2024-00222 (5)_content.xml')

✨ Complete! Check your downloads folder.
🎯 Upload the *_with_xml.pdf file to Odoo 16!


In [6]:
# PRACTICAL Odoo 16 Invoice Import - WORKING SOLUTIONS
# Addresses real Odoo limitations and provides working alternatives

!pip install PyPDF2 pdfplumber openpyxl pandas

import pandas as pd
import json
import re
import os
from google.colab import files
import pdfplumber
import base64
from datetime import datetime

# Banner printed once at the top of this cell's output.
print("🔧 PRACTICAL Odoo 16 Invoice Import Solutions")
print("=" * 60)

def parse_indonesian_number(num_str):
    """Convert a formatted amount string to a float.

    The currency markers ``Rp`` and ``IDR`` are stripped first. Separators
    are then interpreted as follows:

    * both ``,`` and ``.`` present: whichever occurs last is the decimal
      mark and the other is a thousands separator, so ``1.234.567,89``
      (Indonesian) and ``1,234,567.89`` (English) both parse;
    * several dots, no comma: dots are thousands separators;
    * exactly one comma, no dot: the comma is the decimal mark;
    * otherwise: commas are dropped and the rest is handed to ``float``.

    NOTE(review): a lone separator followed by three digits ("400.000",
    "400,000") is ambiguous and is still read as a decimal mark, as before.

    Args:
        num_str: The text (or number) to convert. ``None``, empty and
            whitespace-only values yield ``0.0``.

    Returns:
        The parsed value, or ``0.0`` (after printing a warning) when the
        text is not a number.
    """
    if not num_str or str(num_str).strip() == '':
        return 0.0

    clean_str = str(num_str).replace('Rp', '').replace('IDR', '').strip()

    try:
        if ',' in clean_str and '.' in clean_str:
            # The right-most separator is the decimal mark; the other one
            # only groups thousands and is dropped from the integer part.
            if clean_str.rfind(',') > clean_str.rfind('.'):
                decimal_sep, thousands_sep = ',', '.'
            else:
                decimal_sep, thousands_sep = '.', ','
            integer_part, _, decimal_part = clean_str.rpartition(decimal_sep)
            integer_part = integer_part.replace(thousands_sep, '')
            return float(f"{integer_part}.{decimal_part}")
        if clean_str.count('.') > 1:
            # Multiple dots = thousands separators
            return float(clean_str.replace('.', ''))
        if clean_str.count(',') == 1:
            # Single comma = decimal separator
            return float(clean_str.replace(',', '.'))
        return float(clean_str.replace(',', ''))
    except ValueError:
        # float() raises ValueError for malformed text; anything else
        # (e.g. KeyboardInterrupt) is deliberately not swallowed.
        print(f"Warning: Could not parse number '{num_str}'")
        return 0.0

def extract_pdf_text(pdf_path):
    """Return the text of every page of a PDF, one newline after each page.

    Pages for which ``extract_text()`` yields nothing are skipped. Any
    failure is reported on stdout and ``None`` is returned instead.
    """
    try:
        with pdfplumber.open(pdf_path) as document:
            chunks = []
            for pg in document.pages:
                content = pg.extract_text()
                if content:
                    chunks.append(content + "\n")
        return "".join(chunks)
    except Exception as err:
        print(f"Error extracting PDF text: {err}")
        return None

def parse_invoice_comprehensive(pdf_text):
    """Heuristically pull invoice fields out of extracted PDF text.

    The text is split into non-empty, stripped lines and scanned with
    keyword and regex heuristics. Nothing is validated; fields that are not
    found keep their defaults.

    Args:
        pdf_text: Plain text of the invoice. ``None`` or an empty string
            yields the default (empty) result.

    Returns:
        dict with the keys ``invoice_number``, ``invoice_date``,
        ``due_date``, ``supplier_name``, ``supplier_address``,
        ``supplier_npwp``, ``supplier_email``, ``customer_name``,
        ``customer_address``, ``customer_npwp`` (strings, default ``''``),
        ``line_items`` (list of ``{'description', 'amount', 'quantity'}``
        dicts), ``subtotal``, ``tax_amount``, ``total_amount`` (floats,
        default ``0.0``) and ``currency`` (always ``'IDR'``).
        ``supplier_address`` and ``customer_address`` are never filled in
        by this function.
    """
    data = {
        'invoice_number': '',
        'invoice_date': '',
        'due_date': '',
        'supplier_name': '',
        'supplier_address': '',
        'supplier_npwp': '',
        'supplier_email': '',
        'customer_name': '',
        'customer_address': '',
        'customer_npwp': '',
        'line_items': [],
        'subtotal': 0.0,
        'tax_amount': 0.0,
        'total_amount': 0.0,
        'currency': 'IDR'
    }

    # `or ''` lets a failed text extraction (None) fall through to defaults.
    lines = [line.strip() for line in (pdf_text or '').split('\n') if line.strip()]

    # Extract invoice number - first line matching any pattern wins
    number_patterns = [
        r'INV[/\-\w]*[/\-]\d{4}[/\-]\d+',
        r'INVOICE\s*NO[:\s]*([A-Z0-9/\-]+)',
        r'No\.\s*Invoice[:\s]*([A-Z0-9/\-]+)',
        r'Invoice\s*#[:\s]*([A-Z0-9/\-]+)'
    ]
    for line in lines:
        for pattern in number_patterns:
            match = re.search(pattern, line, re.IGNORECASE)
            if match:
                # lastindex is None when the pattern has no capture group,
                # in which case the whole match is the invoice number.
                captured = match.group(1) if match.lastindex else match.group()
                data['invoice_number'] = captured.strip()
                break
        if data['invoice_number']:
            break

    # Extract dates - a later matching line overwrites an earlier one
    date_patterns = [r'\d{1,2}/\d{1,2}/\d{4}', r'\d{1,2}-\d{1,2}-\d{4}']
    for line in lines:
        line_lower = line.lower()
        for date_pattern in date_patterns:
            dates_in_line = re.findall(date_pattern, line)
            if dates_in_line:
                if 'date' in line_lower and 'due' not in line_lower:
                    data['invoice_date'] = dates_in_line[0]
                elif 'due' in line_lower:
                    data['due_date'] = dates_in_line[0]

    # Extract supplier info
    supplier_keywords = ['PT.', 'CV.', 'UD.', 'From:', 'Vendor:']
    for i, line in enumerate(lines):
        for keyword in supplier_keywords:
            if keyword in line and not data['supplier_name']:
                if keyword in ['From:', 'Vendor:']:
                    # Next line is company name
                    if i + 1 < len(lines):
                        data['supplier_name'] = lines[i + 1]
                else:
                    # Current line contains company name
                    data['supplier_name'] = line

                # Look for NPWP and email in this and the next few lines
                for j in range(i, min(i + 8, len(lines))):
                    if 'npwp' in lines[j].lower():
                        npwp_match = re.search(r'[\d\.\-]{10,}', lines[j])
                        if npwp_match:
                            data['supplier_npwp'] = npwp_match.group()

                    if '@' in lines[j]:
                        email_match = re.search(r'[\w\.-]+@[\w\.-]+\.\w+', lines[j])
                        if email_match:
                            data['supplier_email'] = email_match.group()
                break
        if data['supplier_name']:
            break

    # Extract customer info - the line after the keyword is taken as the name
    customer_keywords = ['Bill To:', 'Invoice To:', 'Customer:', 'To:']
    for i, line in enumerate(lines):
        for keyword in customer_keywords:
            if keyword in line and not data['customer_name']:
                if i + 1 < len(lines):
                    data['customer_name'] = lines[i + 1]

                # Look for customer NPWP; stop at the first line mentioning it
                for j in range(i + 1, min(i + 10, len(lines))):
                    if 'npwp' in lines[j].lower():
                        npwp_match = re.search(r'[\d\.\-]{10,}', lines[j])
                        if npwp_match:
                            data['customer_npwp'] = npwp_match.group()
                        break
                break
        if data['customer_name']:
            break

    # Extract line items - look for common patterns
    # NOTE(review): these patterns also match summary rows that carry an
    # "Rp" amount (e.g. a VAT line); callers may want to filter those.
    item_patterns = [
        r'(.*?)\s+(\d+(?:[,\.]\d+)?)\s+Rp\s*([\d,\.]+)\s+Rp\s*([\d,\.]+)',
        r'(.*?)\s+Rp\s*([\d,\.]+)\s+Rp\s*([\d,\.]+)',
        r'(Monthly|Service|Additional|FB-.*?)\s+.*?Rp\s*([\d,\.]+)'
    ]

    for line in lines:
        for pattern in item_patterns:
            match = re.search(pattern, line, re.IGNORECASE)
            if match:
                groups = match.groups()
                if len(groups) >= 2:
                    description = groups[0].strip()
                    # The last captured amount is treated as the line total
                    amount = parse_indonesian_number(groups[-1])

                    if description and amount > 0:
                        data['line_items'].append({
                            'description': description,
                            'amount': amount,
                            'quantity': 1
                        })
                # Only the first matching pattern is used for a line
                break

    # Extract totals
    total_keywords = {
        'subtotal': ['subtotal', 'sub total', 'sub-total'],
        'tax_amount': ['tax', 'ppn', 'vat', 'pajak'],
        'total_amount': ['total', 'grand total', 'amount due', 'balance due']
    }

    for line in lines:
        line_lower = line.lower()

        # 'total' is a substring of every subtotal keyword, so a subtotal
        # line must not also be taken as the grand total.
        is_subtotal_line = any(
            keyword in line_lower for keyword in total_keywords['subtotal']
        )
        matched_fields = []
        for field, keywords in total_keywords.items():
            if field == 'total_amount' and is_subtotal_line:
                continue
            if any(keyword in line_lower for keyword in keywords):
                matched_fields.append(field)
        if not matched_fields:
            # Skip before parsing so unrelated lines cause no parse warnings
            continue

        # Prefer amounts prefixed with "Rp"; otherwise any numeric-looking run
        amounts = re.findall(r'rp\s*([\d,\.]+)', line, re.IGNORECASE)
        if not amounts:
            amounts = re.findall(r'([\d,\.]+)', line)
        if not amounts:
            continue

        amount = parse_indonesian_number(amounts[-1])  # Take last amount
        if amount <= 0:
            continue

        for field in matched_fields:
            if data[field] == 0.0:  # Only set if not already set
                data[field] = amount

    return data

def create_odoo_csv_import(invoice_data, filename):
    """Write a vendor-bill CSV for Odoo's import tool and return its path.

    One CSV row is written per entry in ``invoice_data['line_items']``; if
    there are none, a single row is written for the whole invoice. Amounts
    are treated as tax-inclusive at 11% only when
    ``invoice_data['tax_amount']`` is positive, in which case the row is
    also tagged with the ``PPN 11%`` tax.

    Args:
        invoice_data: dict as produced by ``parse_invoice_comprehensive``.
        filename: Path of the source PDF. The CSV is written next to it as
            ``<name without extension>_vendor_bills.csv``.

    Returns:
        The path of the CSV file that was written.
    """
    import os  # local import keeps this function self-contained

    # splitext instead of str.replace('.pdf', ...): with replace, a name such
    # as "BILL.PDF" came back unchanged and the CSV overwrote the upload.
    csv_filename = os.path.splitext(filename)[0] + '_vendor_bills.csv'

    has_tax = invoice_data['tax_amount'] > 0
    tax_name = 'PPN 11%' if has_tax else ''

    def build_row(label, quantity, price_unit):
        # Column order matters: it becomes the CSV header order.
        return {
            'partner_id/name': invoice_data['supplier_name'],
            'partner_id/vat': invoice_data['supplier_npwp'],
            'partner_id/email': invoice_data['supplier_email'],
            'ref': invoice_data['invoice_number'],
            'invoice_date': invoice_data['invoice_date'],
            'invoice_date_due': invoice_data['due_date'],
            'move_type': 'in_invoice',
            'currency_id': 'IDR',
            'invoice_line_ids/name': label,
            'invoice_line_ids/quantity': quantity,
            'invoice_line_ids/price_unit': price_unit,
            'invoice_line_ids/account_id': 'Expenses',  # You may need to adjust
            'invoice_line_ids/tax_ids/name': tax_name,
        }

    if invoice_data['line_items']:
        # Create one row per line item
        csv_data = [
            build_row(
                item['description'],
                item['quantity'],
                item['amount'] / 1.11 if has_tax else item['amount'],
            )
            for item in invoice_data['line_items']
        ]
    else:
        # Single line invoice
        if invoice_data['subtotal'] > 0:
            amount_excl_tax = invoice_data['subtotal']
        elif has_tax:
            amount_excl_tax = invoice_data['total_amount'] / 1.11
        else:
            # No tax is attached to the row, so stripping 11% here would
            # make the imported bill total lower than the invoice total.
            amount_excl_tax = invoice_data['total_amount']
        csv_data = [
            build_row(f'Invoice {invoice_data["invoice_number"]}', 1, amount_excl_tax)
        ]

    df = pd.DataFrame(csv_data)
    df.to_csv(csv_filename, index=False)
    return csv_filename

def create_manual_entry_guide(invoice_data, filename):
    """Write a plain-text, step-by-step Odoo data-entry guide for one invoice.

    The guide lists the vendor details, the bill header fields, one block
    per line item (or a single block for the whole invoice when no items
    were extracted) and the expected totals. Unit prices are shown as the
    tax-inclusive amount divided by 1.11.

    Args:
        invoice_data: dict as produced by ``parse_invoice_comprehensive``.
        filename: Path of the source PDF. The guide is written next to it
            as ``<name without extension>_manual_entry_guide.txt``.

    Returns:
        The path of the UTF-8 text file that was written.
    """
    import os  # local import keeps this function self-contained

    # splitext instead of str.replace('.pdf', ...): with replace, a name such
    # as "BILL.PDF" came back unchanged and the guide overwrote the upload.
    guide_filename = os.path.splitext(filename)[0] + '_manual_entry_guide.txt'

    sections = [f"""
ODOO 16 MANUAL ENTRY GUIDE
{invoice_data['invoice_number']}
=====================================

STEP 1: Create/Update Vendor
----------------------------
Go to: Accounting → Vendors → Vendors → Create

Vendor Name: {invoice_data['supplier_name']}
Tax ID (NPWP): {invoice_data['supplier_npwp']}
Email: {invoice_data['supplier_email']}
Address: {invoice_data['supplier_address']}

STEP 2: Create Vendor Bill
-------------------------
Go to: Accounting → Vendors → Bills → Create

Basic Information:
• Vendor: {invoice_data['supplier_name']}
• Bill Reference: {invoice_data['invoice_number']}
• Bill Date: {invoice_data['invoice_date']}
• Due Date: {invoice_data['due_date']}
• Currency: IDR

STEP 3: Add Invoice Lines
------------------------
"""]

    if invoice_data['line_items']:
        for i, item in enumerate(invoice_data['line_items'], 1):
            sections.append(f"""
Line {i}:
• Label: {item['description']}
• Account: Expenses (or appropriate expense account)
• Quantity: {item['quantity']}
• Unit Price: {item['amount'] / 1.11:.2f} (excluding tax)
• Taxes: PPN 11% (if applicable)
""")
    else:
        sections.append(f"""
Single Line:
• Label: Invoice {invoice_data['invoice_number']}
• Account: Expenses
• Quantity: 1
• Unit Price: {invoice_data['total_amount'] / 1.11:.2f} (excluding tax)
• Taxes: PPN 11%
""")

    sections.append(f"""

STEP 4: Verify Totals
--------------------
Expected totals:
• Untaxed Amount: {invoice_data['subtotal']:.2f}
• Tax Amount: {invoice_data['tax_amount']:.2f}
• Total: {invoice_data['total_amount']:.2f}

STEP 5: Save and Confirm
-----------------------
• Click Save
• Review the bill
• Click Confirm to post the bill

TROUBLESHOOTING:
• If vendor doesn't exist: Create it first
• If accounts missing: Check Chart of Accounts
• If taxes wrong: Configure Indonesian tax rates
• If currency issues: Enable IDR currency
""")

    with open(guide_filename, 'w', encoding='utf-8') as f:
        f.write("".join(sections))

    return guide_filename

def create_excel_template(invoice_data, filename):
    """Write an Excel sheet ("Vendor Bills") summarising the invoice lines.

    One row is written per entry in ``invoice_data['line_items']``; if there
    are none, a single row is written for the whole invoice. Amounts are
    treated as tax-inclusive at 11% only when ``invoice_data['tax_amount']``
    is positive, in which case the Tax column reads ``PPN 11%``.

    Args:
        invoice_data: dict as produced by ``parse_invoice_comprehensive``.
        filename: Path of the source PDF. The workbook is written next to
            it as ``<name without extension>_excel_template.xlsx``.

    Returns:
        The path of the workbook that was written.
    """
    import os  # local import keeps this function self-contained

    # splitext instead of str.replace('.pdf', ...): with replace, a name such
    # as "BILL.PDF" came back unchanged and the workbook overwrote the upload.
    excel_filename = os.path.splitext(filename)[0] + '_excel_template.xlsx'

    has_tax = invoice_data['tax_amount'] > 0
    tax_label = 'PPN 11%' if has_tax else ''

    def build_row(description, quantity, gross_amount):
        # Only strip the 11% when the row is actually marked as taxed, so
        # "Unit Price (Excl Tax)" and "Tax" never contradict each other.
        net_amount = gross_amount / 1.11 if has_tax else gross_amount
        return {
            'Vendor': invoice_data['supplier_name'],
            'Bill Reference': invoice_data['invoice_number'],
            'Bill Date': invoice_data['invoice_date'],
            'Due Date': invoice_data['due_date'],
            'Description': description,
            'Quantity': quantity,
            'Unit Price (Excl Tax)': net_amount,
            'Tax': tax_label,
            'Total Amount': gross_amount,
        }

    if invoice_data['line_items']:
        excel_data = [
            build_row(item['description'], item['quantity'], item['amount'])
            for item in invoice_data['line_items']
        ]
    else:
        excel_data = [
            build_row(
                f'Invoice {invoice_data["invoice_number"]}',
                1,
                invoice_data['total_amount'],
            )
        ]

    df = pd.DataFrame(excel_data)
    df.to_excel(excel_filename, index=False, sheet_name='Vendor Bills')
    return excel_filename

# Main execution: upload PDFs, parse each one, write the import files and
# render a block of download links for them.
from IPython.display import HTML, display

print("\n📁 Upload your PDF invoice:")
uploaded = files.upload()

# (label used in the failure message, function that writes the file)
export_builders = [
    ("CSV", create_odoo_csv_import),          # 1. CSV Import (Most reliable)
    ("Guide", create_manual_entry_guide),     # 2. Manual entry guide
    ("Excel", create_excel_template),         # 3. Excel template
]

# Per-extension presentation of a download link:
# (button colour, icon, method label, instruction, MIME type)
link_styles = {
    '.csv': ("#28a745", "📊", "CSV Import",
             "Accounting → Vendor Bills → Import → Upload CSV", 'text/csv'),
    '.txt': ("#17a2b8", "📖", "Manual Entry Guide",
             "Follow step-by-step instructions", 'text/plain'),
    '.xlsx': ("#007bff", "📑", "Excel Template",
              "Edit and import via Excel",
              'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'),
    '.json': ("#6c757d", "💾", "Data Backup",
              "JSON data for reference", 'application/json'),
}
default_link_style = ("#6c757d", "💾", "Data Backup",
                      "JSON data for reference", 'application/octet-stream')

for filename in uploaded.keys():
    print(f"\n✅ Processing: {filename}")

    # Extract and parse
    pdf_text = extract_pdf_text(filename)
    if not pdf_text:
        print("❌ Could not extract text from PDF")
        continue

    print(f"📄 Extracted {len(pdf_text)} characters of text")

    # Parse invoice data
    invoice_data = parse_invoice_comprehensive(pdf_text)

    # Display extracted data
    print(f"\n📋 EXTRACTED DATA:")
    print(f"📄 Invoice: {invoice_data['invoice_number']}")
    print(f"📅 Date: {invoice_data['invoice_date']}")
    print(f"🏢 Supplier: {invoice_data['supplier_name']}")
    print(f"🆔 Supplier NPWP: {invoice_data['supplier_npwp']}")
    print(f"👤 Customer: {invoice_data['customer_name']}")
    print(f"💰 Total: IDR {invoice_data['total_amount']:,.2f}")
    print(f"🧾 Line Items: {len(invoice_data['line_items'])}")

    if invoice_data['line_items']:
        print("\n📋 Line Items:")
        for i, item in enumerate(invoice_data['line_items'], 1):
            print(f"   {i}. {item['description']}: IDR {item['amount']:,.2f}")

    # Create all import formats; one failing format must not stop the others
    files_created = []
    for label, builder in export_builders:
        try:
            created_path = builder(invoice_data, filename)
            files_created.append(created_path)
            print(f"✅ Created: {created_path}")
        except Exception as e:
            print(f"❌ {label} creation failed: {e}")

    # 4. JSON backup
    # splitext instead of str.replace('.pdf', ...): with replace, a name such
    # as "BILL.PDF" came back unchanged and the JSON overwrote the upload.
    json_filename = os.path.splitext(filename)[0] + '_extracted_data.json'
    with open(json_filename, 'w', encoding='utf-8') as f:
        json.dump(invoice_data, f, indent=2, ensure_ascii=False)
    files_created.append(json_filename)

    # Create download interface
    print(f"\n📦 Created {len(files_created)} files for Odoo import")

    # Create comprehensive download interface
    html_content = f"""
    <div style="margin: 20px 0; padding: 20px; background: #f8f9fa; border-radius: 10px;">
        <h3 style="color: #28a745;">🎯 Odoo 16 Import Solutions</h3>

        <div style="background: #d4edda; padding: 15px; border-radius: 5px; margin: 15px 0;">
            <h4 style="color: #155724;">🥇 METHOD 1: CSV Import (Recommended)</h4>
    """

    # Add download buttons for each file; the file bytes are embedded as a
    # base64 data: URI so the link works without a server.
    try:
        for file_path in files_created:
            with open(file_path, 'rb') as f:
                file_data = base64.b64encode(f.read()).decode()

            color, icon, method, instruction, mime_type = link_styles.get(
                os.path.splitext(file_path)[1], default_link_style
            )

            html_content += f"""
            <p style="margin: 10px 0;">
                <a href="data:{mime_type};base64,{file_data}"
                   download="{os.path.basename(file_path)}"
                   style="background: {color}; color: white; padding: 10px 20px;
                          text-decoration: none; border-radius: 5px; margin-right: 10px; font-weight: bold;">
                    {icon} Download {os.path.basename(file_path)}
                </a>
                <span style="color: #495057;">{method}</span><br>
                <small style="color: #6c757d; margin-left: 10px;">{instruction}</small>
            </p>
            """

        html_content += f"""
        </div>

        <div style="background: #fff3cd; padding: 15px; border-radius: 5px; margin: 15px 0;">
            <h4 style="color: #856404;">⚠️ Important Notes</h4>
            <ul style="color: #856404;">
                <li><strong>Odoo's PDF auto-import is a PAID service</strong> - requires credits/subscription</li>
                <li><strong>CSV import works 100%</strong> - no OCR or AI needed</li>
                <li><strong>Manual entry is always reliable</strong> - use the guide for step-by-step process</li>
                <li><strong>Check vendor exists first</strong> - create vendor before importing bills</li>
                <li><strong>Verify account mapping</strong> - adjust expense accounts as needed</li>
            </ul>
        </div>

        <div style="background: #d1ecf1; padding: 15px; border-radius: 5px;">
            <h4 style="color: #0c5460;">🛠️ Odoo Configuration Needed</h4>
            <ol style="color: #0c5460;">
                <li>Enable IDR currency: Accounting → Configuration → Currencies</li>
                <li>Setup Indonesian taxes: Accounting → Configuration → Taxes</li>
                <li>Create PPN 11% tax rate</li>
                <li>Verify chart of accounts has expense accounts</li>
            </ol>
        </div>
    </div>
    """

        display(HTML(html_content))

    except Exception as e:
        print(f"Could not create download interface: {e}")
        print(f"Files created: {files_created}")

        # Manual download commands
        print(f"\n🔧 Manual download commands:")
        for file_path in files_created:
            print(f"files.download('{file_path}')")

print(f"\n✅ Processing complete!")
print(f"💡 Key insight: Odoo's PDF auto-import is a PAID cloud service")
print(f"🎯 Use CSV import for 100% reliable results without extra costs")

🔧 PRACTICAL Odoo 16 Invoice Import Solutions

📁 Upload your PDF invoice:


Saving INV-INDO-2024-00222.pdf to INV-INDO-2024-00222 (4).pdf

✅ Processing: INV-INDO-2024-00222 (4).pdf
📄 Extracted 2144 characters of text

📋 EXTRACTED DATA:
📄 Invoice: INV/
📅 Date: 8/2/2024
🏢 Supplier: PT. Bitonic Teknologi labs ORIGINAL FOR RECIPIENT
🆔 Supplier NPWP: 95.436.189.5-011.000
👤 Customer: 
💰 Total: IDR 0.00
🧾 Line Items: 7

📋 Line Items:
   1. Additional: IDR 2,250,000.00
   2. Additional: IDR 400,000.00
   3. FB-: IDR 603.92
   4. FB-: IDR 293.88
   5. FB-: IDR 440.81
   6. Service: IDR 270.48
   7. VAT@11%: IDR 86,121,958.68
✅ Created: INV-INDO-2024-00222 (4)_vendor_bills.csv
✅ Created: INV-INDO-2024-00222 (4)_manual_entry_guide.txt
✅ Created: INV-INDO-2024-00222 (4)_excel_template.xlsx

📦 Created 4 files for Odoo import



✅ Processing complete!
💡 Key insight: Odoo's PDF auto-import is a PAID cloud service
🎯 Use CSV import for 100% reliable results without extra costs
