# NotebookLM Converter Demo

🚀 **Chuyển đổi sách điện tử sang định dạng tối ưu cho Google NotebookLM**

Notebook này demo ứng dụng **NotebookLM Converter** - một công cụ chuyển đổi sách điện tử sang các định dạng được tối ưu hóa đặc biệt cho Google NotebookLM.

## 📚 Input Formats Supported
- **EPUB** - Electronic Publication
- **MOBI** - Amazon Kindle Format  
- **AZW/AZW3** - Amazon Advanced Format
- **KFX** - Kindle Format X (cần Calibre)
- **iBooks** - Apple iBooks
- **CBR/CBZ** - Comic Book Archive

## 📄 Output Formats - Optimized for NotebookLM
- **PDF** - Typography & structure tối ưu cho AI analysis
- **TXT** - Cleaned text với metadata header
- **Markdown** - Structured format với YAML frontmatter

## 🎯 Optimization Modes
- **Chuẩn** - Standard conversion
- **NotebookLM** - Optimized đặc biệt cho NotebookLM
- **AI-Ready** - Best preparation cho AI systems

In [None]:
# Section 1: Import Required Libraries
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
import ebooklib
from ebooklib import epub
from bs4 import BeautifulSoup
from xhtml2pdf import pisa
import markdown
import os
import base64
import threading
import re
import subprocess
import sys
import zipfile
import tempfile
from PIL import Image
import io
from datetime import datetime

# Status banner: these lines only run if every import above succeeded,
# because a failing import raises before execution reaches them.
print("✅ Imported all required libraries for NotebookLM Converter")
print("📦 Core modules: tkinter, ebooklib, beautifulsoup4, xhtml2pdf")
print("🔧 Support modules: PIL, zipfile, threading, subprocess")

In [None]:
# Section 2: Create Main Application Class
class NotebookLMConverterApp:
    """Tkinter application that converts e-books into NotebookLM-friendly files.

    Supports:
    - Input: EPUB, MOBI, AZW/KFX, iBooks, CBR/CBZ
    - Output: PDF, TXT, Markdown (optimized for NotebookLM)

    Only construction is defined in the class body; the remaining methods are
    attached to the class by assignment further down the file.
    """

    def __init__(self, root):
        """Configure the main window and create the application state.

        Args:
            root: The Tk window that hosts the application.
        """
        self.root = root
        self.root.title("NotebookLM Converter")
        self.root.geometry("700x500")
        self.root.configure(bg="#f0f0f0")

        # Application variables
        self.input_path = None
        # Bind the variables to *this* window rather than to tkinter's implicit
        # default root, so the app also works when it is not the first (or
        # only) Tk instance in the process.
        self.output_format = tk.StringVar(master=root, value="PDF")
        self.quality_var = tk.StringVar(master=root, value="notebooklm")

        print("🔧 NotebookLM Converter Class initialized")
        print("📱 Default settings: PDF output, NotebookLM optimization")

# Demo: Create class instance (without GUI for now)
# __new__ allocates the object without running __init__, so no Tk window is
# touched. NOTE: the attributes set here are plain strings, whereas __init__
# stores tk.StringVar objects under the same names.
converter_demo = NotebookLMConverterApp.__new__(NotebookLMConverterApp)
converter_demo.input_path = None
converter_demo.output_format = "PDF"
converter_demo.quality_var = "notebooklm"
print("✅ Demo converter instance created successfully")

In [None]:
# Section 3: Setup GUI Interface
def setup_gui_interface(self):
    """Build the static header of the main window.

    Creates the padded main frame inside ``self.root`` and packs three labels
    into it: the title, the subtitle and the supported-formats line.

    Returns:
        The main ``tk.Frame`` so callers can add further widgets to it.
    """
    background = "#f0f0f0"

    main_frame = tk.Frame(self.root, padx=20, pady=20, bg=background)
    main_frame.pack(expand=True, fill=tk.BOTH)

    # (text, font, foreground colour, vertical padding), in display order.
    header_rows = (
        ("NotebookLM Converter",
         ("Helvetica", 20, "bold"), "#333", (0, 10)),
        ("Chuyển đổi sách điện tử sang định dạng tối ưu cho NotebookLM",
         ("Helvetica", 10), "#666", (0, 5)),
        # The app handles ".ibooks" files; the label previously advertised
        # "IBA", which no other part of the file supports.
        ("Input: EPUB, MOBI, AZW/KFX, iBooks, CBR/CBZ • Output: PDF, TXT, Markdown",
         ("Helvetica", 9), "#888", (0, 20)),
    )
    for text, font, foreground, pady in header_rows:
        label = tk.Label(main_frame, text=text, font=font,
                         bg=background, fg=foreground)
        label.pack(pady=pady)

    return main_frame

# Attach the function above to the class so it can be called as a method
NotebookLMConverterApp.setup_gui_interface = setup_gui_interface

print("🎨 GUI Interface setup method added")
print("📋 Components: Title, subtitle, format support info")
print("🖼️  Layout: Main frame với padding và styling")

In [None]:
# Section 4: Implement File Selection
def select_input_file(self):
    """Ask the user for an e-book file and remember the chosen path.

    The result of the dialog is stored in ``self.input_path`` (an empty value
    when the dialog is cancelled).

    Returns:
        True when a file was chosen, False when the dialog was cancelled.
    """
    # Each entry is (label, patterns). Several patterns are passed as a tuple:
    # a single "a;b" string is treated as one glob by Tk's own dialog on
    # Linux/macOS and therefore matches nothing there.
    file_types = [
        ("All supported", ("*.epub", "*.mobi", "*.azw", "*.azw3",
                           "*.kfx", "*.ibooks", "*.cbr", "*.cbz")),
        ("EPUB files", "*.epub"),
        ("MOBI files", "*.mobi"),
        ("AZW files", ("*.azw", "*.azw3")),
        ("KFX files", "*.kfx"),
        ("iBooks files", "*.ibooks"),
        ("Comic Book Archive", ("*.cbr", "*.cbz")),
        ("All files", "*.*"),
    ]

    self.input_path = filedialog.askopenfilename(
        title="Chọn file sách điện tử",
        filetypes=file_types,
    )

    if not self.input_path:
        print("❌ No file selected")
        return False

    filename = os.path.basename(self.input_path)
    file_ext = os.path.splitext(filename)[1].lower()
    print(f"📚 Selected: {filename} ({file_ext.upper()})")
    return True

# Demo file type detection
def detect_file_type(file_path):
    """Return a human-readable label for an e-book path.

    The decision is based only on the lower-cased file extension. A falsy
    path yields "Unknown"; an unrecognised extension yields
    "Unknown format: <ext>".
    """
    if not file_path:
        return "Unknown"

    _, suffix = os.path.splitext(file_path)
    suffix = suffix.lower()

    known_formats = (
        ('.epub', 'EPUB - Electronic Publication'),
        ('.mobi', 'MOBI - Amazon Kindle'),
        ('.azw', 'AZW - Amazon Format'),
        ('.azw3', 'AZW3 - Amazon Advanced'),
        ('.kfx', 'KFX - Kindle Format X'),
        ('.ibooks', 'iBooks - Apple Format'),
        ('.cbr', 'CBR - Comic Book RAR'),
        ('.cbz', 'CBZ - Comic Book ZIP'),
    )
    for extension, label in known_formats:
        if suffix == extension:
            return label

    return f"Unknown format: {suffix}"

# Add methods to class
# Only select_input_file is attached; detect_file_type stays a module-level
# helper and is called directly below.
NotebookLMConverterApp.select_input_file = select_input_file

# Demo file type detection
# The names below are only passed to detect_file_type, which inspects the
# extension; the files do not need to exist.
demo_files = [
    "sample.epub", "book.mobi", "novel.azw3", 
    "manual.kfx", "story.ibooks", "comic.cbz"
]

print("🔍 File Type Detection Demo:")
for file in demo_files:
    print(f"  📄 {file} → {detect_file_type(file)}")

print("\n✅ File selection functionality implemented")

In [None]:
# Section 5: Add Output Format Options
def setup_output_options(self):
    """Return the catalogue of output formats and optimization modes.

    ``self`` is not used, so the function can also be called with ``None``.

    Returns:
        A pair ``(formats, modes)``:
        - ``formats``: list of ``(display_name, value, description)`` tuples
        - ``modes``: list of ``(value, display_name, description)`` tuples
    """
    # Output formats optimized for NotebookLM
    pdf_entry = ("PDF", "PDF", "Portable Document Format - Tối ưu cho NotebookLM, giữ nguyên định dạng")
    txt_entry = ("TXT", "TXT", "Plain Text - Văn bản thuần túy, dễ phân tích cho AI")
    md_entry = ("Markdown", "MD", "Markdown - Định dạng có cấu trúc, tối ưu cho AI processing")

    # Optimization modes for NotebookLM
    standard_mode = ("standard", "Chuẩn", "Standard conversion")
    notebooklm_mode = ("notebooklm", "NotebookLM", "Optimized đặc biệt cho NotebookLM")
    ai_ready_mode = ("ai_ready", "AI-Ready", "Best preparation cho AI systems")

    return (
        [pdf_entry, txt_entry, md_entry],
        [standard_mode, notebooklm_mode, ai_ready_mode],
    )

# Demo output format configuration
def demo_format_config():
    """Print the available output formats, optimization modes and extensions."""
    # setup_output_options ignores its first argument, so None stands in for self.
    format_rows, mode_rows = setup_output_options(None)

    print("📄 NotebookLM Output Formats:")
    for row in format_rows:
        label, code, blurb = row
        print(f"  🔸 {label} ({code}): {blurb}")

    print("\n⚙️ Optimization Modes:")
    for row in mode_rows:
        code, label, blurb = row
        print(f"  🔹 {label} ({code}): {blurb}")

    # Extension mapping
    extensions = dict(PDF=".pdf", TXT=".txt", MD=".md")
    print(f"\n📁 File Extensions: {extensions}")

# Add to class
NotebookLMConverterApp.setup_output_options = setup_output_options

# Run demo
# Prints the format/mode tables as a side effect of executing this cell.
demo_format_config()
print("\n✅ Output format options configured for NotebookLM")

In [None]:
# Section 6: Create EPUB Extraction Methods
def extract_from_epub(self, epub_path):
    """Read an EPUB file and return its text content.

    Args:
        epub_path: Path of the EPUB file to read.

    Returns:
        The text produced by ``self.extract_text_content_from_epub``.

    Raises:
        Exception: If reading or extraction fails for any reason. The
            original error is attached as ``__cause__``.
    """
    try:
        book = epub.read_epub(epub_path)
        return self.extract_text_content_from_epub(book)
    except Exception as e:
        # Chain explicitly so the traceback shows the real failure as the
        # direct cause of this wrapper error.
        raise Exception(f"Lỗi đọc file EPUB: {str(e)}") from e

def extract_text_content_from_epub(self, book):
    """Build a cleaned text rendering of an EPUB book.

    The output starts with a small header (title and author when the book
    provides them, then a ``---`` separator) followed by the text of every
    document item referenced by the spine, in spine order.

    Args:
        book: An object exposing ``get_metadata(namespace, name)``, ``spine``
            and ``get_item_with_id(id)`` (an ebooklib ``EpubBook``).

    Returns:
        The header plus the cleaned text of all spine documents.
    """
    parts = []

    # Extract metadata
    title = book.get_metadata('DC', 'title')
    creator = book.get_metadata('DC', 'creator')

    # Add metadata header for NotebookLM
    if title:
        parts.append(f"# {title[0][0]}\n\n")
    if creator:
        parts.append(f"**Tác giả:** {creator[0][0]}\n\n")

    parts.append("---\n\n")

    # Extract content from spine
    for spine_entry in book.spine:
        # Spine entries are normally (idref, linear) tuples; accept a bare
        # idref string too instead of indexing its first character.
        item_ref = spine_entry if isinstance(spine_entry, str) else spine_entry[0]
        item = book.get_item_with_id(item_ref)
        # A spine reference without a matching manifest item yields None;
        # skip it rather than crash on item.get_type().
        if item is None or item.get_type() != ebooklib.ITEM_DOCUMENT:
            continue

        soup = BeautifulSoup(item.get_content(), 'lxml')

        # Remove script and style tags
        for tag in soup(["script", "style"]):
            tag.decompose()

        # Clean text for NotebookLM
        text = soup.get_text()
        text = re.sub(r'\n\s*\n', '\n\n', text)  # Remove extra blank lines
        text = re.sub(r' +', ' ', text)  # Remove extra spaces
        parts.append(text + "\n\n")

    return "".join(parts)

# Demo EPUB structure analysis
def demo_epub_structure():
    """Print an overview of the EPUB extraction steps and a sample metadata dict."""
    overview = (
        "📖 EPUB Structure Analysis for NotebookLM:",
        "  🔸 Metadata extraction: Title, Author, Language",
        "  🔸 Content extraction: All text from spine items",
        "  🔸 Text cleaning: Remove scripts, styles, extra whitespace",
        "  🔸 Structure preservation: Headers, paragraphs",
        "  🔸 NotebookLM optimization: Clean formatting, proper line breaks",
    )
    for line in overview:
        print(line)

    # Sample metadata structure (illustrative values only)
    sample_metadata = dict(
        title="Sample Book for NotebookLM",
        creator="Author Name",
        language="vi",
        chapters=15,
        estimated_words=50000,
    )

    print(f"\n📊 Sample Metadata: {sample_metadata}")

# Add methods to class
# extract_from_epub calls self.extract_text_content_from_epub, so both
# functions are attached.
NotebookLMConverterApp.extract_from_epub = extract_from_epub
NotebookLMConverterApp.extract_text_content_from_epub = extract_text_content_from_epub

# Run demo
demo_epub_structure()
print("\n✅ EPUB extraction methods implemented for NotebookLM")

In [None]:
# Section 7: Create MOBI/AZW Extraction Methods
def extract_from_mobi(self, mobi_path):
    """Extract text from a MOBI/AZW/AZW3 file via Calibre.

    The file is converted to a temporary EPUB with Calibre's
    ``ebook-convert`` command, which is then read with
    ``self.extract_from_epub``. The temporary directory is removed when the
    function returns.

    Args:
        mobi_path: Path of the MOBI/AZW/AZW3 file.

    Returns:
        The text extracted from the converted EPUB.

    Raises:
        Exception: If ``ebook-convert`` is not installed, if it exits with a
            non-zero status, or if reading the converted EPUB fails.
    """
    # Convert MOBI to temp EPUB for processing
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_epub = os.path.join(temp_dir, "temp.epub")

        try:
            # No shell: with shell=True and an argv list, POSIX passes only
            # the first element to the shell (the paths are dropped), and a
            # missing executable never raises FileNotFoundError. Running the
            # program directly also keeps odd file names from being
            # interpreted by a shell.
            result = subprocess.run(
                ['ebook-convert', mobi_path, temp_epub],
                capture_output=True, text=True,
            )
        except FileNotFoundError as err:
            raise Exception(
                "Calibre chưa được cài đặt. Vui lòng cài đặt Calibre để xử lý file MOBI/AZW."
            ) from err

        if result.returncode != 0:
            raise Exception(f"Lỗi chuyển đổi MOBI: {result.stderr}")

        # Must happen inside the with-block: the EPUB lives in temp_dir.
        return self.extract_from_epub(temp_epub)

def extract_from_kfx(self, kfx_path):
    """Extract text from a KFX file via Calibre (a KFX plugin is required).

    The file is converted to a temporary EPUB with ``ebook-convert`` and then
    read with ``self.extract_from_epub``.

    Args:
        kfx_path: Path of the KFX file.

    Returns:
        The text extracted from the converted EPUB.

    Raises:
        Exception: If ``ebook-convert`` is not installed, if it exits with a
            non-zero status, or if reading the converted EPUB fails.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_epub = os.path.join(temp_dir, "temp.epub")

        try:
            # No shell: with shell=True and an argv list, POSIX passes only
            # the first element to the shell (the paths are dropped), and a
            # missing executable never raises FileNotFoundError.
            result = subprocess.run(
                ['ebook-convert', kfx_path, temp_epub],
                capture_output=True, text=True,
            )
        except FileNotFoundError as err:
            raise Exception("Calibre với plugin KFX chưa được cài đặt.") from err

        if result.returncode != 0:
            raise Exception(f"Lỗi chuyển đổi KFX: {result.stderr}")

        # Must happen inside the with-block: the EPUB lives in temp_dir.
        return self.extract_from_epub(temp_epub)

def extract_from_ibooks(self, ibooks_path):
    """Extract text from an iBooks file by reading it as an EPUB.

    Args:
        ibooks_path: Path of the ``.ibooks`` file.

    Returns:
        Whatever ``self.extract_from_epub`` returns for the file.

    Raises:
        Exception: If the EPUB reader fails. The original error is attached
            as ``__cause__``.
    """
    try:
        return self.extract_from_epub(ibooks_path)
    except Exception as e:
        # Explicit chaining keeps the underlying failure visible in tracebacks.
        raise Exception(f"Lỗi đọc file iBooks: {str(e)}") from e

# Demo Calibre integration
def demo_calibre_integration():
    """Print how Amazon formats are routed through Calibre.

    Also reports whether Calibre's ``ebook-convert`` command can be found on
    the current PATH.
    """
    import shutil  # local import: only needed for the PATH lookup below

    print("📚 Calibre Integration for Amazon Formats:")
    print("  🔸 MOBI → Temp EPUB → Extract text")
    print("  🔸 AZW/AZW3 → Temp EPUB → Extract text") 
    print("  🔸 KFX → Temp EPUB (cần KFX plugin)")
    print("  🔸 iBooks → Direct EPUB processing")

    # Sample conversion workflow
    workflow = [
        "1. Detect file format (MOBI/AZW/KFX)",
        "2. Use ebook-convert command",
        "3. Create temporary EPUB file", 
        "4. Extract using EPUB methods",
        "5. Clean up temporary files",
        "6. Optimize for NotebookLM"
    ]

    print("\n🔄 Conversion Workflow:")
    for step in workflow:
        print(f"  {step}")

    # Real check instead of a hard-coded "available": look the command up on
    # PATH without executing it.
    calibre_found = shutil.which('ebook-convert') is not None
    print(f"\n🔧 Calibre Check: {'✅ Available' if calibre_found else '❌ Not found'}")

# Add methods to class
# All three delegate to self.extract_from_epub once an EPUB is available.
NotebookLMConverterApp.extract_from_mobi = extract_from_mobi
NotebookLMConverterApp.extract_from_kfx = extract_from_kfx
NotebookLMConverterApp.extract_from_ibooks = extract_from_ibooks

# Run demo
demo_calibre_integration()
print("\n✅ MOBI/AZW/KFX extraction methods implemented")

In [None]:
# Section 8: Create Comic Book Extraction Methods
def extract_from_comic(self, comic_path):
    """Describe a comic book archive (CBR/CBZ) as structured text.

    For ``.cbz`` files the ZIP index is read and every image entry is listed
    as one page, in natural reading order (``page2`` before ``page10``).
    For ``.cbr`` files only an explanatory note is produced, because RAR
    archives are not unpacked here. No image content is read.

    Args:
        comic_path: Path of the comic archive.

    Returns:
        Markdown-like text with the page listing and a NotebookLM footer.

    Raises:
        Exception: If the archive cannot be read. The original error is
            attached as ``__cause__``.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png', '.gif', '.bmp')

    def reading_order(name):
        # re.split with a capturing group alternates text/digits, so odd
        # positions are always digit runs and can be compared numerically.
        tokens = re.split(r'(\d+)', name.lower())
        key = [int(tok) if idx % 2 else tok for idx, tok in enumerate(tokens)]
        return key, name

    try:
        sections = [f"# Comic Book: {os.path.basename(comic_path)}\n\n"]
        lowered_path = comic_path.lower()

        if lowered_path.endswith('.cbz'):
            # CBZ is ZIP format
            with zipfile.ZipFile(comic_path, 'r') as zip_ref:
                file_list = sorted(
                    (entry for entry in zip_ref.namelist()
                     if entry.lower().endswith(image_suffixes)),
                    key=reading_order,
                )

            sections.append(f"**Tổng số trang:** {len(file_list)}\n\n")
            sections.append("**Cấu trúc comic:**\n\n")
            sections.extend(
                f"## Trang {page_no}: {filename}\n\n"
                for page_no, filename in enumerate(file_list, 1)
            )

        elif lowered_path.endswith('.cbr'):
            # CBR is RAR format
            sections.append("**Định dạng:** Comic Book RAR\n\n")
            sections.append("Đây là file comic book RAR. Cần công cụ đặc biệt để trích xuất.\n\n")
            sections.append("**Gợi ý:** Chuyển đổi thành CBZ để xử lý tốt hơn.\n")

        # Add metadata for NotebookLM
        sections.append("---\n\n")
        sections.append("**Optimized for NotebookLM Analysis**\n\n")
        sections.append("Đây là comic book được chuyển đổi thành text để phân tích trong NotebookLM.\n")

        return "".join(sections)
    except Exception as e:
        raise Exception(f"Lỗi đọc comic book: {str(e)}") from e

# Demo comic book processing
def demo_comic_processing():
    """Print a description of the comic-book workflow with sample data.

    Everything shown is hard-coded illustration; no archive is opened.
    """
    intro_lines = (
        "📚 Comic Book Processing for NotebookLM:",
        "  🔸 CBZ (ZIP): Extract image list and structure",
        "  🔸 CBR (RAR): Basic info extraction",
        "  🔸 Metadata: Page count, file structure",
        "  🔸 NotebookLM format: Structured text output",
    )
    for line in intro_lines:
        print(line)

    # Sample comic structure
    sample_comic = dict(
        name="sample_comic.cbz",
        pages=24,
        format="CBZ (ZIP)",
        images=["page001.jpg", "page002.jpg", "...", "page024.jpg"],
        size_mb=45.2,
    )
    print(f"\n📊 Sample Comic Info: {sample_comic}")

    # Sample output structure
    sample_output = """
# Comic Book: sample_comic.cbz

**Tổng số trang:** 24

**Cấu trúc comic:**

## Trang 1: page001.jpg
## Trang 2: page002.jpg
...
## Trang 24: page024.jpg

---

**Optimized for NotebookLM Analysis**

Đây là comic book được chuyển đổi thành text để phân tích trong NotebookLM.
    """

    # Only the first 200 characters of the sample are shown.
    preview = sample_output[:200] + "..."
    print("\n📝 Sample Output Structure:")
    print(preview)

# Add method to class
NotebookLMConverterApp.extract_from_comic = extract_from_comic

# Run demo
# The demo prints hard-coded sample data; it does not call extract_from_comic.
demo_comic_processing()
print("\n✅ Comic book extraction methods implemented")

In [None]:
# Section 9: Implement PDF Generation
def create_notebooklm_pdf(self, content, output_path):
    """Render extracted book text to a PDF file.

    The text is converted to HTML with ``self.format_content_for_html``,
    placed into a styled HTML template together with a generation timestamp
    and the optimization mode, and rendered with xhtml2pdf (``pisa``).

    Args:
        content: The extracted book text.
        output_path: Destination path of the PDF file.

    Raises:
        Exception: If formatting, rendering or writing fails. The original
            error is attached as ``__cause__``.
    """
    try:
        # HTML template with CSS; doubled braces are literal braces for str.format
        html_template = """
        <html>
        <head>
            <meta charset="UTF-8">
            <style>
                body {{
                    font-family: 'Times New Roman', serif;
                    font-size: 12pt;
                    line-height: 1.6;
                    margin: 2cm;
                    color: #333;
                }}
                h1, h2, h3, h4, h5, h6 {{
                    color: #2c3e50;
                    margin-top: 1.5em;
                    margin-bottom: 0.5em;
                }}
                h1 {{ 
                    font-size: 24pt; 
                    border-bottom: 2px solid #3498db;
                    padding-bottom: 0.3em;
                }}
                h2 {{ font-size: 18pt; }}
                h3 {{ font-size: 14pt; }}
                p {{ 
                    margin-bottom: 1em; 
                    text-align: justify; 
                }}
                .notebooklm-header {{
                    background: #f8f9fa;
                    padding: 1em;
                    border-left: 4px solid #007bff;
                    margin-bottom: 2em;
                }}
                .chapter {{ page-break-before: always; }}
            </style>
        </head>
        <body>
            <div class="notebooklm-header">
                <h1>Document Optimized for NotebookLM</h1>
                <p>Generated: {timestamp}</p>
                <p>Optimization: {quality_mode}</p>
            </div>
            <div class="content">
                {formatted_content}
            </div>
        </body>
        </html>
        """

        # Format content for HTML
        formatted_content = self.format_content_for_html(content)

        # quality_var is a tk.StringVar when set by __init__ but may be a
        # plain string on hand-built instances; unwrap it so the PDF shows
        # the mode rather than the Tk variable name.
        quality_setting = getattr(self, 'quality_var', 'notebooklm')
        if hasattr(quality_setting, 'get'):
            quality_mode = quality_setting.get()
        else:
            quality_mode = quality_setting

        # Generate HTML
        html_content = html_template.format(
            formatted_content=formatted_content,
            timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            quality_mode=quality_mode
        )

        # Create PDF
        with open(output_path, "w+b") as pdf_file:
            pisa_status = pisa.CreatePDF(html_content, dest=pdf_file, encoding='UTF-8')

        if pisa_status.err:
            raise Exception(f"Lỗi khi tạo PDF: {pisa_status.err}")

    except Exception as e:
        raise Exception(f"Lỗi tạo PDF cho NotebookLM: {str(e)}") from e

def format_content_for_html(self, content):
    """Convert lightly marked-up text into an HTML fragment.

    Blank-line separated blocks become ``<p>`` paragraphs; paragraphs that
    start with ``#``, ``##`` or ``###`` become ``<h1>``-``<h3>``; ``**x**``
    becomes ``<strong>`` and ``*x*`` becomes ``<em>``.

    Args:
        content: Plain text, optionally with the markdown-like markers above.

    Returns:
        The HTML fragment as a string.
    """
    import html  # local import: escaping is only needed here

    # Escape first so '<', '>' and '&' coming from the book text cannot
    # corrupt the generated markup. Quotes are left alone: the text is only
    # ever placed in element content, never in attributes.
    escaped = html.escape(content, quote=False)

    # Convert basic markdown-like syntax to HTML
    html_content = escaped.replace('\n\n', '</p><p>')
    html_content = f'<p>{html_content}</p>'

    # Convert headers
    html_content = re.sub(r'<p># ([^<]+)</p>', r'<h1>\1</h1>', html_content)
    html_content = re.sub(r'<p>## ([^<]+)</p>', r'<h2>\1</h2>', html_content)
    html_content = re.sub(r'<p>### ([^<]+)</p>', r'<h3>\1</h3>', html_content)

    # Convert bold/italic (bold first, so '**' is not consumed as two '*')
    html_content = re.sub(r'\*\*([^*]+)\*\*', r'<strong>\1</strong>', html_content)
    html_content = re.sub(r'\*([^*]+)\*', r'<em>\1</em>', html_content)

    return html_content

# Demo PDF generation
def demo_pdf_generation():
    """Print a summary of the PDF styling and a sample of the HTML header.

    The HTML shown is a hard-coded example; no PDF is generated.
    """
    summary = (
        "📄 PDF Generation for NotebookLM:",
        "  🔸 HTML template với CSS tối ưu",
        "  🔸 Typography: Times New Roman, 12pt, line-height 1.6",
        "  🔸 Colors: Professional color scheme",
        "  🔸 Structure: Headers, paragraphs, justified text",
        "  🔸 NotebookLM header: Timestamp, optimization info",
    )
    for line in summary:
        print(line)

    # Sample HTML output
    sample_html = """
    <div class="notebooklm-header">
        <h1>Document Optimized for NotebookLM</h1>
        <p>Generated: 2024-01-20 10:30:45</p>
        <p>Optimization: notebooklm</p>
    </div>
    """

    print("\n🔖 Sample HTML Header:")
    print(sample_html)

# Add methods to class
# create_notebooklm_pdf calls self.format_content_for_html, so both are attached.
NotebookLMConverterApp.create_notebooklm_pdf = create_notebooklm_pdf
NotebookLMConverterApp.format_content_for_html = format_content_for_html

# Run demo
demo_pdf_generation()
print("\n✅ PDF generation implemented for NotebookLM")

In [None]:
# Section 10: Implement TXT Generation
def create_notebooklm_txt(self, content, output_path):
    """Write the extracted text to a UTF-8 ``.txt`` file.

    The text is first passed through ``self.optimize_text_for_notebooklm``.

    Args:
        content: The extracted book text.
        output_path: Destination path of the text file.

    Raises:
        Exception: If optimizing or writing fails. The original error is
            attached as ``__cause__``.
    """
    try:
        # Optimize text for NotebookLM
        optimized_content = self.optimize_text_for_notebooklm(content)

        with open(output_path, 'w', encoding='utf-8') as txt_file:
            txt_file.write(optimized_content)

    except Exception as e:
        # Explicit chaining keeps the underlying failure visible in tracebacks.
        raise Exception(f"Lỗi tạo TXT cho NotebookLM: {str(e)}") from e

def optimize_text_for_notebooklm(self, content):
    """Clean the text and prepend a descriptive header.

    Every line is stripped, empty lines are dropped, characters outside the
    allowed set (word characters, whitespace and common punctuation) are
    removed, and the remaining lines are joined with blank lines between
    them.

    Args:
        content: The extracted book text.

    Returns:
        The header followed by the cleaned text.
    """
    # Compiled once instead of once per line.
    disallowed = re.compile(r'[^\w\s\.\,\!\?\:\;\-\(\)\[\]\{\}\"\'\/]')

    # Clean content
    cleaned_lines = []
    for raw_line in content.split('\n'):
        line = raw_line.strip()
        if line:  # Skip empty lines
            # Remove unnecessary special characters
            cleaned_lines.append(disallowed.sub('', line))

    # Join with proper structure for AI
    optimized_content = '\n\n'.join(cleaned_lines)

    # quality_var is a tk.StringVar when set by __init__ but may be a plain
    # string on hand-built instances. A StringVar has no .upper(), so it must
    # be unwrapped before formatting the header.
    quality_setting = getattr(self, 'quality_var', 'notebooklm')
    if hasattr(quality_setting, 'get'):
        quality_mode = quality_setting.get()
    else:
        quality_mode = quality_setting

    # Add NotebookLM header
    header = f"""DOCUMENT OPTIMIZED FOR NOTEBOOKLM
Quality Mode: {quality_mode.upper()}
Generated: {self.get_current_timestamp()}
Content Length: {len(optimized_content)} characters
Optimization Features:
- Cleaned special characters
- Proper paragraph structure
- AI-ready formatting
- UTF-8 encoding

---

"""

    return header + optimized_content

def get_current_timestamp(self):
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    layout = "%Y-%m-%d %H:%M:%S"
    moment = datetime.now()
    return moment.strftime(layout)

# Demo TXT optimization
def demo_txt_optimization():
    """Print a before/after illustration of the TXT optimization.

    Both samples are hard-coded string literals: this function does not call
    optimize_text_for_notebooklm, so the "optimized" text shown here is an
    illustration rather than the output of the real cleaning code.
    """
    print("📝 TXT Optimization for NotebookLM:")
    print("  🔸 Clean special characters")
    print("  🔸 Proper paragraph structure")
    print("  🔸 UTF-8 encoding")
    print("  🔸 AI-ready formatting")
    print("  🔸 Metadata header")
    
    # Sample raw text (the irregular spacing and blank lines are deliberate)
    sample_raw = """
    This is a sample text with extra   spaces.
    
    
    And some weird characters: @#$%^&*()
    Another paragraph with proper content.
    """
    
    # Sample optimized text
    # NOTE(review): hand-written example; it is not regenerated when the
    # optimization logic changes - confirm it still matches if that matters.
    sample_optimized = """
DOCUMENT OPTIMIZED FOR NOTEBOOKLM
Quality Mode: NOTEBOOKLM
Generated: 2024-01-20 10:30:45
Content Length: 95 characters

---

This is a sample text with extra spaces.

And some weird characters

Another paragraph with proper content.
    """
    
    print(f"\n📖 Raw Text:")
    # repr() makes the whitespace and newlines of the raw sample visible.
    print(repr(sample_raw))
    
    print(f"\n🔧 Optimized Text:")
    print(sample_optimized)

# Add methods to class
# create_notebooklm_txt calls self.optimize_text_for_notebooklm, which in turn
# calls self.get_current_timestamp, so all three are attached.
NotebookLMConverterApp.create_notebooklm_txt = create_notebooklm_txt
NotebookLMConverterApp.optimize_text_for_notebooklm = optimize_text_for_notebooklm
NotebookLMConverterApp.get_current_timestamp = get_current_timestamp

# Run demo
demo_txt_optimization()
print("\n✅ TXT generation implemented for NotebookLM")

In [None]:
# Section 11: Implement Markdown Generation
def create_notebooklm_markdown(self, content, output_path):
    """Write the extracted text to a UTF-8 Markdown file.

    The text is first passed through ``self.format_content_for_markdown``.

    Args:
        content: The extracted book text.
        output_path: Destination path of the Markdown file.

    Raises:
        Exception: If formatting or writing fails. The original error is
            attached as ``__cause__``.
    """
    try:
        # Convert content to structured Markdown
        markdown_content = self.format_content_for_markdown(content)

        with open(output_path, 'w', encoding='utf-8') as md_file:
            md_file.write(markdown_content)

    except Exception as e:
        # Explicit chaining keeps the underlying failure visible in tracebacks.
        raise Exception(f"Lỗi tạo Markdown cho NotebookLM: {str(e)}") from e

def format_content_for_markdown(self, content):
    """Wrap the text in a Markdown document with YAML front matter.

    The result consists of a fixed header (front matter plus a "Document
    Information" section), the non-empty lines of ``content`` separated by
    blank lines, and a fixed footer.

    Args:
        content: The extracted book text.

    Returns:
        The complete Markdown document as a string.
    """
    # quality_var is a tk.StringVar when set by __init__ but may be a plain
    # string on hand-built instances. A StringVar has no .title(), so it must
    # be unwrapped before it is formatted.
    quality_setting = getattr(self, 'quality_var', 'notebooklm')
    if hasattr(quality_setting, 'get'):
        quality_mode = quality_setting.get()
    else:
        quality_mode = quality_setting

    # Taken once so the front matter and the info section always agree.
    timestamp = self.get_current_timestamp()

    # Add YAML frontmatter for NotebookLM
    markdown_header = f"""---
title: "Document for NotebookLM Analysis"
format: "Markdown"
optimization: "{quality_mode}"
generated: "{timestamp}"
converter: "NotebookLM Converter v2.0"
ai_ready: true
encoding: "UTF-8"
---

# Document for NotebookLM Analysis

> **Optimized for Google NotebookLM**  
> This document has been specially formatted for AI analysis and processing.

## Document Information

- **Conversion Date**: {timestamp}
- **Optimization Mode**: {quality_mode.title()}
- **Format**: Markdown with YAML frontmatter
- **Encoding**: UTF-8

---

"""

    # Improve Markdown structure
    formatted_lines = []
    for raw_line in content.split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        # NOTE(review): the rule below promotes *long* lines (> 50 chars)
        # that do not end with '.' to headings, which also catches ordinary
        # paragraphs ending in '?', '!' or a quote. Behavior is kept as-is;
        # confirm whether a short-line rule was intended.
        looks_like_heading = (
            not line.startswith('#')
            and len(line) > 50
            and not line.endswith('.')
            and not line.startswith('**')
        )
        formatted_lines.append(f"## {line}" if looks_like_heading else line)

    # Join content
    formatted_content = '\n\n'.join(formatted_lines)

    # Add footer
    footer = """

---

## NotebookLM Integration Notes

This document is optimized for Google NotebookLM with the following features:

- ✅ **YAML Frontmatter**: Metadata for AI processing
- ✅ **Structured Headers**: Logical content hierarchy  
- ✅ **Clean Formatting**: Consistent markdown syntax
- ✅ **AI-Ready Content**: Optimized for language models
- ✅ **UTF-8 Encoding**: Full Unicode support

### Recommended NotebookLM Workflow

1. **Upload** this markdown file to NotebookLM
2. **Analyze** using AI-powered insights
3. **Generate** summaries and questions
4. **Extract** key information and themes

*Generated by NotebookLM Converter - Optimized for AI Analysis*
"""

    return markdown_header + formatted_content + footer

# Demo Markdown generation
def demo_markdown_generation():
    """Print a summary of the Markdown output plus sample front matter.

    All values shown are hard-coded examples; no file is generated.
    """
    feature_lines = (
        "📝 Markdown Generation for NotebookLM:",
        "  🔸 YAML frontmatter với metadata",
        "  🔸 Structured headers và content",
        "  🔸 AI-ready formatting",
        "  🔸 UTF-8 encoding",
        "  🔸 NotebookLM integration notes",
    )
    for line in feature_lines:
        print(line)

    # Sample YAML frontmatter
    sample_yaml = """---
title: "Document for NotebookLM Analysis"
format: "Markdown"
optimization: "notebooklm"
generated: "2024-01-20 10:30:45"
converter: "NotebookLM Converter v2.0"
ai_ready: true
encoding: "UTF-8"
---"""

    print("\n📋 Sample YAML Frontmatter:")
    print(sample_yaml)

    # Sample content structure
    outline = (
        "# Document for NotebookLM Analysis",
        "## Document Information",
        "## Main Content",
        "### Chapter 1",
        "### Chapter 2",
        "## NotebookLM Integration Notes",
    )

    print("\n🏗️ Content Structure:")
    for heading in outline:
        print(f"  {heading}")

# Add methods to class
# create_notebooklm_markdown calls self.format_content_for_markdown, so both
# are attached.
NotebookLMConverterApp.create_notebooklm_markdown = create_notebooklm_markdown
NotebookLMConverterApp.format_content_for_markdown = format_content_for_markdown

# Run demo
demo_markdown_generation()
print("\n✅ Markdown generation implemented for NotebookLM")

In [None]:
# Section 12: Add Conversion Threading
def start_conversion_thread(self):
    """Start the conversion of ``self.input_path`` on a background thread.

    The output format is read from ``self.output_format`` (either a Tk
    variable or a plain string). The output file name is fixed to
    ``output_notebooklm.<ext>`` in the current working directory.

    Returns:
        True when the worker thread was started; False when no input file is
        selected or the output format is not one of PDF/TXT/MD.
    """
    if not self.input_path:
        print("❌ No input file selected")
        return False

    # Get output format (Tk variable or plain string)
    if hasattr(self.output_format, 'get'):
        format_choice = self.output_format.get()
    else:
        format_choice = self.output_format

    # Extension mapping
    extensions = {"PDF": ".pdf", "TXT": ".txt", "MD": ".md"}
    extension = extensions.get(format_choice)
    if extension is None:
        # Report an unknown format instead of failing with a KeyError.
        print(f"❌ Unsupported output format: {format_choice}")
        return False

    # Simulate file save dialog
    output_path = f"output_notebooklm{extension}"

    print(f"🔄 Starting conversion to {format_choice}...")
    print(f"📁 Output: {output_path}")

    # Start conversion in thread
    conversion_thread = threading.Thread(
        target=self.convert_ebook,
        args=(self.input_path, output_path, format_choice)
    )
    conversion_thread.start()

    return True

def convert_ebook(self, input_path, output_path, output_format):
    """Convert one e-book file to the requested output format.

    The extractor is chosen from the input file extension and the writer
    from ``output_format``. Progress and errors are reported with ``print``;
    exceptions are not propagated.

    Args:
        input_path: Path of the source e-book.
        output_path: Path of the file to create.
        output_format: One of "PDF", "TXT" or "MD".

    Returns:
        True on success, False if anything failed (unsupported input or
        output format, extraction error, write error).
    """
    # Method names are looked up lazily so only the needed one is resolved.
    extractors = {
        '.epub': 'extract_from_epub',
        '.mobi': 'extract_from_mobi',
        '.azw': 'extract_from_mobi',
        '.azw3': 'extract_from_mobi',
        '.kfx': 'extract_from_kfx',
        '.ibooks': 'extract_from_ibooks',
        '.cbr': 'extract_from_comic',
        '.cbz': 'extract_from_comic',
    }
    writers = {
        "PDF": 'create_notebooklm_pdf',
        "TXT": 'create_notebooklm_txt',
        "MD": 'create_notebooklm_markdown',
    }

    try:
        print(f"📖 Processing: {os.path.basename(input_path)}")

        # Detect file type
        file_ext = os.path.splitext(input_path)[1].lower()

        extractor_name = extractors.get(file_ext)
        if extractor_name is None:
            raise Exception(f"Unsupported format: {file_ext}")

        # Reject an unknown target up front: otherwise nothing would be
        # written and the run would still be reported as successful.
        writer_name = writers.get(output_format)
        if writer_name is None:
            raise Exception(f"Unsupported output format: {output_format}")

        # Extract content based on file type
        book_content = getattr(self, extractor_name)(input_path)
        print(f"✅ Content extracted: {len(book_content)} characters")

        # Convert to output format
        getattr(self, writer_name)(book_content, output_path)

        print(f"🎉 Conversion successful: {output_path}")
        return True

    except Exception as e:
        print(f"❌ Conversion failed: {str(e)}")
        return False

# Demo threading workflow
def demo_conversion_workflow():
    """Print the conversion steps and a sample status record.

    The status values are hard-coded examples; no conversion is started.
    """
    steps = (
        "🔄 Conversion Threading Workflow:",
        "  1. 🔍 Validate input file",
        "  2. 🎯 Determine output format",
        "  3. 🧵 Start background thread",
        "  4. 📖 Extract content by file type",
        "  5. 🔧 Apply NotebookLM optimization",
        "  6. 💾 Generate output file",
        "  7. ✅ Report completion status",
    )
    for line in steps:
        print(line)

    # Sample conversion status
    conversion_status = dict(
        input="sample_book.epub",
        output="sample_book_notebooklm.pdf",
        format="PDF",
        optimization="notebooklm",
        thread_id="conversion_thread_001",
        progress="100%",
        status="completed",
    )

    print("\n📊 Sample Conversion Status:")
    for field in conversion_status:
        print(f"  {field}: {conversion_status[field]}")

# Add methods to class
# start_conversion_thread uses self.convert_ebook as the thread target, so
# both are attached.
NotebookLMConverterApp.start_conversion_thread = start_conversion_thread
NotebookLMConverterApp.convert_ebook = convert_ebook

# Run demo
demo_conversion_workflow()
print("\n✅ Conversion threading implemented")

In [None]:
# Section 13: Create Error Handling
def handle_conversion_error(self, error, context=""):
    """Describe an error raised during conversion.

    Args:
        error: The exception instance to describe.
        context: Optional free-text note on where the error occurred.

    Returns:
        A dict with the keys ``type`` (class name of the error),
        ``description`` (user-facing category text), ``message``
        (``str(error)``), ``context`` and ``timestamp``.
    """
    # Keyed by bare class name: type(error).__name__ never contains the
    # module, so "subprocess.CalledProcessError" / "zipfile.BadZipFile"
    # could never match.
    descriptions = {
        "FileNotFoundError": "File không tìm thấy",
        "PermissionError": "Không có quyền truy cập file",
        "UnicodeDecodeError": "Lỗi encoding file",
        "CalledProcessError": "Lỗi Calibre conversion",
        "BadZipFile": "File ZIP/CBZ bị hỏng",
    }
    fallback = "Lỗi không xác định"

    error_class = type(error)
    error_type = error_class.__name__

    # Walk the class hierarchy so subclasses of a known error type receive
    # the description of their closest known ancestor.
    error_description = next(
        (descriptions[klass.__name__]
         for klass in error_class.__mro__
         if klass.__name__ in descriptions),
        fallback,
    )

    return {
        "type": error_type,
        "description": error_description,
        "message": str(error),
        "context": context,
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }

def validate_input_file(self, file_path):
    """Check that ``file_path`` points at a convertible ebook file.

    The checks run in a fixed order: non-empty path, path exists, path is a
    regular file, size within the demo limit of 100 MB, and finally a
    supported (case-insensitive) file extension.

    Args:
        file_path: Path of the candidate input file.

    Returns:
        bool: ``True`` when every check passes.

    Raises:
        FileNotFoundError: If nothing exists at ``file_path``.
        ValueError: If the path is empty, is not a file, exceeds the size
            limit, or has an unsupported extension.
    """
    max_bytes = 100 * 1024 * 1024  # demo limit: 100 MB
    supported_suffixes = (
        '.epub', '.mobi', '.azw', '.azw3', '.kfx', '.ibooks', '.cbr', '.cbz',
    )

    if not file_path:
        raise ValueError("No file path provided")
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")
    if not os.path.isfile(file_path):
        raise ValueError(f"Path is not a file: {file_path}")

    size_in_bytes = os.path.getsize(file_path)
    if size_in_bytes > max_bytes:
        raise ValueError(f"File too large: {size_in_bytes / (1024*1024):.1f} MB")

    _, suffix = os.path.splitext(file_path)
    suffix = suffix.lower()
    if suffix in supported_suffixes:
        return True
    raise ValueError(f"Unsupported file format: {suffix}")

def check_dependencies(self):
    """Report whether Calibre and the required Python packages are available.

    Calibre is probed by running ``ebook-convert --version`` directly, with
    no shell. The previous code combined an argument list with
    ``shell=True``; on POSIX that passes only the first item to the shell as
    the command, so ``--version`` never reached ``ebook-convert``.

    The Python packages are probed with ``importlib.util.find_spec``, which
    locates a top-level module without importing it. Previously the
    ``"python_modules"`` flag was hard-coded to ``True``.

    Returns:
        dict: ``{"calibre": bool, "python_modules": bool}``. ``"calibre"``
        is ``True`` only if the command ran and exited with status 0;
        ``"python_modules"`` is ``True`` only if every required module can
        be located.
    """
    import importlib.util  # local import keeps this block self-contained

    # Import names, not pip distribution names: the "beautifulsoup4"
    # distribution is imported as "bs4".
    required_modules = ("ebooklib", "bs4", "xhtml2pdf", "PIL")

    status = {"calibre": False, "python_modules": True}

    # Check Calibre. The output is irrelevant, so it is discarded instead of
    # being captured and decoded.
    try:
        result = subprocess.run(
            ["ebook-convert", "--version"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        status["calibre"] = result.returncode == 0
    except (OSError, subprocess.SubprocessError):
        # Executable missing or not runnable: treat Calibre as unavailable.
        status["calibre"] = False

    # Check Python modules.
    for module_name in required_modules:
        try:
            module_found = importlib.util.find_spec(module_name) is not None
        except (ImportError, ValueError):
            module_found = False
        if not module_found:
            status["python_modules"] = False
            break

    return status

# Demo error handling
def demo_error_handling():
    """Print a scripted tour of the converter's error-handling features.

    Nothing is validated or executed here: the scenarios, the dependency
    results and the sample error record are fixed values shown for
    illustration only.
    """
    overview_lines = (
        "🚨 Error Handling for NotebookLM Converter:",
        "  🔸 File validation: existence, size, format",
        "  🔸 Dependency checking: Calibre, Python modules",
        "  🔸 Conversion errors: format-specific handling",
        "  🔸 User feedback: clear error messages",
    )
    for line in overview_lines:
        print(line)

    # Sample error scenarios
    scenarios = (
        {"type": "FileNotFoundError", "case": "Missing input file"},
        {"type": "UnicodeDecodeError", "case": "Corrupted EPUB file"},
        {"type": "subprocess.CalledProcessError", "case": "Calibre not found"},
        {"type": "PermissionError", "case": "No write access to output folder"},
        {"type": "zipfile.BadZipFile", "case": "Corrupted CBZ file"},
    )
    print("\n🔧 Error Scenarios:")
    for scenario in scenarios:
        print("  ❌ {type}: {case}".format(**scenario))

    # Demo dependency check: availability is a fixed demo value, not probed.
    calibre_available = True
    calibre_label = '✅ Available' if calibre_available else '❌ Missing'
    print("\n🔍 Dependency Check Demo:")
    print(f"  📦 Calibre: {calibre_label}")
    print("  🐍 Python modules: ✅ Available")

    # Sample error info
    sample_error = {
        "type": "FileNotFoundError",
        "description": "File không tìm thấy",
        "message": "sample_book.epub not found",
        "context": "EPUB extraction",
        "timestamp": "2024-01-20 10:30:45",
    }
    print("\n📋 Sample Error Info:")
    for field in sample_error:
        print(f"  {field}: {sample_error[field]}")

# Add methods to class
# Each function below takes `self` as its first parameter; assigning it as a
# class attribute makes it callable as a method on NotebookLMConverterApp
# instances.
NotebookLMConverterApp.handle_conversion_error = handle_conversion_error
NotebookLMConverterApp.validate_input_file = validate_input_file
NotebookLMConverterApp.check_dependencies = check_dependencies

# Run demo
# Executes at cell/import time: prints the error-handling walkthrough, then a
# confirmation line.
demo_error_handling()
print("\n✅ Comprehensive error handling implemented")

In [None]:
# Section 14: Run Application & Complete Demo

def run_notebooklm_converter():
    """Simulate launching the converter and report whether it succeeded.

    No window is created and no dependency is actually probed; the function
    prints a scripted start-up log using fixed values.

    Returns:
        bool: ``True`` when the whole log was printed, ``False`` if any
        exception was raised along the way (the failure is printed too).
    """
    startup_log = (
        "🚀 Launching NotebookLM Converter...",
        "=" * 50,
        # Create main window (simulated for demo)
        "🖼️  Creating GUI window...",
        "📱 Title: NotebookLM Converter",
        "📐 Size: 700x500",
        "🎨 Theme: Professional UI",
        # Initialize application
        "\n🔧 Initializing application...",
        "📚 Input formats: EPUB, MOBI, AZW/KFX, iBooks, CBR/CBZ",
        "📄 Output formats: PDF, TXT, Markdown (NotebookLM optimized)",
        "⚙️ Optimization modes: Standard, NotebookLM, AI-Ready",
        # Check dependencies
        "\n🔍 Checking dependencies...",
    )
    # Fixed demo values; nothing is checked on the running system.
    dependency_flags = {"ebooklib": True, "beautifulsoup4": True, "xhtml2pdf": True, "calibre": True}

    try:
        for line in startup_log:
            print(line)

        for dependency in dependency_flags:
            marker = '✅' if dependency_flags[dependency] else '❌'
            print(f"  {marker} {dependency}")

        print("\n🎯 NotebookLM Converter ready!")
        print("💡 Ready to convert ebooks to NotebookLM-optimized formats")
    except Exception as e:
        print(f"❌ Failed to launch: {str(e)}")
        return False

    return True

# Complete workflow demonstration
def demo_complete_workflow():
    """Print a five-step, end-to-end walkthrough of a NotebookLM conversion.

    All file names, sizes, formats and tips are fixed sample data; the
    function only writes to standard output and returns nothing.
    """
    print("\n📋 Complete NotebookLM Conversion Workflow Demo:")
    print("=" * 60)

    # Step 1: File Selection
    print("\n1️⃣ FILE SELECTION")
    sample_files = (
        {"name": "novel.epub", "size": "2.3 MB", "type": "EPUB"},
        {"name": "textbook.mobi", "size": "5.1 MB", "type": "MOBI"},
        {"name": "comic.cbz", "size": "45.2 MB", "type": "CBZ"},
    )
    for entry in sample_files:
        print("  📚 {name} ({size}) - {type}".format(**entry))

    # Step 2: Format Selection
    print("\n2️⃣ OUTPUT FORMAT SELECTION")
    output_formats = (
        {"format": "PDF", "optimization": "Typography + Structure", "best_for": "Direct upload to NotebookLM"},
        {"format": "TXT", "optimization": "Clean text + Metadata", "best_for": "AI processing"},
        {"format": "Markdown", "optimization": "YAML + Structure", "best_for": "Editing + AI analysis"},
    )
    for entry in output_formats:
        print("  📄 {format}: {optimization} → {best_for}".format(**entry))

    # Step 3: Optimization Selection
    print("\n3️⃣ OPTIMIZATION MODE")
    optimization_modes = (
        {"mode": "Standard", "description": "Basic conversion"},
        {"mode": "NotebookLM", "description": "Optimized for NotebookLM"},
        {"mode": "AI-Ready", "description": "Best for AI systems"},
    )
    for entry in optimization_modes:
        print("  ⚙️ {mode}: {description}".format(**entry))

    # Steps 4 and 5 are plain bullet lists, so they share one printing loop.
    bullet_sections = (
        # Step 4: Conversion Process
        ("\n4️⃣ CONVERSION PROCESS", (
            "🔍 Validate input file",
            "📖 Extract content by format",
            "🧹 Clean and optimize text",
            "🎨 Apply NotebookLM formatting",
            "💾 Generate output file",
            "✅ Complete with metadata",
        )),
        # Step 5: NotebookLM Integration
        ("\n5️⃣ NOTEBOOKLM INTEGRATION", (
            "📤 Upload generated file to NotebookLM",
            "🤖 Use AI analysis features",
            "📊 Generate insights and summaries",
            "❓ Ask questions about content",
            "📝 Create study notes and highlights",
        )),
    )
    for heading, bullets in bullet_sections:
        print(heading)
        for bullet in bullets:
            print(f"  {bullet}")

# Performance benchmarks
def demo_performance_benchmarks():
    """Print a table of estimated conversion times per format pair.

    The figures are fixed, illustrative estimates; nothing is measured and
    nothing is returned.
    """
    print("\n⚡ Performance Benchmarks (Estimated):")
    print("=" * 40)

    estimates = (
        {"format": "EPUB → PDF", "size": "300 pages", "time": "30-60 seconds"},
        {"format": "EPUB → TXT", "size": "300 pages", "time": "10-20 seconds"},
        {"format": "EPUB → Markdown", "size": "300 pages", "time": "15-30 seconds"},
        {"format": "MOBI → PDF", "size": "300 pages", "time": "45-90 seconds"},
        {"format": "CBZ → TXT", "size": "24 pages", "time": "5-10 seconds"},
    )
    for estimate in estimates:
        print("  📊 {format} ({size}): {time}".format(**estimate))

# Run complete demo
# Everything below executes at cell/import time and only prints to stdout.
print("🎉 NOTEBOOKLM CONVERTER - COMPLETE DEMO")
print("=" * 60)

# Launch application
# run_notebooklm_converter() returns False if its start-up log raised; in
# that case the walkthrough and closing instructions are skipped.
if run_notebooklm_converter():
    # Demo workflow
    demo_complete_workflow()
    
    # Performance info
    demo_performance_benchmarks()
    
    print(f"\n🎯 READY FOR NOTEBOOKLM!")
    print("✨ Convert any ebook to NotebookLM-optimized format")
    print("🚀 Upload to NotebookLM and start AI-powered analysis")
    
    # Instructions shown to the user for running the application itself.
    print(f"\n📞 Next Steps:")
    print("  1. Run: python main.py")
    print("  2. Select your ebook file") 
    print("  3. Choose output format")
    print("  4. Convert for NotebookLM")
    print("  5. Upload and analyze!")

# Printed regardless of whether the launch step succeeded.
print("\n✅ NotebookLM Converter Demo Complete!")