In [None]:
import requests
import json
import os
import re
from markdown_pdf import MarkdownPdf

# HTTP endpoint that generate_toc() and generate_section() POST their prompts to.
OLLAMA_URL = "http://localhost:11434/api/generate"
# Sent as the "model" field of every request payload.
MODEL_NAME = "llama3.2:latest"


# Directory that receives the generated Markdown files and the final PDF.
OUTPUT_DIR = "book_markdown"
# Book topic: interpolated into the prompts and used in the PDF file name.
subject = "Independence of Bangladesh"
# Passed as the `author` argument when the PDF is built.
author = "hissain.khan@gmail.com"

# Create the output directory up front; exist_ok=True means an existing
# directory is not an error, so the cell can be re-run.
os.makedirs(OUTPUT_DIR, exist_ok=True)


def generate_toc(subject):
    """Generate a Table of Contents for the given subject.

    Sends a single non-streaming prompt to the Ollama endpoint and parses the
    JSON object embedded in the model's reply.

    Args:
        subject: Book topic that is interpolated into the prompt.

    Returns:
        The decoded JSON object (a dict). The prompt asks the model for a
        top-level "Table_of_Contents" key, but the shape is not validated here.

    Raises:
        Exception: If Ollama answers with a non-200 status, if the reply
            contains no "{" at all, or if the text starting at the first "{"
            is not valid JSON.
    """
    prompt = f"""
    You are an expert assistant skilled at creating detailed book outlines.
    Given a subject, your task is to create a comprehensive Table of Contents for a book on the given subject.
    
    Each chapter should have sections, and when relevant, sections should have subsubsections. 
    Structure the Table of Contents hierarchically to ensure maximum detail and granularity.
    Response json elements must be consistent with comma seperated array element. 
    
    Now generate a detailed Table of Contents for the book with Subject: {subject}.
    
    Structure the response as a valid JSON object:
    {{
        "Table_of_Contents": [
            {{
                "Title": "Chapter 1: Title of Chapter",
                "Sections": [
                    {{
                        "Title": "Section 1.1: Title of Section",
                        "Subsections": [
                            "Subsubsection 1.1.1: Title of Subsubsection",
                            "Subsubsection 1.1.2: Title of Subsubsection"
                        ]
                    }},
                    {{
                        "Title": "Section 1.2: Title of Section",
                        "Subsections": []
                    }}
                ]
            }},
            {{
                "Title": "Chapter 2: Title of Chapter",
                "Sections": []
            }}
        ]
    }}
    """
    payload = {"model": MODEL_NAME, "prompt": prompt, "stream": False}
    response = requests.post(OLLAMA_URL, json=payload)

    if response.status_code != 200:
        raise Exception(f"Error from Ollama: {response.text}")

    content = response.json().get("response", "")

    # The model may wrap the JSON in prose or code fences, so locate the first
    # opening brace and decode exactly one JSON value from there.
    start = content.find("{")
    if start == -1:
        raise Exception(f"No JSON object found in response: {content}")

    try:
        # raw_decode stops at the end of the first complete object, so any
        # trailing commentary (even text containing "}") is ignored instead of
        # being glued onto the JSON as a greedy "{...}" match would do.
        toc, _end = json.JSONDecoder().raw_decode(content, start)
    except json.JSONDecodeError as e:
        raise Exception(f"Failed to parse JSON: {e}") from e
    return toc

def generate_section(title):
    """Ask the Ollama model to write book content for one heading.

    Args:
        title: Heading text (chapter, section, or subsubsection) that is
            interpolated into the prompt.

    Returns:
        The "response" field of Ollama's JSON reply, or "" when that field
        is absent.

    Raises:
        Exception: If Ollama answers with a status code other than 200.
    """
    prompt = f"""
    You are a knowledgeable assistant tasked with writing detailed content for a book.
    Please write content for the following title:
    Title: '{title}'
    
    The content should be comprehensive, well-structured, and professional.
    Include examples, subtopics, and technical insights where applicable. 
    Expand on subtopics wherever possible to provide a deep understanding of the subject.
    """
    # One-shot (non-streaming) generation request.
    request_body = dict(model=MODEL_NAME, prompt=prompt, stream=False)
    reply = requests.post(OLLAMA_URL, json=request_body)

    if reply.status_code == 200:
        return reply.json().get("response", "")

    raise Exception(f"Error from Ollama: {reply.text}")

def save_content_to_file(filename, content):
    """Save generated content to a file inside OUTPUT_DIR.

    Args:
        filename: File name, joined onto OUTPUT_DIR.
        content: Text to write; an existing file is overwritten.
    """
    # Write UTF-8 explicitly: the PDF step re-reads these files with
    # encoding='utf-8', and the platform default encoding may differ.
    with open(os.path.join(OUTPUT_DIR, filename), "w", encoding="utf-8") as f:
        f.write(content)


if __name__ == "__main__":
    # Step 1: ask the model for the book outline and keep a copy on disk.
    print(f"Generating Table of Contents for subject: {subject}")

    # `toc` must be assigned here; otherwise the loop below fails with a
    # NameError whenever this cell is run on its own.
    toc = generate_toc(subject)
    print("Table of Contents generated successfully.")
    save_content_to_file("toc.txt", json.dumps(toc, indent=4))

    # Paths of the generated Markdown files, in reading order for the PDF.
    markdown_files = []

    # Step 2: generate content. The [:1] slices restrict the run to the first
    # chapter, its first section, and that section's first subsubsection.
    for chapter in toc["Table_of_Contents"][:1]:
        chapter_title = chapter["Title"]
        print(f"Generating content for {chapter_title}...")
        chapter_content = generate_section(subject + ":" + chapter_title)
        chapter_file = f"{chapter_title.replace(' ', '_')}.md"
        save_content_to_file(chapter_file, chapter_content)
        markdown_files.append(os.path.join(OUTPUT_DIR, chapter_file))

        # The outline comes from a language model, so tolerate a chapter or
        # section that omits its (possibly empty) child list.
        for section in chapter.get("Sections", [])[:1]:
            section_title = section["Title"]
            print(f"Generating content for {section_title}...")
            section_content = generate_section(subject + ":" + section_title)
            section_file = f"{section_title.replace(' ', '_')}.md"
            save_content_to_file(section_file, section_content)
            markdown_files.append(os.path.join(OUTPUT_DIR, section_file))

            for subsubsection_title in section.get("Subsections", [])[:1]:
                print(f"Generating content for {subsubsection_title}...")
                subsubsection_content = generate_section(subject + ":" + subsubsection_title)
                subsubsection_file = f"{subsubsection_title.replace(' ', '_')}.md"
                save_content_to_file(subsubsection_file, subsubsection_content)
                markdown_files.append(os.path.join(OUTPUT_DIR, subsubsection_file))

    # Step 3: Create PDF
    print("Merging content into a PDF book...")
    # MarkdownPDFHandler (defined further down in this file) has no
    # convert_markdown_to_pdf method; its PDF builder is
    # create_pdf_with_pre_tasks. That method takes no `self`, so it is called
    # on the class rather than on an instance.
    MarkdownPDFHandler.create_pdf_with_pre_tasks(
        toc=toc,
        subject=subject,
        author=author,
        output_file=f"{OUTPUT_DIR}/{subject}.pdf",
        markdown_files=markdown_files,
    )
    print(f"PDF book generated successfully at location: {OUTPUT_DIR}/{subject}.pdf")

In [None]:
import os
from markdown_pdf import MarkdownPdf, Section

class MarkdownPDFHandler:
    """Handles the conversion of Markdown content to PDF using markdown_pdf library."""

    def __init__(self, output_dir):
        """Remember the directory this handler is associated with.

        Args:
            output_dir: Stored on the instance as ``output_dir``.
        """
        self.output_dir = output_dir

    # Declared static because the signature has no `self`. Without the
    # decorator, calling this on an instance would bind the instance to `toc`
    # and fail with "got multiple values for argument 'toc'". As a
    # staticmethod it works from both the class and an instance.
    @staticmethod
    def create_pdf_with_pre_tasks(toc, subject, author, output_file, markdown_files):
        """Merge Markdown files into a single PDF.

        Args:
            toc: Not read by this method; accepted so callers can pass it.
            subject: Stored as the PDF "title" metadata.
            author: Stored as the PDF "author" metadata.
            output_file: Path the PDF is saved to.
            markdown_files: Iterable of Markdown file paths; each file
                becomes one section, in iteration order.
        """
        pdf = MarkdownPdf(toc_level=2)  # Adjust TOC levels as needed
        pdf.meta["title"] = subject
        pdf.meta["author"] = author

        for file in markdown_files:
            with open(file, 'r', encoding='utf-8') as f:
                markdown_content = f.read()
            # The file is closed before its text is added as a section.
            pdf.add_section(Section(markdown_content))

        pdf.save(output_file)
        print(f"PDF successfully created at {output_file}")
        
        
if __name__ == "__main__":
    # Sample data
    toc = {
        "Table_of_Contents": [
            {
                "Title": "Chapter 1: Birth of Bangladesh",
                "Sections": [
                    {
                        "Title": "Section 1.1: Political Background",
                        "Subsections": [
                            "Subsubsection 1.1.1: Colonial Era",
                            "Subsubsection 1.1.2: Language Movement"
                        ]
                    },
                    {
                        "Title": "Section 1.2: Liberation War",
                        "Subsections": [
                            "Subsubsection 1.2.1: Operation Searchlight",
                            "Subsubsection 1.2.2: International Support"
                        ]
                    }
                ]
            },
            {
                "Title": "Chapter 2: Post-Independence Challenges",
                "Sections": [
                    {
                        "Title": "Section 2.1: Economic Reconstruction",
                        "Subsections": [
                            "Subsubsection 2.1.1: Agricultural Reforms",
                            "Subsubsection 2.1.2: Industrial Growth"
                        ]
                    }
                ]
            }
        ]
    }

    # Ensure output directory exists
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # Paths of the placeholder files, collected in reading order so they can
    # be handed to the PDF builder (which requires `markdown_files`).
    markdown_files = []

    # Generate and save Markdown content. Files are written as UTF-8 because
    # the PDF builder reads them back with encoding='utf-8'.
    for chapter in toc["Table_of_Contents"]:
        chapter_file = f"{chapter['Title'].replace(' ', '_')}.md"
        chapter_path = os.path.join(OUTPUT_DIR, chapter_file)
        chapter_content = f"# {chapter['Title']}\n\nDetailed content about {chapter['Title']}.\n"
        with open(chapter_path, "w", encoding="utf-8") as f:
            f.write(chapter_content)
        markdown_files.append(chapter_path)

        for section in chapter["Sections"]:
            section_file = f"{section['Title'].replace(' ', '_')}.md"
            section_path = os.path.join(OUTPUT_DIR, section_file)
            section_content = f"## {section['Title']}\n\nDetailed content about {section['Title']}.\n"
            with open(section_path, "w", encoding="utf-8") as f:
                f.write(section_content)
            markdown_files.append(section_path)

            for subsubsection in section["Subsections"]:
                subsubsection_file = f"{subsubsection.replace(' ', '_')}.md"
                subsubsection_path = os.path.join(OUTPUT_DIR, subsubsection_file)
                subsubsection_content = f"### {subsubsection}\n\nDetailed content about {subsubsection}.\n"
                with open(subsubsection_path, "w", encoding="utf-8") as f:
                    f.write(subsubsection_content)
                markdown_files.append(subsubsection_path)

    # Generate PDF
    # Kept inside the __main__ guard because it depends on `toc`, which is
    # only defined here. The method takes no `self`, so it is called on the
    # class rather than on an instance.
    MarkdownPDFHandler.create_pdf_with_pre_tasks(
        toc=toc,
        subject=subject,
        author=author,
        output_file=f"{OUTPUT_DIR}/{subject}.pdf",
        markdown_files=markdown_files,
    )


In [None]:
from markdown_pdf import MarkdownPdf, Section

def create_pdf_with_pre_tasks(toc, subject, author, output_file, markdown_files):
    """Combine the given Markdown files into one PDF document.

    Args:
        toc: Accepted but not read by this function.
        subject: Stored as the PDF "title" metadata.
        author: Stored as the PDF "author" metadata.
        output_file: Destination path passed to ``pdf.save``.
        markdown_files: Iterable of Markdown file paths; each file is added
            as one section, in iteration order.
    """
    document = MarkdownPdf(toc_level=2)  # Adjust TOC levels as needed
    document.meta["title"] = subject
    document.meta["author"] = author

    for md_path in markdown_files:
        with open(md_path, 'r', encoding='utf-8') as handle:
            text = handle.read()
        # Each file's text becomes its own section of the PDF.
        document.add_section(Section(text))

    document.save(output_file)
    print(f"PDF successfully created at {output_file}")


In [None]:
# Bare expression: presumably left as the last line of a notebook cell so the
# current value of `toc` is displayed. Requires `toc` to be defined already.
toc