# eBook to Audio project

## Install dependencies

In [12]:
pip install gTTS pydub ebooklib bs4 edge-tts

Collecting edge-tts
  Downloading edge_tts-7.0.2-py3-none-any.whl.metadata (5.5 kB)
Collecting aiohttp<4.0.0,>=3.8.0 (from edge-tts)
  Downloading aiohttp-3.12.13-cp311-cp311-win_amd64.whl.metadata (7.9 kB)
Collecting srt<4.0.0,>=3.4.1 (from edge-tts)
  Downloading srt-3.5.3.tar.gz (28 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Collecting tabulate<1.0.0,>=0.4.4 (from edge-tts)
  Downloading tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)
Collecting aiohappyeyeballs>=2.5.0 (from aiohttp<4.0.0,>=3.8.0->edge-tts)
  Downloading aiohappyeyeballs-2.6.1-py3-none-any.whl.metadata (5.9 kB)
Collecting aiosignal>=1.1.2 (from aiohttp<4.0.0,>=3.8.0->edge-tts)
  Downloading aiosignal-1.4.0-py3-none-any.whl.metada

## Extract chapters

In [3]:
import re
from ebooklib import epub, ITEM_DOCUMENT
from bs4 import BeautifulSoup

# Backslash escape sequences understood by Python's "unicode_escape" codec.
# Every alternative is pure ASCII, so a match can be handed to the codec on
# its own without touching the surrounding (possibly non-ASCII) text.
_ESCAPE_SEQUENCE_RE = re.compile(
    r"""
    \\(?:
        U[0-9A-Fa-f]{8}          # 8-digit hex escape
      | u[0-9A-Fa-f]{4}          # 4-digit hex escape
      | x[0-9A-Fa-f]{2}          # 2-digit hex escape
      | [0-7]{1,3}               # octal escape
      | N\{[A-Za-z0-9 \-]+\}     # character-by-name escape
      | [\\'"abfnrtv]            # single-character escapes
      | \n                       # backslash followed by a real newline
    )
    """,
    re.VERBOSE,
)


def _decode_escape_sequences(text):
    """Decode literal backslash escapes in ``text`` and leave everything else untouched."""
    def _decode(match):
        sequence = match.group(0)
        try:
            return sequence.encode("ascii").decode("unicode_escape")
        except UnicodeDecodeError:
            # e.g. an unknown \N{...} name or an out-of-range \U value: keep it verbatim.
            return sequence

    return _ESCAPE_SEQUENCE_RE.sub(_decode, text)


def clean_text_for_tts(text):
    """Normalise chapter text so a TTS engine pauses at sentence and line breaks.

    Steps, in order:
      1. Decode literal escape sequences such as ``\\n`` or ``\\'``.
      2. Insert a space after ``.``, ``?`` or ``!`` when a letter or a straight
         double quote follows immediately.
      3. Replace the whitespace after sentence-ending punctuation with `` ... ``.
      4. Separate sentence-ending punctuation from a directly following straight
         double quote.
      5. Replace each run of newlines with a ``...`` line.
      6. Drop any remaining backslashes.

    Args:
        text: Raw chapter text.

    Returns:
        The cleaned text with leading and trailing whitespace removed.
    """
    # 1. Decode escaped characters like \n, \'.
    #    Only the escape sequences themselves go through the "unicode_escape" codec.
    #    Round-tripping the whole string through UTF-8 bytes would decode those bytes
    #    as Latin-1 and corrupt every non-ASCII character, and it would raise on a
    #    trailing backslash or a truncated escape.
    text = _decode_escape_sequences(text)

    # 2. Ensure spacing after sentence-ending punctuation (before any letter or quote)
    text = re.sub(r'([.?!])(?=["A-Za-z])', r'\1 ', text)

    # 3. Add longer pause for sentence ends (single pass over ., ? and !)
    text = re.sub(r'([.?!])\s+', r'\1 ... ', text)

    # 4. Fix smashed dialogue punctuation (e.g. .")
    #    NOTE(review): step 2 already inserts a space before a straight double
    #    quote, so this looks like a safety net only - confirm before removing.
    text = re.sub(r'([.?!])(")', r'\1 \2', text)

    # 5. Add pause after newlines
    text = re.sub(r'\n+', '\n... \n', text)

    # 6. Remove leftover backslashes
    text = text.replace("\\", "")

    return text.strip()

def extract_actual_chapters(epub_path):
    """Read an EPUB and return its chapters as TTS-ready strings.

    Every item in the book is numbered from 1 in iteration order; the numbering
    counts all items, not only documents. Non-document items are ignored.
    Item 1 is always skipped. Item 2, when it contains "Chapter 701", is treated
    as a combined document and split at each chapter heading. Any other document
    contributes the text from its first chapter heading to its end.

    Args:
        epub_path: Path of the EPUB file to read.

    Returns:
        A list of chapter strings, each passed through ``clean_text_for_tts``.
    """
    heading_splitter = re.compile(r"(Chapter\s+\d+[:\-]?\s+)")
    heading_finder = re.compile(r"(Chapter\s+\d+[:\-]?\s+.*)", re.IGNORECASE)

    collected = []
    items = epub.read_epub(epub_path).get_items()

    for position, entry in enumerate(items, start=1):
        if entry.get_type() != ITEM_DOCUMENT:
            continue

        markup = BeautifulSoup(entry.get_content(), 'html.parser')
        plain = markup.get_text().strip()

        if position == 1:
            # Skip full dump
            continue

        if position == 2 and "Chapter 701" in plain:
            # The capturing split yields: preamble, heading, body, heading, body, ...
            pieces = heading_splitter.split(plain)
            for raw_heading, raw_body in zip(pieces[1::2], pieces[2::2]):
                heading = raw_heading.strip()
                section = raw_body.strip()
                if len(section) > 30:
                    collected.append(clean_text_for_tts(f"{heading}\n\n{section}"))
            continue

        found = heading_finder.search(plain)
        if found is None:
            continue

        # Drop whatever precedes the first chapter heading.
        remainder = plain[found.start():].strip()
        if len(remainder) > 50:
            collected.append(clean_text_for_tts(remainder))

    return collected


## Save Audio

In [6]:
import os
import edge_tts
import asyncio

async def save_chapters_to_m4a(chapters, output_dir="chapters_m4a", max_chapters=10, start_index=0, voice="en-US-AriaNeural"):
    """Synthesise a slice of chapters with edge-tts, one audio file per chapter.

    Files are written to ``output_dir`` as ``chapter_<number>.m4a``. ``<number>``
    is the second word of the first line that starts with "chapter"
    (case-insensitive), with colons stripped. When no such line or number
    exists, the chapter's 1-based position in ``chapters`` is used instead.

    A failure in one chapter is reported on stdout and does not stop the rest.

    NOTE(review): edge-tts appears to emit MP3 audio by default, so the ``.m4a``
    extension may not match the actual container - confirm before relying on it.

    Args:
        chapters: List of chapter texts.
        output_dir: Directory for the audio files; created if missing.
        max_chapters: Maximum number of chapters to convert in this call.
        start_index: 0-based index in ``chapters`` of the first chapter to convert.
        voice: edge-tts voice name.

    Returns:
        None.
    """
    os.makedirs(output_dir, exist_ok=True)

    end_index = start_index + max_chapters
    chapters_to_save = chapters[start_index:end_index]

    print(f"\n🚀 Starting conversion from Chapter {start_index + 1} to Chapter {min(end_index, len(chapters))}")
    print(f"📚 Total Chapters to Save: {len(chapters_to_save)}\n")

    for i, chapter in enumerate(chapters_to_save, start=start_index + 1):
        try:
            chapter_title_line = next((line for line in chapter.splitlines() if line.lower().startswith("chapter")), f"Chapter_{i}")

            # The fallback title "Chapter_<i>" is a single word, and a heading may
            # lack a number, so indexing word [1] blindly would raise IndexError and
            # the chapter would be skipped. Fall back to the running index instead.
            title_words = chapter_title_line.split()
            chapter_num = title_words[1].strip(":") if len(title_words) > 1 else ""
            if not chapter_num:
                chapter_num = str(i)
            file_path = os.path.join(output_dir, f"chapter_{chapter_num}.m4a")

            print(f"🔊 [Chapter {i}] Preparing: {chapter_title_line}")
            print(f"📄 Characters: {len(chapter)}")

            communicate = edge_tts.Communicate(text=chapter, voice=voice)
            await communicate.save(file_path)

            file_size_kb = os.path.getsize(file_path) // 1024
            print(f"✅ Saved: {file_path} ({file_size_kb} KB)\n")

        except Exception as e:
            # Best effort: report the failure and continue with the next chapter.
            print(f"❌ Error in Chapter {i}: {e}\n")

In [9]:
chapters = extract_actual_chapters("audio_books/9kafe.com-my-vampire-system-c701-1400.epub")
print(chapters[2])  # Should print Chapter 703 with body

await save_chapters_to_m4a(chapters, max_chapters=5, start_index=36)

Chapter 702: Moving on
... 
When leaving the Parasites to their own devices, there was the chance that they could use this time to run away. ... To escape the planet, however, where would they even go? ... Who would they run off to? ... Wevil, had promised Quinn that he would follow him from now onwards, and to leave everything to him and Hana. ... Even though Tony was the vice leader, the faction members actually had the most respect for these two. ... Whenever something had gone wrong, they would be the ones that tried to reason with Mantis, to give them part of the antidote earlier than needed. ... While Tony would be the one who was Mantis's yes men. ... Saying Yes to every suggestion and everything he did. ... "Alright everyone, it's time for you all to decide! ... " Wevil shouted from on top of the platform. ... The faction members were recovering and they had gained enough strength to move again. ... But they were unaware of the bombshell that Wevil was about to drop on them all