# eBook to Audio project

### install libs

In [5]:
# pip install gTTS pydub ebooklib bs4 edge-tts

## Variables to select chapters

### Quick note, Remove <b><u>token.json</u></b> file before starting script

In [None]:
# Variables
# start index = 700 + start_index (701 => 1, 800 => 100, 900 => 200, etc.)
# Save chapters starting from Chapter 701 (index 70) to 50 chapters
# 930 done!
next_chapter = 601
number_of_chapters = 100

## extract chapters

In [7]:
import re
from ebooklib import epub, ITEM_DOCUMENT
from bs4 import BeautifulSoup

def clean_text_for_tts(text):
    # 1. Decode escaped characters like \n, \'
    text = text.encode("utf-8").decode("unicode_escape")

    # 2. Ensure spacing after sentence-ending punctuation (before any letter or quote)
    text = re.sub(r'([.?!])(?=["A-Za-z])', r'\1 ', text)

    # 3. Add longer pause for sentence ends
    text = re.sub(r'\.\s+', '. ... ', text)
    text = re.sub(r'\?\s+', '? ... ', text)
    text = re.sub(r'!\s+', '! ... ', text)

    # 4. Fix smashed dialogue punctuation (e.g., ‘.”Word’ → ‘. ”Word’)
    text = re.sub(r'([.?!])(")', r'\1 \2', text)

    # 5. Add pause after newlines
    text = re.sub(r'\n+', '\n... \n', text)

    # 6. Remove leftover backslashes
    text = text.replace("\\", "")

    return text.strip()

def extract_actual_chapters(epub_path):
    book = epub.read_epub(epub_path)
    chapters = []

    # Updated regex pattern to allow decimals and flexible separators
    chapter_pattern = r"(Chapter\s+\d+(?:\.\d+)?\s*[:\-]?\s+.*)"

    for idx, item in enumerate(book.get_items(), start=1):
        if item.get_type() != ITEM_DOCUMENT:
            continue

        soup = BeautifulSoup(item.get_content(), 'html.parser')
        text = soup.get_text().strip()

        if idx == 1:
            continue  # Skip full dump

        match = re.search(chapter_pattern, text, re.IGNORECASE)
        if match:
            chapter_start = match.start()
            chapter_text = text[chapter_start:].strip()
            if len(chapter_text) > 50:
                cleaned = clean_text_for_tts(chapter_text)
                chapters.append(cleaned)

    return chapters


## Save Audio

In [None]:
import os
import edge_tts
import asyncio
import textwrap

async def synthesize_chunks(chapter_text, voice, base_filename, output_dir):
    CHUNK_SIZE = 9000  # safe limit for edge-tts
    chunks = textwrap.wrap(chapter_text, CHUNK_SIZE, break_long_words=False, replace_whitespace=False)

    print(f"🧩 Chapter split into {len(chunks)} chunk(s)\n")

    for i, chunk in enumerate(chunks):
        # 👇 Use base_filename.m4a if only 1 chunk; else add _partN
        if len(chunks) == 1:
            file_path = os.path.join(output_dir, f"{base_filename}.m4a")
        else:
            file_path = os.path.join(output_dir, f"{base_filename}_part{i+1}.m4a")

        try:
            print(f"🎤 Synthesizing chunk {i+1}/{len(chunks)}: {len(chunk)} characters")
            communicate = edge_tts.Communicate(text=chunk, voice=voice)
            await communicate.save(file_path)
            await asyncio.sleep(0.5)

            if not os.path.exists(file_path) or os.path.getsize(file_path) < 1024:
                raise Exception(f"Incomplete chunk saved: {file_path}")

            print(f"✅ Saved: {file_path} ({os.path.getsize(file_path) // 1024} KB)\n")
        
        except Exception as e:
            print(f"❌ Error saving chunk {i+1}: {e}\n")
    print("\n\n")

async def save_chapters_to_m4a(chapters, output_dir="chapters_m4a", max_chapters=10, start_index=0, voice="en-US-JennyNeural"):
    os.makedirs(output_dir, exist_ok=True)

    end_index = start_index + max_chapters
    chapters_to_save = chapters[start_index:end_index]

    print(f"\n🚀 Starting conversion from Chapter {start_index + 1} to Chapter {min(end_index, len(chapters))}")
    print(f"📚 Total Chapters to Save: {len(chapters_to_save)}\n")

    for i, chapter in enumerate(chapters_to_save, start=start_index + 1):
        try:
            chapter_title_line = next((line for line in chapter.splitlines() if line.lower().startswith("chapter")), f"Chapter_{i}")
            chapter_num = chapter_title_line.split()[1].strip(":")
            base_filename = f"chapter_{chapter_num}"

            print(f"🔊 [Chapter {i}] Preparing: {chapter_title_line}")
            print(f"📄 Characters: {len(chapter)}")

            print("Chapter Text Preview: ", chapter)

            await synthesize_chunks(chapter, voice, base_filename, output_dir)

        except Exception as e:
            print(f"❌ Error in Chapter {i}: {e}\n")


In [9]:
chapters = extract_actual_chapters("audio_books/9kafe.com-my-vampire-system-c701-1400.epub")
# print(chapters[2])  # Should print Chapter 702 with body

#start index = 700 + start_index (701 => 1, 800 => 100, 900 => 200, etc.)
# Save chapters starting from Chapter 701 (index 70) to 50 chapters
#930 done!
# for x in range(number_of_chapters):
#     print(chapters[next_chapter + x])  # Print each chapter to be saved
    
await save_chapters_to_m4a(chapters, max_chapters=number_of_chapters, start_index=next_chapter)


🚀 Starting conversion from Chapter 522 to Chapter 601
📚 Total Chapters to Save: 80

🔊 [Chapter 522] Preparing: Chapter 1220: Vincentâs truth
📄 Characters: 9281
Chapter Text Preview:  Chapter 1220: Vincentâs truth
... 
Listening to the story, Vincent could only imagine the horror those on the Cursed ship had been through. ... At first, they were forced to go against their fellow humans, the strongest ability users that were known to exist. ... After feeling like they had pulled through, the Dalki had arrived. ... Although, judging from the story, the Dalki had allowed them a bit of rest, but only for a moment. ... Then when they had finally crashed as their only option. ... They thought that they could head to safety at the Shelter, that everything was over, only to enter a den full of Dalki. ... "It was my fault. ... " Sam continued to say. ... "I was the one that led them to this Shelter, not thinking about it properly. ... Perhaps we could have tried to find a better place on th

In [10]:
import os
import shutil
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload
from google.auth.transport.requests import Request

# If modifying scopes, delete the file token.json.
SCOPES = ['https://www.googleapis.com/auth/drive.file']

def upload_file_to_drive(filepath, filename=None, folder_id=None):
    creds = None
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)

    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'credentials.json', SCOPES)
            creds = flow.run_local_server(port=0)
        with open('token.json', 'w') as token:
            token.write(creds.to_json())

    service = build('drive', 'v3', credentials=creds)

    file_metadata = {
        'name': filename or os.path.basename(filepath),
        'parents': [folder_id] if folder_id else []
    }

    media = MediaFileUpload(filepath, resumable=True)

    try:
        file = service.files().create(
            body=file_metadata,
            media_body=media,
            fields='id'
        ).execute()
        print(f"Uploaded file ID: {file.get('id')}")
    finally:
        # Explicitly close the file handle
        if media._fd:
            media._fd.close()


def upload_all_files_in_folder(local_folder_path, drive_folder_id):
    done_folder_path = os.path.join(local_folder_path, 'done')
    os.makedirs(done_folder_path, exist_ok=True)  # Create 'done' folder if it doesn't exist

    for filename in os.listdir(local_folder_path):
        file_path = os.path.join(local_folder_path, filename)

        # Skip if it's a directory (including 'done' folder)
        if not os.path.isfile(file_path):
            continue

        try:
            print(f"Uploading {filename}...")
            upload_file_to_drive(file_path, filename=filename, folder_id=drive_folder_id)
            
            # Move file to done/
            shutil.move(file_path, os.path.join(done_folder_path, filename))
            print(f"Moved {filename} to 'done/' folder.\n")
        except Exception as e:
            print(f"Error uploading {filename}: {e}")


upload_all_files_in_folder('chapters_m4a', '1wk9tIDqagqaGUAo-rhcFkllJZAmOkr7K')

Uploading chapter_1220_part1.m4a...
Uploaded file ID: 10yx3xznm6PdTLeoCA3w-3_qFVmD5Yb9B
Moved chapter_1220_part1.m4a to 'done/' folder.

Uploading chapter_1220_part2.m4a...
Uploaded file ID: 1pCOqJ_tLbunI7lyMMWwM0cgkHwhwZGFV
Moved chapter_1220_part2.m4a to 'done/' folder.

Uploading chapter_1221_part1.m4a...
Uploaded file ID: 1KSoOp8rRX0LB3mWougHEZoUd7D0_3Mpk
Moved chapter_1221_part1.m4a to 'done/' folder.

Uploading chapter_1221_part2.m4a...
Uploaded file ID: 1BXIrc7-mZLlxJ1otgHirQoQ0Joet_Zv9
Moved chapter_1221_part2.m4a to 'done/' folder.

Uploading chapter_1222_part1.m4a...
Uploaded file ID: 1pYOeh01HSckzU5H-JtoUEyqSpdYZnWtH
Moved chapter_1222_part1.m4a to 'done/' folder.

Uploading chapter_1222_part2.m4a...
Uploaded file ID: 11vi_gP20enDNNm7BteFxJfAw2_cr-GHV
Moved chapter_1222_part2.m4a to 'done/' folder.

Uploading chapter_1223_part1.m4a...
Uploaded file ID: 1blHw7NekkUJFDqzKcf2LgAYepaYZsSYo
Moved chapter_1223_part1.m4a to 'done/' folder.

Uploading chapter_1223_part2.m4a...
Uploa