In [1]:
!pip install torch==2.0.0
!pip install streamlit==1.24.1
!pip install pydub==0.25.1
!pip install PyPDF2==3.0.1
!pip install numpy==1.25.1
!pip install urllib3
!pip install requests



In [None]:
import requests

import os

model_url = 'https://models.silero.ai/models/tts/en/v3_en.pt'
model_file = 'v3_en.pt'
partial_file = model_file + '.part'

# Download the model file.
# - raise_for_status() stops an HTTP error page from being saved as the model.
# - stream=True with iter_content() writes the file in pieces instead of holding
#   the whole download in memory.
# - The data goes to a temporary ".part" file first and is renamed only after the
#   download finished, so an interrupted run never leaves a truncated 'v3_en.pt'
#   behind for later cells to pick up.
with requests.get(model_url, stream=True, timeout=60) as response:
    response.raise_for_status()
    with open(partial_file, 'wb') as file:
        for block in response.iter_content(chunk_size=1024 * 1024):
            file.write(block)
os.replace(partial_file, model_file)

print('Model downloaded successfully!')

Model downloaded successfully!


In [3]:
import os
import torch
import wave
import numpy as np
from PyPDF2 import PdfReader
from pydub import AudioSegment
from urllib.request import urlretrieve
import logging
from IPython.display import display, FileLink, clear_output
import ipywidgets as widgets
from tqdm import tqdm

# Log to 'app.log', overwriting it on every run. The file is opened as UTF-8
# because several log messages in this notebook contain emoji, which the default
# encoding on some platforms cannot represent.
logging.basicConfig(
    filename='app.log',
    filemode='w',
    encoding='utf-8',
    format='%(name)s - %(levelname)s - %(message)s',
)
logging.getLogger().setLevel(logging.INFO)

# Title and Instructions
print("# 📚 PDF to Audiobook Converter")
print("""
Upload a PDF, select a speaker, then click Convert to Audiobook.
(Please note the bigger the book the longer the process will take)
""")

# Available speakers
speakers = [f'en_{i}' for i in range(1, 118)]  # Update with your available speakers

# Select a speaker
speaker_dropdown = widgets.Dropdown(options=speakers, description='Select a Speaker:')
display(speaker_dropdown)

# Start from the dropdown's current value. No waiting loop is used here: a loop in
# this cell would keep the kernel busy, and a busy kernel cannot process the widget
# event that would end the loop.
speaker = speaker_dropdown.value


def _on_speaker_change(change):
    """Keep the module-level `speaker` in sync with the dropdown selection."""
    global speaker
    speaker = change['new']


# Without this observer `speaker` would stay frozen at the value read above and
# any later selection made by the user would be ignored.
speaker_dropdown.observe(_on_speaker_change, names='value')

# File uploader
upload_widget = widgets.FileUpload(accept='.pdf', multiple=False)
download_button = widgets.Button(description='Download Audiobook')
# Path of the uploaded book without its extension; set by convert_to_audiobook.
book_name = None

def _first_uploaded_file(upload_value):
    """Return ``(name, content)`` of the first file held by a FileUpload widget.

    Two layouts of ``FileUpload.value`` are accepted:

    * a dict keyed by file name whose entries store the name under
      ``['metadata']['name']`` (ipywidgets 7), and
    * a sequence of entries with a top-level ``'name'`` key (ipywidgets 8).
    """
    if isinstance(upload_value, dict):
        entry = next(iter(upload_value.values()))
        return entry['metadata']['name'], entry['content']
    entry = upload_value[0]
    return entry['name'], entry['content']


def _synthesize_chunk(chunk, speaker, sample_rate, model):
    """Synthesize `chunk` and return its audio as a list of AudioSegments.

    The segments are in reading order. If the model reports that the text is
    "too long", the chunk is split in the middle and BOTH halves are synthesized,
    so no text is dropped. Any other failure, or a chunk that cannot be split
    further, is logged and skipped (an empty list is returned) so that a single
    bad chunk can neither abort nor hang the whole conversion.
    """
    if not chunk.strip():
        # Nothing to read aloud.
        return []
    try:
        audio_path = model.save_wav(text=chunk, speaker=speaker, sample_rate=sample_rate)
        return [AudioSegment.from_wav(audio_path)]
    except Exception as e:
        clear_output()
        print('Please wait while we generate the audio file')
        logging.error("Exception occurred", exc_info=True)
        if "too long" in str(e) and len(chunk) > 1:
            middle = len(chunk) // 2
            return (
                _synthesize_chunk(chunk[:middle], speaker, sample_rate, model)
                + _synthesize_chunk(chunk[middle:], speaker, sample_rate, model)
            )
        logging.warning("Skipping a chunk of %d characters that could not be synthesized.", len(chunk))
        return []


def _text_to_audio_book(pdf_path, speaker, audiobook_file, chunk_size=1000):
    """Read the PDF at `pdf_path` aloud into the mono 16-bit WAV `audiobook_file`.

    The text of every page is cut into pieces of at most `chunk_size` characters,
    each piece is synthesized with `speaker`, and the audio is appended to the
    output file. Pages without text are logged and skipped.
    """
    device = torch.device('cpu')
    local_file = 'v3_en.pt'

    if not os.path.isfile(local_file):
        url = 'https://models.silero.ai/models/tts/en/v3_en.pt'
        urlretrieve(url, local_file)

    # NOTE(security): load_pickle unpickles objects from the package file; only
    # load model files obtained from a trusted source.
    model = torch.package.PackageImporter(local_file).load_pickle("tts_models", "model")
    model.to(device)

    sample_rate = 48000

    pdf_reader = PdfReader(pdf_path)
    num_pages = len(pdf_reader.pages)

    # The context manager closes the WAV file even if a page fails, so the file
    # handle is never leaked.
    with wave.open(audiobook_file, 'wb') as wavef:
        wavef.setnchannels(1)  # mono
        wavef.setsampwidth(2)
        wavef.setframerate(sample_rate)

        with tqdm(total=num_pages, desc="Converting", unit="page") as pbar:
            for page_num in range(num_pages):
                text = (pdf_reader.pages[page_num].extract_text() or '').replace('\n', ' ')

                if text.strip() == '':
                    logging.warning(f"⚠️ No text found on page {page_num}, skipping this page.")
                else:
                    for start in range(0, len(text), chunk_size):
                        piece = text[start:start + chunk_size]
                        for segment in _synthesize_chunk(piece, speaker, sample_rate, model):
                            samples = np.array(segment.get_array_of_samples())
                            wavef.writeframesraw(samples.tobytes())
                    logging.info(f"✔️ Audio for page {page_num} saved.")

                # Count skipped pages too, so the bar always reaches 100%.
                pbar.update(1)

    logging.info("Audiobook generation completed.")


def convert_to_audiobook(btn):
    """Button handler: turn the uploaded PDF into ``<book>_audiobook.wav``.

    The uploaded file is saved into the current working directory, the
    module-level `book_name` is set to its path without extension, the audiobook
    is generated with the speaker currently selected in `speaker_dropdown`, and
    finally the download button is displayed.

    Args:
        btn: The button that triggered the callback (unused).
    """
    global book_name
    if not upload_widget.value:
        print('Please upload a PDF file first.')
        return

    # Save the uploaded file. The name is supplied by the browser, so only its
    # final component is used to keep the file inside the working directory.
    file_name, file_content = _first_uploaded_file(upload_widget.value)
    uploaded_file_path = os.path.join(os.getcwd(), os.path.basename(file_name))
    with open(uploaded_file_path, 'wb') as file:
        file.write(file_content)

    # Get the filename without extension
    book_name = os.path.splitext(uploaded_file_path)[0]

    # Read the dropdown at click time so the user's current selection is used.
    _text_to_audio_book(uploaded_file_path, speaker_dropdown.value, f"{book_name}_audiobook.wav")
    print(f'Successfully converted {book_name} to Audiobook. 🎉')

    # Display the download button after successful conversion
    display(download_button)

def download_audiobook(btn):
    """Button handler: display a download link for the generated audiobook.

    Args:
        btn: The button that triggered the callback (unused).
    """
    if book_name is None:
        # No conversion has set `book_name` yet, so there is no file to link to.
        print('No audiobook has been generated yet.')
        return

    # Provide download link
    audiobook_file = f"{book_name}_audiobook.wav"
    # `book_name` is built from os.getcwd(), i.e. it is an absolute filesystem
    # path. The link target has to be a path the notebook server can resolve, so
    # it is expressed relative to the working directory
    # (assumes the kernel's working directory is the notebook's directory).
    download_link = FileLink(os.path.relpath(audiobook_file))
    display(download_link)

# Attach the click handlers first, then show the controls.
download_button.on_click(download_audiobook)

upload_button = widgets.Button(description='Convert to Audiobook')
upload_button.on_click(convert_to_audiobook)

# Only the uploader and the convert button are rendered here; the download
# button is displayed by convert_to_audiobook.
display(upload_widget, upload_button)


Please wait while we generate the audio file
