<a href="https://colab.research.google.com/github/itsokaypiyush/Text-to-speech-/blob/main/PDF2PODCAST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install required libraries (uncomment on the first run, in Colab or locally)
# !pip install pymupdf transformers requests pydub
# !apt-get install ffmpeg

from google.colab import files
import fitz  # PyMuPDF
import requests
from transformers import pipeline
import IPython.display as ipd
from pydub import AudioSegment
import os

# STEP 1: Upload PDF
print("Upload your PDF file:")
uploaded = files.upload()
if not uploaded:
    # Without this guard an empty upload surfaces as a bare IndexError below.
    raise RuntimeError("No file was uploaded; re-run this cell and choose a PDF.")
# Only the first uploaded file is processed.
filename = next(iter(uploaded))

# STEP 2: Extract text from PDF
# The context manager closes the document once every page has been read.
with fitz.open(filename) as doc:
    print("Extracting text from PDF...")
    # Join once instead of repeatedly growing a string page by page.
    text = "".join(page.get_text() for page in doc)
print(f"Extracted {len(text)} characters of text.")

# STEP 3: Initialize summarizer pipeline
print("Loading summarization model...")
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Helper function: chunk text for summarization
def _split_long_paragraph(para, max_len):
    """Break a paragraph into pieces of at most *max_len* characters.

    A paragraph that already fits is returned unchanged as a one-element
    list. Otherwise each cut is made at the last space that fits, falling
    back to a hard cut when there is no usable space in the window.
    """
    if len(para) <= max_len:
        return [para]
    pieces = []
    rest = para
    while len(rest) > max_len:
        cut = rest.rfind(" ", 0, max_len + 1)
        if cut <= 0:
            # No space (or only a leading one): cut mid-word to guarantee progress.
            cut = max_len
        piece = rest[:cut].rstrip()
        if piece:
            pieces.append(piece)
        rest = rest[cut:].lstrip()
    if rest:
        pieces.append(rest)
    return pieces


def chunk_text(text, max_len=1000):
    """Group the lines of *text* into chunks of at most *max_len* characters.

    Lines are packed greedily, in order, and joined with newlines. A line
    that is itself longer than *max_len* is split so no chunk exceeds the
    limit. Chunks that would be empty after stripping are never returned,
    so empty or whitespace-only text yields an empty list.

    Args:
        text: The text to split; lines are separated by "\\n".
        max_len: Maximum number of characters per chunk (must be >= 1).

    Returns:
        A list of non-empty, stripped chunk strings.

    Raises:
        ValueError: If *max_len* is smaller than 1.
    """
    if max_len < 1:
        raise ValueError("max_len must be a positive integer")

    chunks = []
    current_chunk = ""
    for line in text.split("\n"):
        for para in _split_long_paragraph(line, max_len):
            if len(current_chunk) + len(para) < max_len:
                current_chunk += para + "\n"
                continue
            # The paragraph does not fit: close the running chunk and start a new one.
            finished = current_chunk.strip()
            if finished:
                chunks.append(finished)
            current_chunk = para + "\n"

    finished = current_chunk.strip()
    if finished:
        chunks.append(finished)
    return chunks

# STEP 4: Summarize the text in chunks
print("Splitting text into chunks for summarization...")
# Drop blank chunks: there is nothing in them to summarize.
chunks = [chunk for chunk in chunk_text(text, max_len=1000) if chunk.strip()]
print(f"Total chunks to summarize: {len(chunks)}")
if not chunks:
    # Fail here with a clear message rather than later, when empty text reaches the speech API.
    raise ValueError(
        "No text could be extracted from the PDF "
        "(it may be a scanned or image-only document)."
    )

print("Summarizing chunks...")
summaries = []
for i, chunk in enumerate(chunks, start=1):
    print(f"Summarizing chunk {i}/{len(chunks)}...")
    # truncation=True clips an unusually dense chunk to the model's input
    # limit instead of letting the call fail on over-long input.
    summary = summarizer(
        chunk,
        max_length=130,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    summaries.append(summary[0]['summary_text'].strip())
# Join once at the end; chunk summaries are separated by a single space.
final_summary = " ".join(summaries)

print("\nSummary ready.")

# STEP 5: Split summary into two parts for two voices
def _nearest_break(summary, mid, marks):
    """Return the index just past the mark in *marks* closest to *mid*.

    Looks for the closest occurrence of each mark on either side of *mid*
    and returns the cut position (the index right after the mark) nearest
    to *mid*, or -1 when none of the marks occurs in *summary*.
    """
    best = -1
    for mark in marks:
        before = summary.rfind(mark, 0, mid + len(mark) - 1)
        after = summary.find(mark, mid)
        for pos in (before, after):
            if pos == -1:
                continue
            cut = pos + len(mark)
            if best == -1 or abs(cut - mid) < abs(best - mid):
                best = cut
    return best


summary_text = final_summary.strip()
half = len(summary_text) // 2
# Prefer a sentence boundary so neither voice starts or stops mid-sentence;
# fall back to a word boundary, then to the raw midpoint.
cut = _nearest_break(summary_text, half, (". ", "! ", "? "))
if cut == -1:
    cut = _nearest_break(summary_text, half, (" ",))
if cut == -1:
    cut = half
text1 = summary_text[:cut].strip()
text2 = summary_text[cut:].strip()

print("Split summary into two parts for two voices.")

# STEP 6: ElevenLabs API details
# The key is read from the ELEVENLABS_API_KEY environment variable when it is
# set, so the secret does not have to be saved inside the notebook. The
# placeholder below is the fallback; replace it only if you cannot set the
# environment variable.
api_key = os.environ.get("ELEVENLABS_API_KEY", "YOUR_API_KEY_HERE")

# Your custom voice IDs (replace with your actual IDs)
voice_id_1 = "YOUR_VOICE_ID_1"  # Example: "cgSgspJ2msm6clMCkdW9"
voice_id_2 = "YOUR_VOICE_ID_2"  # Example: "IKne3meq5aSn9XLyUdCD"

# Function to convert text to speech using ElevenLabs API
def text_to_speech(text, voice_id, filename, timeout=120):
    """Synthesize *text* with an ElevenLabs voice and save the audio to *filename*.

    Failures are reported with ``print`` rather than raised; the return
    value tells the caller whether the audio file was written.

    Args:
        text: Text to speak. Blank text is rejected without calling the API.
        voice_id: ElevenLabs voice ID, inserted into the request URL.
        filename: Path the response body (the audio bytes) is written to.
        timeout: Timeout in seconds passed to ``requests.post`` so a stalled
            connection cannot block forever.

    Returns:
        True if the audio was saved to *filename*, False otherwise.
    """
    if not text or not text.strip():
        print(f"Error generating speech: no text to synthesize for voice_id: {voice_id}")
        return False

    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
    headers = {
        "xi-api-key": api_key,  # module-level API key
        "Content-Type": "application/json",
    }
    data = {
        "text": text,
        "voice_settings": {
            "stability": 0.4,
            "similarity_boost": 0.75,
        },
    }
    print(f"Generating speech for voice_id: {voice_id} ...")
    try:
        response = requests.post(url, headers=headers, json=data, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error generating speech: {e}")
        # Show the server's error body when there is one; the status line
        # alone rarely says what was wrong with the request.
        failed = getattr(e, "response", None)
        if failed is not None and failed.text:
            print(f"API response: {failed.text[:500]}")
        return False

    with open(filename, "wb") as f:
        f.write(response.content)
    print(f"Saved audio to {filename}")
    return True

# STEP 7: Generate audio files for both voices
voice_files = ["voice1.mp3", "voice2.mp3"]
# Remove leftovers from an earlier run so a failed request cannot be masked
# by stale audio that would otherwise be merged silently.
for path in voice_files:
    if os.path.exists(path):
        os.remove(path)

text_to_speech(text1, voice_id_1, "voice1.mp3")
text_to_speech(text2, voice_id_2, "voice2.mp3")

# text_to_speech reports errors by printing, so confirm that both files were
# actually produced before trying to decode them.
missing = [
    path for path in voice_files
    if not os.path.exists(path) or os.path.getsize(path) == 0
]
if missing:
    raise RuntimeError(
        f"Speech generation failed; missing audio file(s): {', '.join(missing)}. "
        "Check your ElevenLabs API key and voice IDs."
    )

# STEP 8: Merge audio files sequentially with a short pause
print("Merging audio files...")
audio1 = AudioSegment.from_file("voice1.mp3")
audio2 = AudioSegment.from_file("voice2.mp3")
pause = AudioSegment.silent(duration=700)  # 0.7 seconds pause

final_audio = audio1 + pause + audio2
final_audio.export("final_podcast.mp3", format="mp3")
print("Final podcast saved as 'final_podcast.mp3'.")

# STEP 9: Play the final podcast audio
ipd.display(ipd.Audio("final_podcast.mp3"))
