# ============ Podcast2Podcast ============

This notebook demonstrates a fully automated pipeline that turns any podcast episode into a short spoken summary: a podcast about a podcast!

## 📄 Instructions


1.   Fill out the configuration and run the cell. This will display an episode selector.
2.   Select your episode.
3.   Run the remaining cells. This will take a while, especially to get higher-quality results from TortoiseTTS.
4.   Listen to the results in the last cell of this notebook.

In [None]:
#@title ⚙️ Configuration
# User-tunable settings for the notebook, exposed as Colab form fields.
# The `#@param` / `#@markdown` annotations drive the form UI; keep them intact.

#@markdown **RSS URL:** If you don't know the RSS feed URL, Google `"<your-podcast> apple podcasts"` to find the iTunes page. Copy the iTunes page URL into [https://getrssfeed.com/](https://getrssfeed.com/). Then, right-click on "RSS feed" and select "Copy Link Address."
podcast_rss_feed_url = "https://feeds.captivate.fm/gradient-dissent/" #@param ["https://talkpython.fm/episodes/rss", "https://feeds.captivate.fm/gradient-dissent/", "http://rss.acast.com/mydadwroteaporno", "https://lexfridman.com/feed/podcast/"] {allow-input: true}

#@markdown **OpenAI API Token:** Go to [https://beta.openai.com/account/api-keys](https://beta.openai.com/account/api-keys)
# Exported below as the DYNACONF_OPENAI_TOKEN environment variable.
openai_token = "" #@param {type:"string"}

#@markdown **TortoiseTTS quality setting :**
# NOTE(review): `tortoise_settings` is not referenced anywhere else in the
# visible notebook code (the pipeline call only passes tts_method="tortoise").
# Confirm how, or whether, this value reaches the TTS step.
tortoise_settings = "high_quality" #@param ["ultra_fast", "fast", "standard", "high_quality"]

#@markdown **Trim Audio (seconds)** Only listen to the first `listen_up_to` seconds on the podcast before starting to summarize. If `0`, listen to the whole thing.
# Given in seconds; the run cell multiplies it by 1000 before passing it as
# `duration`, and maps 0 to None.
listen_up_to = 300 #@param {type:"number"}

#@markdown **Save output** The pipeline takes a long time and it is a good idea to save the output to Google Drive in case the colab interface gets disconnected.
save_to_google_drive = True #@param {type:"boolean"}
gdrive_export_fp = "/gdrive/MyDrive/podcast_summary.mp3" #@param {type:"string"}

# Mount Drive at /gdrive, the root that the default export path above lives
# under. The import is deferred so google.colab is only required when saving
# to Drive is enabled.
if save_to_google_drive:
    from google.colab import drive
    drive.mount('/gdrive')

!pip install --quiet untangle

import os
import requests
import ipywidgets as widgets
from IPython.display import display
import pandas as pd
import untangle

# Expose the OpenAI token through the environment for the pipeline run later on.
os.environ["DYNACONF_OPENAI_TOKEN"] = openai_token

# Download the feed. The timeout keeps the cell from hanging indefinitely on an
# unresponsive host; HTTP error statuses are surfaced immediately.
resp = requests.get(podcast_rss_feed_url, timeout=30)
resp.raise_for_status()

# Parse the XML and locate the RSS channel. Both steps sit inside the `try` so
# that a well-formed but non-RSS document (no <rss>/<channel>) produces the same
# friendly error as malformed XML, with the original exception chained for
# debugging.
try:
    xml = untangle.parse(resp.content.decode())
    channel = xml.rss.channel
except Exception as e:
    raise ValueError(
        f"Could not parse {podcast_rss_feed_url}. Is this an RSS feed?"
    ) from e

# A channel without any <item> has no `item` attribute at all; treat that as an
# empty episode list instead of failing with a bare AttributeError.
episodes = getattr(channel, "item", [])

# Build the two parallel lists (same index == same episode) used by the episode
# selector and by the run cell. Items without an audio enclosure cannot be
# processed, so they are skipped rather than aborting the whole listing.
episodes_titles = []
episodes_urls = []
for episode in episodes:
    # Explicit `is None` checks: do not rely on the truthiness of parsed elements.
    enclosure = getattr(episode, "enclosure", None)
    if enclosure is None:
        continue
    audio_url = enclosure.get_attribute("url")
    if audio_url is None:
        continue
    episodes_titles.append(episode.title.cdata)
    episodes_urls.append(audio_url)

# Fail here with a clear message instead of an IndexError when the selector
# later asks for `episodes_titles[0]`.
if not episodes_titles:
    raise ValueError(
        f"No episodes with an audio enclosure were found in {podcast_rss_feed_url}."
    )

# Episode picker: one row per parsed title, with the feed's first entry
# pre-selected. The run cell reads the choice back from this widget.
episode_title_selection = widgets.Select(
    description="Episode: ",
    rows=15,
    options=episodes_titles,
    value=episodes_titles[0],
)

# Bare last expression so the widget is rendered as the cell's output.
episode_title_selection

In [None]:
#@title 👷‍♂️ Install Dependencies
!(  pip install loguru \
 && pip install -q git+https://github.com/jeremyadamsfisher/podcast2podcast \
 && git clone https://github.com/jnordberg/tortoise-tts.git \
 && cd tortoise-tts \
 && pip install --quiet -r requirements.txt \
 && python setup.py install \
 && pip uninstall -y ffmpeg ffmpeg-python \
 && pip install ffmpeg-python \
 && pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116 \
 && pip install protobuf==3.19.0 ) \
  > /content/podcast2podcast.installs.log 2>&1
%cd /content/tortoise-tts/

In [None]:
#@title ⚡️ Run it!
# Summarize the selected episode, optionally save the result to Google Drive,
# and display the resulting audio as the cell output.
#
# Timing is done explicitly: a `%%time` cell magic is only recognised on the
# very first line of a cell, so placed below the title comment it never timed
# this cell.
import time

# Imported here rather than at the top of the notebook because the package is
# only installed by the dependency cell.
from podcast2podcast import pipeline

run_started = time.perf_counter()

# Use the selector's positional index rather than searching for the title text:
# a title lookup returns the first match, i.e. the wrong URL whenever two
# episodes share a title.
selected_index = episode_title_selection.index
if selected_index is None:
    raise ValueError(
        "No episode is selected. Pick one in the configuration cell first."
    )

episode_title = episodes_titles[selected_index]
episode_url = episodes_urls[selected_index]
podcast_title = xml.rss.channel.title.cdata

print("="*60)
print("="*60)
print("We'll be summarizing the following podcast:")
print(f"Podcast Title: {podcast_title}")
print(f"Episode Title: {episode_title}")
print(f"Episode URL:   {episode_url}")
print("="*60)
print("="*60)

audio = pipeline(
    episode_url,
    podcast_title,
    episode_title,
    # The form value is in seconds and is scaled by 1000 here; 0 means
    # "do not trim" and is passed as None.
    duration=None if listen_up_to == 0 else listen_up_to * 1000,
    tts_method="tortoise",
    whisper_model_size="small",
)

# NOTE(review): assumes `audio` exposes a pydub-style `export(path, format=...)`;
# confirm against podcast2podcast.
if save_to_google_drive:
    audio.export(gdrive_export_fp, format="mp3")

print(f"Wall time: {time.perf_counter() - run_started:.1f} s")

audio