# **YouTube Audio Fetching**

This notebook downloads the audio track of every YouTube video in a predetermined list of video ids.

Note: downloading many audio tracks in parallel threads seems to upset Google, which may start refusing connections.

In [None]:
#@markdown # Save data in Google Drive 💾
#@markdown Enter a Google Drive path and run this cell if you want to store the results inside Google Drive.

# Mount Google Drive so the downloaded audio is saved there; in my experience this is faster than downloading the files directly from Colab.
from google.colab import drive
from pathlib import Path
from IPython.display import display, Markdown, YouTubeVideo
from concurrent.futures import ThreadPoolExecutor
! pip install pytube
from pytube import YouTube

# Mount point for Google Drive inside the Colab VM.
drive_mount_path = Path("/content/drive")
drive.mount(str(drive_mount_path))
#@markdown ---
drive_path = "MyDrive/GDC_Audio" #@param {type:"string"}
#@markdown ---
#@markdown **Run this cell again if you change your Google Drive path.**

# Destination folder for the downloaded audio. Leading slashes are stripped so
# the form value is always joined onto the mount point instead of replacing it.
drive_whisper_path = drive_mount_path.joinpath(drive_path.lstrip("/"))
drive_whisper_path.mkdir(parents=True, exist_ok=True)

In [None]:
#@markdown # **Video selection from predetermined list** 📺

#@markdown Enter the path of video id list

# Text file with one YouTube video id per line. The value is joined onto the
# Drive mount point further down in this cell (any leading "/" is stripped first).
ListPath = "MyDrive/GDC_Whisper/video_to_download.txt" #@param {type:"string"}

def download_audio(video_id, output_folder):
  """Download the audio-only stream of one YouTube video.

  Failures are reported with ``print`` and never raised, so one bad id does
  not abort a batch of downloads.

  Args:
    video_id: YouTube video id (the ``v=`` query value). Surrounding
      whitespace is ignored; an empty/blank id is skipped.
    output_folder: Directory (``str`` or ``Path``) the file is written to.

  Returns:
    The path returned by the stream's ``download`` call on success, or
    ``None`` when the id is empty, no audio-only stream exists, or the
    download raised an error.
  """
  video_id = (video_id or "").strip()
  if not video_id:
    # Blank lines in the id list would otherwise produce a request for "watch?v=".
    print("Skipping empty video id.")
    return None

  link = f"https://www.youtube.com/watch?v={video_id}"
  print(f"Downloading audio from {link}...")

  try:
    yt = YouTube(link)
    audio_stream = yt.streams.filter(only_audio=True).first()
    if audio_stream is None:
      # first() yields None when the query matches nothing; report that
      # explicitly instead of an opaque AttributeError on None.
      print(f"Error downloading audio from {link}: no audio-only stream available")
      return None
    # NOTE(review): the file is always named "<id>.mp3" regardless of the
    # stream's real container (presumably MP4/WebM for audio-only streams).
    # The name is kept as-is because other tooling may rely on it -- confirm.
    return audio_stream.download(output_path=str(output_folder), filename=f"{video_id}.mp3")
  except Exception as e:
    # Deliberate best-effort: log and continue with the remaining videos.
    print(f"Error downloading audio from {link}: {e}")
    return None

# Upper bound on simultaneous downloads. Kept small on purpose: the notebook
# intro notes that heavily threaded downloading can get connections refused.
MAX_DOWNLOAD_WORKERS = 4

# Resolve the id list relative to the Drive mount point; the leading "/" is
# stripped so the join does not discard the mount prefix.
video_path = drive_mount_path / Path(ListPath.lstrip("/"))
if not video_path.is_file():
  display(Markdown(f"**{str(video_path)} does not exist.**"))
else:
  display(Markdown(f"**{str(video_path)} selected to download.**"))
  # "utf-8-sig" also accepts plain UTF-8 and drops a BOM, which would
  # otherwise end up glued to the first id.
  with open(video_path, "r", encoding="utf-8-sig") as file:
    stripped_lines = (line.strip() for line in file)
    # Skip blank lines and drop duplicate ids (order preserved) so two threads
    # never write the same output file at once.
    video_id_parsed = list(dict.fromkeys(line for line in stripped_lines if line))
  if not video_id_parsed:
    display(Markdown(f"**{str(video_path)} contains no video ids.**"))
  else:
    with ThreadPoolExecutor(max_workers=MAX_DOWNLOAD_WORKERS) as executor:
      # Consume the iterator so an unexpected exception raised in a worker is
      # surfaced here instead of being silently dropped.
      list(executor.map(lambda video_id: download_audio(video_id, drive_whisper_path), video_id_parsed))