# Downloading a YouTube Video Using Python

In [None]:
!pip install pytubefix
!pip install pydub

In [None]:
from pytubefix import YouTube
from pytubefix.cli import on_progress
import os
from pydub import AudioSegment

# Output locations; both directories are created below if they are missing.
SAVE_PATH = "/content/videos"
AUDIO_PATH = "/content/audio"

# Link of the video to be downloaded
link = "https://www.youtube.com/watch?v=1aA1WGON49E"

# Create directories if they don't exist
os.makedirs(SAVE_PATH, exist_ok=True)
os.makedirs(AUDIO_PATH, exist_ok=True)

# Resolve the video metadata; on_progress reports download progress.
yt = YouTube(link, on_progress_callback=on_progress)
video_title = yt.title

print(f"Downloading: {video_title}")

# Save the video
video_stream = yt.streams.get_highest_resolution()
video_stream.download(output_path=SAVE_PATH)

# Download the audio-only stream; download() returns the path it wrote to.
audio_stream = yt.streams.get_audio_only()
audio_file_path = audio_stream.download(output_path=AUDIO_PATH)

# Build the MP3 path from the title. A path separator inside the title would
# make os.path.join point into a sub-directory that does not exist, so it is
# replaced; titles without a separator produce exactly the same path as before.
safe_title = video_title.replace(os.sep, "_")
mp3_file_path = os.path.join(AUDIO_PATH, f"{safe_title}.mp3")

# Load the downloaded audio and re-encode it as MP3.
audio = AudioSegment.from_file(audio_file_path)
audio.export(mp3_file_path, format="mp3")

# The original download is no longer needed once the MP3 exists.
os.remove(audio_file_path)

print(f"Audio saved as MP3: {mp3_file_path}")

Downloading: A one minute TEDx Talk for the digital age | Woody Roseland | TEDxMileHigh
Audio saved as MP3: /content/audio/A one minute TEDx Talk for the digital age | Woody Roseland | TEDxMileHigh.mp3




# Converting Audio into Text


In [None]:
!pip install deepgram-sdk

Collecting deepgram-sdk
  Downloading deepgram_sdk-3.7.4-py3-none-any.whl.metadata (13 kB)
Collecting httpx>=0.25.2 (from deepgram-sdk)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting websockets>=12.0 (from deepgram-sdk)
  Downloading websockets-13.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Collecting dataclasses-json>=0.6.3 (from deepgram-sdk)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting aiofiles>=23.2.1 (from deepgram-sdk)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting aenum>=3.1.0 (from deepgram-sdk)
  Downloading aenum-3.1.15-py3-none-any.whl.metadata (3.7 kB)
Collecting deprecation>=2.1.0 (from deepgram-sdk)
  Downloading deprecation-2.1.0-py2.py3-none-any.whl.metadata (4.6 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json>=0.6.3->deepgram-sdk)
  Downloading marshmallow-3.23.0-py3-none-any.whl.metadata (

In [None]:
import os
from google.colab import userdata

from deepgram import (
    DeepgramClient,
    PrerecordedOptions,
    FileSource,
)

# Transcribe the MP3 produced by the download cell. Reusing mp3_file_path
# (instead of a hard-coded copy of it) keeps this cell correct when `link` changes.
AUDIO_FILE = mp3_file_path

# Deepgram API key, read from the Colab secret store.
API_KEY = userdata.get('Deepgram_ApiKey')


def main():
    """Transcribe ``AUDIO_FILE`` with Deepgram and return the transcript text.

    The whole file is read into memory and sent to Deepgram with the
    ``nova-2`` model and ``smart_format`` enabled. The transcript of the first
    alternative of the first channel is returned.

    Returns:
        The transcript taken from the Deepgram response.

    Raises:
        Exception: any failure while reading the file or calling Deepgram is
            printed and then re-raised, so the notebook stops here instead of
            silently passing ``None`` on to the text-to-speech step.
    """
    try:
        # STEP 1: Create a Deepgram client using the API key
        deepgram = DeepgramClient(API_KEY)

        with open(AUDIO_FILE, "rb") as file:
            buffer_data = file.read()

        payload: FileSource = {
            "buffer": buffer_data,
        }

        # STEP 2: Configure Deepgram options for audio analysis
        options = PrerecordedOptions(
            model="nova-2",
            smart_format=True,
        )

        # STEP 3: Send the audio buffer and options for transcription
        # NOTE(review): the recorded cell output echoes this line, which looks
        # like a deprecation warning - confirm whether a newer entry point is preferred.
        response = deepgram.listen.prerecorded.v("1").transcribe_file(payload, options)

        # STEP 4: Return the transcript of the first channel's first alternative
        return response['results']['channels'][0]['alternatives'][0]['transcript']

    except Exception as e:
        print(f"Exception: {e}")
        # Re-raise: swallowing the error would make main() return None.
        raise


text = main()

  response = deepgram.listen.prerecorded.v("1").transcribe_file(payload, options)


# Converting Text back to Audio

In [None]:
!pip install elevenlabs

In [None]:
from elevenlabs import ElevenLabs, VoiceSettings
from google.colab import userdata

# `text` comes from the transcription cell; stop early with a clear message
# rather than sending an empty or missing transcript to the API.
if not text:
    raise ValueError("No transcript available: run the transcription cell successfully first.")

# ElevenLabs client, authenticated with the key from the Colab secret store.
client = ElevenLabs(
    api_key = userdata.get('ElevenLabs_ApiKey'),
)

# Request speech for the transcript; the result is consumed chunk by chunk below.
response = client.text_to_speech.convert(
    voice_id="pMsXgVXv3BLzUgSXRplE",
    optimize_streaming_latency="0",
    output_format="mp3_22050_32",
    text=text,
    voice_settings=VoiceSettings(
        stability=0.1,
        similarity_boost=0.3,
        style=0.2,
    ),
)

# Create the directory if it doesn't exist
output_directory = '/content/generated_audio'
os.makedirs(output_directory, exist_ok=True)

# Write the returned chunks to the output MP3 in the order they arrive.
output_file_path = os.path.join(output_directory, "output_audio.mp3")
with open(output_file_path, "wb") as audio_file:
    for chunk in response:
        audio_file.write(chunk)

# Comparing Both Audio Clips

In [None]:
from IPython.display import Audio
# Play the MP3 written by the download cell (mp3_file_path).
print("Original Audio")
# Kept as the cell's last expression so the notebook displays the Audio object.
Audio(mp3_file_path)

Original Audio


In [None]:
# Play the MP3 written by the text-to-speech cell (output_file_path).
print("Generated Audio")
# Kept as the cell's last expression so the notebook displays the Audio object.
Audio(output_file_path)

Generated Audio
