Installing and importing dependencies

In [None]:
!pip install spleeter
!pip install yt_dlp
!pip install pydub

In [None]:
import spleeter
from __future__ import unicode_literals
import yt_dlp as youtube_dl
from pydub import AudioSegment
from pydub.silence import split_on_silence
import csv
from pathlib import Path
from termcolor import colored
import os

Function to adjust characteristics of audio

In [None]:
def convert_audio(audio_file):
    """
    Normalise an audio file in place.

    Loads `audio_file` with pydub, resamples it to a 16000 Hz frame rate,
    reduces it to a single channel, sets a sample width of 2 bytes, and
    writes the result back to the same path as WAV.
    """
    normalised = (
        AudioSegment.from_file(audio_file)
        .set_frame_rate(16000)
        .set_channels(1)
        .set_sample_width(2)  # width is given in bytes: 2 bytes == 16 bits
    )
    # Export over the source path so callers keep using the same file name.
    normalised.export(audio_file, format="wav")

Configuration: yt-dlp download options, the link counter, and the path to the links file

In [None]:
# Options passed to yt_dlp.YoutubeDL for every download.
ydl_opts = {
    # Prefer an audio-only stream; fall back to the best combined format.
    "format": "bestaudio/best",
    # "audio-format" is a command-line flag, not a YoutubeDL option key, so as
    # a dict entry it performed no conversion and the downloaded stream was
    # saved as-is under a .wav name. The FFmpegExtractAudio post-processor is
    # the Python-API way to obtain real WAV data (requires ffmpeg).
    "postprocessors": [
        {
            "key": "FFmpegExtractAudio",
            "preferredcodec": "wav",
        }
    ],
    # Let yt-dlp name the raw download after its true container; once the
    # post-processor has run, the resulting file is "audio.wav".
    "outtmpl": "audio.%(ext)s",
    # If a link points into a playlist, download only the single video.
    "noplaylist": True,
}
link_num = 1  # number of the link currently being processed (1-based)
links_file = "/content/links.txt"  # text file with one YouTube link per line

Looping over links and generating audio chunks

In [None]:
with open(links_file) as fp:
   for link in fp:
        print("\nStarting processing for link number ", link_num)

        # Step 1: Extract and download the audio
        try:
            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                ydl.download([link])
        except Exception as e:
            print(colored("Link number {} cannot be downloaded. Exception: {}".format(link_num, e), 'red'))
            continue # continue with the next link in the file

        # Step 2: Separate voice from the audio
        !spleeter separate -p spleeter:2stems -o output "/content/audio.wav"

        # Step 3: Adjust the sampling rate, sample width, and channels
        convert_audio("/content/output/audio/vocals.wav")

        # Step 4: Split into smaller parts
        sound_file = AudioSegment.from_wav("/content/output/audio/vocals.wav")
        audio_chunks = split_on_silence(sound_file,
            # must be silent for at least half a second
            min_silence_len=500,

            # consider it silent if quieter than -16 dBFS
            silence_thresh=-50
        )
        print ("exporting files for link number: ", link_num)
        os.mkdir(str(link_num)) # making folder named after link number we are processing
        for i, chunk in enumerate(audio_chunks):
            out_file = "{0}/{0}_{1}.wav".format(link_num, i)
            chunk.export(out_file, format="wav")

        link_num += 1

        # deleting the redundant files gnerated for previous link to save space
        !rm -rf /content/output
        !rm /content/audio.wav


Starting processing for link number  1
[youtube] Extracting URL: https://youtu.be/sxLAc9mdoqE?si=Wn8ZnPI25N6HY-8K
[youtube] sxLAc9mdoqE: Downloading webpage
[youtube] sxLAc9mdoqE: Downloading ios player API JSON
[youtube] sxLAc9mdoqE: Downloading web creator player API JSON
[youtube] sxLAc9mdoqE: Downloading player 5f8f5b0f
[youtube] sxLAc9mdoqE: Downloading m3u8 information
[info] sxLAc9mdoqE: Downloading 1 format(s): 251
[download] Destination: audio.wav
[download] 100% of    5.84MiB in 00:00:01 at 5.52MiB/s   
INFO:spleeter:Downloading model archive https://github.com/deezer/spleeter/releases/download/v1.4.0/2stems.tar.gz
INFO:spleeter:Validating archive checksum
INFO:spleeter:Extracting downloaded 2stems archive
INFO:spleeter:2stems model file(s) extracted
INFO:spleeter:File output/audio/vocals.wav written succesfully
INFO:spleeter:File output/audio/accompaniment.wav written succesfully
exporting files for link number:  1
