## Overview

- The main purpose of this file is to receive a URL for a YouTube video and download the video using the yt_dlp library.
- Once downloaded, the video file is converted into a .wav file using the moviepy library and then sliced into overlapping audio clips (each spanning 6% of the track, with 30% overlap between consecutive clips), which are stored in a list and converted into mel-spectrograms. These mel-spectrograms will then be passed into our trained model, which will give us a genre prediction.
- Once the process has been completed, we delete the video and audio files to preserve space. 


In [1]:
import yt_dlp
import librosa
from moviepy.editor import VideoFileClip
import tempfile
import os
import numpy as np

In [2]:
class InvalidUrlError(Exception):
    """Raised when a URL handed to the pipeline is not a YouTube link."""

In [3]:
def download_video(url: str, temp_dir_name: str) -> tuple[dict, str]:
    """Download a YouTube video into ``temp_dir_name`` as an .mp4 file.

    Args:
        url: Address of the video. It must contain "youtube.com" or
            "youtu.be"; anything else is rejected before any network access.
        temp_dir_name: Directory the downloaded file is written to.

    Returns:
        A ``(meta, video_file_path)`` tuple: the info dictionary returned by
        yt_dlp for the video, and the expected path of the merged file,
        ``<temp_dir_name>/<video id>.mp4``.

    Raises:
        InvalidUrlError: If ``url`` does not look like a YouTube link.
    """
    if "youtube.com" not in url and "youtu.be" not in url:
        raise InvalidUrlError(f"Not a YouTube URL: {url!r}")

    ydl_opts = {
        "format": "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best",
        "merge_output_format": "mp4",
        # Name the file after the video id so its path can be rebuilt below.
        "outtmpl": f"{temp_dir_name}/%(id)s.%(ext)s",
        "noplaylist": True,
    }

    # Use the downloader as a context manager so it is closed once the
    # download has finished.
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        # meta is a dict with information related to the downloaded file.
        meta = ydl.extract_info(url, download=True)

    # NOTE(review): assumes the final file has an .mp4 extension, which relies
    # on the "merge_output_format" option above -- confirm for the plain
    # "best" fallback format.
    video_file_path = os.path.join(temp_dir_name, f"{meta['id']}.mp4")

    return meta, video_file_path

In [4]:
def convert_video_to_wav(video_file_path: str, temp_dir_name: str) -> str:
    """Extract the audio track of a video file and save it as a .wav file.

    Args:
        video_file_path: Path of the video file to read.
        temp_dir_name: Directory in which ``output.wav`` is written.

    Returns:
        The path of the written .wav file, ``<temp_dir_name>/output.wav``.

    Raises:
        ValueError: If the video has no audio track.
    """
    video = VideoFileClip(video_file_path)
    try:
        audio = video.audio
        if audio is None:
            raise ValueError(f"No audio track found in {video_file_path!r}")
        wav_path = os.path.join(temp_dir_name, 'output.wav')
        audio.write_audiofile(wav_path)
    finally:
        # Always release the clip's readers; a clip left open keeps a handle
        # on the video file, which can prevent the directory from being
        # removed afterwards.
        video.close()
    return wav_path

In [5]:
def slice_audio(
    input_audio_file: str, window: float = 0.06, overlap: float = 0.3
) -> tuple[list, int, float]:
    """Load an audio file and cut it into overlapping, equally sized slices.

    The slice length is ``window`` times the total number of samples, and
    consecutive slices start ``chunk * (1 - overlap)`` samples apart. Slices
    near the end of the signal may be shorter than ``chunk``.

    NOTE(review): the slice length scales with the length of the recording,
    it is not a fixed duration -- confirm this matches how the model was
    trained.

    Args:
        input_audio_file: Path of the audio file to load.
        window: Fraction of the whole signal covered by one slice; must be
            in the range (0, 1].
        overlap: Fraction of a slice shared with the next one; must be in
            the range [0, 1).

    Returns:
        A ``(slices, chunk, sr)`` tuple: the list of audio slices, the
        nominal slice length in samples, and the sample rate reported by
        ``librosa.load``.

    Raises:
        ValueError: If ``window`` or ``overlap`` is out of range, or if the
            audio is too short to be sliced with these settings.
    """
    # Validate the settings before the (comparatively expensive) file load.
    if not 0 < window <= 1:
        raise ValueError(f"window must be in (0, 1], got {window!r}")
    if not 0 <= overlap < 1:
        raise ValueError(f"overlap must be in [0, 1), got {overlap!r}")

    audio, sr = librosa.load(input_audio_file)  # Load the .wav audio
    audio_shape = audio.shape[0]
    chunk = int(audio_shape * window)
    offset = int(chunk * (1 - overlap))

    # A zero step would make range() fail with an unhelpful message.
    if offset <= 0:
        raise ValueError(
            f"Audio in {input_audio_file!r} is too short to slice "
            f"({audio_shape} samples)"
        )

    individual_split_song = [
        audio[start : start + chunk]
        for start in range(0, audio_shape - chunk + offset, offset)
    ]

    return individual_split_song, chunk, sr

In [6]:
def create_spectrograms(individual_split_song, chunk, sr, max_length=78):
    """Turn audio slices into dB-scaled mel-spectrograms of uniform width.

    Slices whose length differs from ``chunk`` are skipped. Every remaining
    slice is converted to a 128-band mel-spectrogram, rescaled to decibels
    relative to its own maximum, and then padded or truncated along the
    second axis to exactly ``max_length`` columns.

    NOTE(review): padding uses numpy's default constant fill; on a dB scale
    referenced to the maximum this is the loudest level rather than silence --
    confirm this matches the preprocessing used to train the model.

    Args:
        individual_split_song: Iterable of audio slices (arrays).
        chunk: Required slice length in samples.
        sr: Sample rate of the audio.
        max_length: Number of columns every returned spectrogram has.

    Returns:
        A list with one spectrogram per accepted slice.
    """
    spectrograms = []
    for segment in individual_split_song:
        # Only full-length slices are converted.
        if segment.shape[0] == chunk:
            power_spec = librosa.feature.melspectrogram(
                y=segment, sr=sr, n_fft=2048, hop_length=512, n_mels=128
            )
            db_spec = librosa.power_to_db(power_spec, ref=np.max)

            # Force a common width: truncate long outputs, pad short ones.
            n_frames = db_spec.shape[1]
            if n_frames >= max_length:
                db_spec = db_spec[:, :max_length]
            else:
                missing = max_length - n_frames
                db_spec = np.pad(
                    db_spec, pad_width=((0, 0), (0, missing)), mode="constant"
                )

            spectrograms.append(db_spec)

    return spectrograms

In [7]:
def process_youtube_link(url: str) -> np.ndarray:
    """Run the whole pipeline for a YouTube URL and return its spectrograms.

    The video is downloaded, its audio is written to a .wav file, the audio
    is sliced, and every slice is converted into a mel-spectrogram. All
    intermediate files live in a temporary directory that is removed when the
    processing is finished.

    Args:
        url: Address of the YouTube video to process.

    Returns:
        An array built from the list of spectrograms, one entry per slice.
    """
    # Errors during the directory cleanup are ignored so that a file that is
    # still locked cannot discard an otherwise successful result.
    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as temp_dir_name:
        # The metadata dictionary is not needed here, only the file path.
        _, video_file_path = download_video(url, temp_dir_name)
        wav_path = convert_video_to_wav(video_file_path, temp_dir_name)
        individual_split_song, chunk, sr = slice_audio(wav_path)
        split_specs_mel_db = create_spectrograms(individual_split_song, chunk, sr)
    return np.array(split_specs_mel_db)
        
        

In [8]:
# Example run of the full pipeline on a sample video.
process_youtube_link("https://www.youtube.com/watch?v=DEJxtwrAhsc&ab_channel=TheDude")

[youtube] Extracting URL: https://www.youtube.com/watch?v=DEJxtwrAhsc&ab_channel=TheDude
[youtube] DEJxtwrAhsc: Downloading webpage
[youtube] DEJxtwrAhsc: Downloading ios player API JSON
[youtube] DEJxtwrAhsc: Downloading android player API JSON
[youtube] DEJxtwrAhsc: Downloading m3u8 information
[info] DEJxtwrAhsc: Downloading 1 format(s): 614+140
[hlsnative] Downloading m3u8 manifest
[hlsnative] Total fragments: 57
[download] Destination: /var/folders/yn/1ryx5r916kbdyq0ly4_ddqsh0000gn/T/tmprg5doziv/DEJxtwrAhsc.f614.mp4
[download] 100% of   11.12MiB in 00:00:04 at 2.74MiB/s                   
[download] Destination: /var/folders/yn/1ryx5r916kbdyq0ly4_ddqsh0000gn/T/tmprg5doziv/DEJxtwrAhsc.f140.m4a
[download] 100% of    4.68MiB in 00:00:00 at 5.48MiB/s   
[Merger] Merging formats into "/var/folders/yn/1ryx5r916kbdyq0ly4_ddqsh0000gn/T/tmprg5doziv/DEJxtwrAhsc.mp4"
Deleting original file /var/folders/yn/1ryx5r916kbdyq0ly4_ddqsh0000gn/T/tmprg5doziv/DEJxtwrAhsc.f614.mp4 (pass -k to keep)
Del

                                                                      

MoviePy - Done.
