#### Dependencies

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import librosa
import time
import multiprocessing
from multiprocessing.shared_memory import SharedMemory
from dotenv import load_dotenv
import tqdm

from essentia.standard import (
    MonoLoader,
    Danceability,
    Spectrum,
    FrameCutter,
    Loudness,
    RhythmExtractor2013,
    KeyExtractor,
    Energy,
    TonalExtractor,
    Inharmonicity,
    MFCC,
    OnsetRate,
    SpectralCentroidTime,
    DynamicComplexity,
    SpectralPeaks,
    NoveltyCurve,
    Spectrum,
    FrameGenerator,
    Windowing,
    MelBands,
    BeatsLoudness,
    Beatogram,
    Meter,
    HumDetector,
)

2024-10-24 21:44:29.426041: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


#### Global Constants

In [2]:
# Load settings from a local .env file (if one exists) into the process environment
load_dotenv()
# Folder that holds the downloaded audio files; None when the variable is not set
DOWNLOAD_FOLDER = os.getenv('DOWNLOAD_FOLDER')
# Size of the worker pool used when processing songs in parallel
CPU_THREADS = multiprocessing.cpu_count()

#### Data

In [3]:
# Song metadata table; extracted audio features are later written into a copy of it
songs_data = pd.read_csv('data/songs_final.csv')

#### Feature Extraction Functions

In [4]:
def create_spectrogram_image(spectrogram_db, sample_rate):
    """Display a dB-scaled mel-spectrogram with a time axis and a colour bar.

    Args:
        spectrogram_db: 2-D mel-spectrogram in decibels, as returned by
            `mp3_to_spectrogram`.
        sample_rate: sample rate the spectrogram was computed at; used to
            scale the axes.

    Returns:
        None. The figure is shown and then closed.
    """
    # `import librosa` alone does not expose `librosa.display` on librosa
    # versions that do not auto-load submodules, so import it explicitly.
    import librosa.display

    fig = plt.figure(figsize=(10, 4))
    try:
        librosa.display.specshow(spectrogram_db, sr=sample_rate, x_axis='time', y_axis='mel', fmax=11025)
        plt.colorbar(format='%+2.0f dB')
        plt.title("Mel-Spectrogram")
        plt.tight_layout()
        plt.show()
    finally:
        # Always release the figure, even if plotting fails, so repeated calls
        # do not accumulate open figures.
        plt.close(fig)

In [5]:
def mp3_to_spectrogram(audio_path, sample_rate, create_image=False):
    """Load an audio file and return its mel-spectrogram in decibels.

    Args:
        audio_path: path of the audio file to load.
        sample_rate: sample rate passed to `librosa.load` and to the
            mel-spectrogram computation.
        create_image: when True, also display the spectrogram via
            `create_spectrogram_image`.

    Returns:
        The 128-band mel-spectrogram converted to dB, referenced to its
        maximum value.
    """
    waveform, _ = librosa.load(audio_path, sr=sample_rate)
    mel_power = librosa.feature.melspectrogram(
        y=waveform,
        sr=sample_rate,
        n_mels=128,
        fmax=11025,
    )
    mel_db = librosa.power_to_db(mel_power, ref=np.max)

    if not create_image:
        return mel_db

    create_spectrogram_image(mel_db, sample_rate)
    return mel_db

In [6]:
def get_mel_bands(audio):
    """Compute 40 mel-band energies for every frame of `audio`.

    The signal is cut into frames of 2048 samples with a hop of 1024, each
    frame is Hann-windowed, turned into a spectrum and reduced to mel bands.

    Args:
        audio: mono audio signal accepted by Essentia's `FrameGenerator`.

    Returns:
        numpy array with one row of mel-band energies per frame.
    """
    to_window = Windowing(type='hann')
    to_spectrum = Spectrum()
    to_mel = MelBands(numberBands=40)

    frames = FrameGenerator(audio, frameSize=2048, hopSize=1024)
    return np.array([to_mel(to_spectrum(to_window(frame))) for frame in frames])

In [7]:
def run_essentia_algorithms(audio44k, audio16k):
    """Compute the Essentia-based descriptors for one song.

    Args:
        audio44k: mono signal used for danceability, rhythm, key, chords,
            spectral peaks, onset rate, brightness, novelty and meter.
        audio16k: mono signal used for MFCC, loudness, energy and dynamic
            complexity.

    Returns:
        dict keyed by feature name: 'Danceability', 'Loudness', 'BPM', 'Key',
        'Key Scale', 'Energy', 'Chords Significance' (chord label -> number of
        occurrences in the chord progression), 'Inharmonicity' (None when it
        cannot be computed), 'Timbre' (mean of the MFCC coefficients),
        'Onset Rate', 'Brightness', 'Dynamic Complexity', 'Novelty' and
        'Time Signature'.
    """
    # NOTE(review): Essentia documents MFCC as taking a spectrum; here it is fed
    # the time-domain signal with inputSize set to its length -- confirm intended.
    _, mfcc_coeffs = MFCC(inputSize=len(audio16k))(audio16k)
    danceability_score = Danceability()(audio44k)
    loudness_score = Loudness()(audio16k)
    bpm, beat_positions, _, _, _ = RhythmExtractor2013(method="multifeature")(audio44k)
    key, scale, _ = KeyExtractor()(audio44k)
    energy_score = Energy()(audio16k)

    ### Chord Significances
    # The fifth TonalExtractor output is the chord progression; count how often
    # each chord label appears in it.
    _, _, _, _, chords, _, _, _, _, _, _, _ = TonalExtractor()(audio44k)
    unique_chords, counts = np.unique(chords, return_counts=True)
    chords_significance = dict(zip(unique_chords, counts))

    ### Inharmonicity
    # NOTE(review): SpectralPeaks is documented as taking a spectrum; it receives
    # the raw signal here -- confirm intended.
    frequencies, magnitudes = SpectralPeaks()(audio44k)
    hnr_score = None
    # Skip when no peak was found (indexing an empty result would raise) or when
    # the first peak sits at 0 Hz.
    if len(frequencies) > 0 and frequencies[0]:
        hnr_score = Inharmonicity()(frequencies, magnitudes)
    ###

    onset_rate_score = OnsetRate()(audio44k)
    brightness_score = SpectralCentroidTime()(audio44k)
    dynamic_complexity_score, _ = DynamicComplexity()(audio16k)

    # Novelty: median absolute change of the novelty curve over mel-band frames
    mel_bands = get_mel_bands(audio44k)
    novelty_curve = NoveltyCurve()(mel_bands)
    novelty_score = np.median(np.abs(np.diff(novelty_curve)))

    # Time signature estimated from per-beat loudness
    beats_loudness, beats_loudness_band_ratio = BeatsLoudness(beats=beat_positions)(audio44k)
    beatogram = Beatogram()(beats_loudness, beats_loudness_band_ratio)
    time_signature = Meter()(beatogram)

    # The HumDetector-based 'Hum Intervals' feature is currently disabled.

    features = {
        'Danceability': danceability_score[0],
        'Loudness': loudness_score,
        'BPM': bpm,
        'Key': key,
        'Key Scale': scale,
        'Energy': energy_score,
        'Chords Significance': chords_significance,
        'Inharmonicity': hnr_score,
        'Timbre': np.mean(mfcc_coeffs),
        'Onset Rate': onset_rate_score[1],
        'Brightness': brightness_score,
        'Dynamic Complexity': dynamic_complexity_score,
        'Novelty': novelty_score,
        'Time Signature': time_signature,
    }
    return features

In [8]:
def extract_audio_features(audio_file):
    """Extract every feature for one audio file and print the time each stage took.

    Args:
        audio_file: path of the audio file to analyse.

    Returns:
        dict with the descriptors from `run_essentia_algorithms` plus a
        'Spectrogram' entry holding the mel-spectrogram computed at 22050 Hz.
    """
    # Two decodes of the same file: MonoLoader's default rate and 16 kHz
    signal_default_rate = MonoLoader(filename=audio_file)()
    signal_16k = MonoLoader(filename=audio_file, sampleRate=16000)()

    # Stage 1: Essentia descriptors
    algos_started = time.perf_counter()
    algorithm_features = run_essentia_algorithms(signal_default_rate, signal_16k)
    print(f"Algos: {time.perf_counter() - algos_started}")

    # Stage 2: mel-spectrogram
    spectro_started = time.perf_counter()
    spectrogram = mp3_to_spectrogram(audio_file, 22050)
    print(f"Spectro: {time.perf_counter() - spectro_started}")

    return {**algorithm_features, 'Spectrogram': spectrogram}

#### Main Code

In [9]:
class SongPath:
    """Parsed view of a downloaded song's file path.

    The file name must follow ``(int)^(video id)^(title).mp3``,
    e.g. ``/some/path/0^LlWGt_84jpg^Special Breed.mp3``.

    Attributes set by the constructor:
        path: the path exactly as given.
        filename: base name of the path.
        index: integer parsed from the first field.
        video_id: the second field.
        title_with_extension: the remainder of the file name.
        title: `title_with_extension` without its extension.

    Raises:
        ValueError: if the file name has fewer than three '^'-separated fields
            or the first field is not an integer.
    """

    def __init__(self, song_path: str):
        self.path = song_path
        self.filename = os.path.basename(song_path)

        # Split on the first two carets only, so a '^' inside the title stays
        # part of the title instead of invalidating the file name.
        song_filename_split = self.filename.split('^', 2)
        if len(song_filename_split) != 3:
            raise ValueError("The song's filename doesn't follow the correct format: /some/path/(int)^(video id)^(title).mp3")

        index_text, self.video_id, self.title_with_extension = song_filename_split

        try:
            self.index = int(index_text)
        except ValueError as exc:
            raise ValueError(
                f"The song's filename must start with an integer index, got {index_text!r} in {self.filename!r}"
            ) from exc
        self.title = os.path.splitext(self.title_with_extension)[0]

    def __str__(self):
        return f"Idx: {self.index},  videoID: {self.video_id}, title: {self.title_with_extension}"

In [10]:
def process_song(args):
    """Worker: extract one song's features and store them in the shared table.

    Args:
        args: tuple ``(song_path, shared_songs_data, lock, process_times)``.
            `shared_songs_data['data']` holds the DataFrame to update and
            `lock` guards it. `process_times` is unpacked but not used.

    Returns:
        None. The result is written to ``shared_songs_data['data']`` at the
        row labelled with the song's index.
    """
    song_path, shared_songs_data, lock, process_times = args
    song = SongPath(song_path)

    # The expensive part runs outside the lock
    song_features = extract_audio_features(song.path)

    container_types = (tuple, set, list, np.ndarray, dict)

    with lock:
        # Take the table out of the shared dict, edit it, then put it back
        table = shared_songs_data['data']
        for name, value in song_features.items():
            is_new_container_column = name not in table.columns and isinstance(value, container_types)
            if is_new_container_column:
                # Container values need an object-typed column before assignment
                table[name] = np.nan
                table[name] = table[name].astype(object)
            table.at[song.index, name] = value
        shared_songs_data['data'] = table

In [23]:
def process_songs():
    """Extract features for the downloaded songs in parallel.

    Lists the files in DOWNLOAD_FOLDER, takes the first ``len(songs_data) // 2``
    of them, and runs `process_song` on each in a pool of CPU_THREADS workers.
    Workers write into a deep copy of `songs_data` that is shared through a
    multiprocessing manager.

    Returns:
        tuple ``(songs_data_full, process_times)``: the copy of `songs_data`
        with the feature columns filled in, and `process_times`, which is
        currently always None.

    Raises:
        RuntimeError: if DOWNLOAD_FOLDER is not configured.
    """
    if not DOWNLOAD_FOLDER:
        # Without this check os.listdir(None) would silently list the current
        # directory and os.path.join would then fail with an opaque TypeError.
        raise RuntimeError("DOWNLOAD_FOLDER is not set; define it in the environment or in the .env file")

    song_paths = [os.path.join(DOWNLOAD_FOLDER, song_filename) for song_filename in os.listdir(DOWNLOAD_FOLDER)]

    # NOTE(review): the bounds come from len(songs_data) but are applied to the
    # directory listing, whose order os.listdir does not guarantee -- confirm
    # this selects the intended songs.
    songs_data_lower, songs_data_higher = 0, len(songs_data) // 2

    # Per-song timing is disabled; the slot is kept so the worker argument
    # tuple and the return value keep their shape.
    process_times = None

    # The context manager shuts the manager's server process down when done;
    # previously it was left running after every call.
    with multiprocessing.Manager() as manager:
        lock = manager.Lock()
        shared_songs_data = manager.dict({'data': songs_data.copy(deep=True)})

        args = [
            (song_path, shared_songs_data, lock, process_times)
            for song_path in song_paths[songs_data_lower:songs_data_higher]
        ]

        with multiprocessing.Pool(CPU_THREADS) as pool:
            pool.map(process_song, args)

        # Fetch the result while the manager is still alive
        songs_data_full = shared_songs_data['data']

    return songs_data_full, process_times

In [24]:
# Run the extraction; process_times is currently always None (timing is disabled)
songs_data_full, process_times = process_songs()
songs_data_full

50522


TypeError: cannot unpack non-iterable NoneType object

In [17]:
# Keep only the rows that received features: 'Danceability' is written for every processed song
df2 = songs_data_full[songs_data_full['Danceability'].notna()]

In [18]:
# Show the songs that have extracted features
df2

Unnamed: 0,title,artist,views,videoID,duration,Danceability,Loudness,BPM,Key,Key Scale,Energy,Chords Significance,Inharmonicity,Timbre,Onset Rate,Brightness,Dynamic Complexity,Novelty,Time Signature,Spectrogram
0,Special Breed,PolyCulture,34,LlWGt_84jpg,331,1.365855,5662.711914,128.000244,F,minor,399511.15625,"{'A': 1364, 'Ab': 44, 'Am': 1214, 'C': 364, 'C...",,-19.498604,4.38148,1235.493164,4.430043,176.946777,4.0,"[[-80.0, -59.25853, -42.22901, -38.483425, -34..."
10000,Unmelted Snow,,5,q_GejATTMXE,182,0.91771,1031.092896,109.975029,Bb,minor,31438.314453,"{'Ab': 176, 'Bb': 155, 'Bbm': 834, 'C#': 662, ...",,-20.458689,1.075405,854.641968,2.354395,2.455002,4.0,"[[-80.0, -80.0, -80.0, -77.90633, -73.52021, -..."
100000,Gris,J Balvin,11657594,5XkHKg2Vr0M,177,1.656936,3486.126953,92.989853,F#,minor,193677.546875,"{'A': 277, 'Ab': 32, 'Abm': 57, 'Am': 13, 'B':...",0.244598,-16.540567,4.217937,1639.488037,3.102225,4375.3125,4.0,"[[-80.0, -80.0, -80.0, -80.0, -80.0, -80.0, -8..."
100001,La Cumbia Tribalera (feat. La Trakalosa & Viol...,El Pelon del Mikrophone,11657063,M_A58j6A-hg,218,1.347578,3015.59375,133.968338,F,minor,155988.890625,"{'A': 44, 'Ab': 141, 'Abm': 17, 'Am': 45, 'B':...",0.313198,-18.797073,5.190122,2116.46167,3.617415,4546.5,2.0,"[[-50.703056, -53.052444, -69.95146, -78.82106..."
100002,Midnight Blues,"Snowy White, The White Flames",11654806,4DU4iaqsi_M,513,0.981207,2156.395752,132.20697,E,minor,94561.421875,"{'A': 648, 'Abm': 3, 'Am': 534, 'Bb': 18, 'Bbm...",0.114499,-50.442608,2.717391,1216.583374,7.383959,10.602936,3.0,"[[-80.0, -80.0, -80.0, -80.0, -80.0, -80.0, -8..."
100003,Bad Things,Cults,11654361,X6-aOcG-wys,219,1.058506,3092.887939,116.518448,E,major,161993.90625,"{'A': 1357, 'Ab': 93, 'Abm': 147, 'Am': 121, '...",0.287478,-16.01387,2.237372,1341.236328,3.325788,1149.412109,16.0,"[[-80.0, -80.0, -80.0, -80.0, -80.0, -80.0, -7..."
100004,The High Road (Official HD Video),Broken Bells,11651617,gWBG1j_flrg,232,1.211902,3067.97168,79.828369,G,major,160049.96875,"{'A': 10, 'Am': 465, 'Bm': 3, 'C': 736, 'C#m':...",0.067172,-20.075367,3.455128,1329.336182,3.053781,520.966064,16.0,"[[-80.0, -80.0, -80.0, -80.0, -80.0, -80.0, -8..."
100005,Brave Shine,Aimer,11650967,hvVN2i6o4A4,233,0.980451,3772.988281,112.866844,Ab,major,217939.703125,"{'A': 2, 'Ab': 1543, 'Abm': 114, 'Am': 74, 'B'...",0.044984,-16.555666,3.242063,1778.145752,2.286354,1139.421875,16.0,"[[-80.0, -80.0, -80.0, -80.0, -80.0, -80.0, -8..."
100006,Pepas (Tiësto Remix),"Farruko, Tiësto",11646247,yBSH4NocOwM,223,1.449566,5603.924316,129.968521,A,minor,393336.65625,"{'A': 330, 'Ab': 25, 'Am': 1206, 'B': 52, 'Bb'...",0.249297,-9.036757,3.97073,1246.279419,2.665078,6428.171875,2.0,"[[-31.806156, -23.298433, -16.804958, -10.9806..."
100007,Cat & Dog,TOMORROW X TOGETHER,11645878,J7XQgqQmRME,188,1.122777,4656.84082,161.91066,Bb,major,298375.5,"{'A': 269, 'Ab': 7, 'Am': 884, 'Bb': 872, 'Bbm...",0.060236,-15.260021,4.126181,1782.347534,2.657992,19021.71875,8.0,"[[-80.0, -80.0, -80.0, -80.0, -80.0, -80.0, -8..."
