#### Dependencies

In [370]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import librosa
import time
import multiprocessing
from multiprocessing.shared_memory import SharedMemory
from dotenv import load_dotenv
import tqdm

from essentia.standard import (
    MonoLoader,
    Danceability,
    Spectrum,
    FrameCutter,
    Loudness,
    RhythmExtractor2013,
    KeyExtractor,
    Energy,
    TonalExtractor,
    Inharmonicity,
    MFCC,
    OnsetRate,
    SpectralCentroidTime,
    DynamicComplexity,
    SpectralPeaks,
    NoveltyCurve,
    Spectrum,
    FrameGenerator,
    Windowing,
    MelBands,
    BeatsLoudness,
    Beatogram,
    Meter,
    HumDetector,
)

#### Global Constants

In [371]:
load_dotenv()
DOWNLOAD_FOLDER = os.getenv('DOWNLOAD_FOLDER')
CPU_THREADS = multiprocessing.cpu_count()

#### Data

In [372]:
songs_data = pd.read_csv('data/songs_final.csv')

#### Feature Extraction Functions

In [373]:
def create_spectrogram_image(spectrogram_db, sample_rate):
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(spectrogram_db, sr=sample_rate, x_axis='time', y_axis='mel', fmax=11025)
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"Mel-Spectrogram")
    plt.tight_layout()
    plt.show()
    plt.close()

In [374]:
def mp3_to_spectrogram(audio_path, create_image=False):
    mp3, sample_rate = librosa.load(audio_path, sr=22050)
    spectrogram = librosa.feature.melspectrogram(y=mp3, sr=sample_rate, n_mels=128, fmax=11025)
    spectrogram_db = librosa.power_to_db(spectrogram, ref=np.max)

    if create_image:
        create_spectrogram_image(spectrogram_db, sample_rate)

    return spectrogram_db, sample_rate

In [375]:
def get_mel_bands(audio):
    spectrum = Spectrum()
    frame_generator = FrameGenerator(audio, frameSize=2048, hopSize=1024)
    window = Windowing(type='hann')

    mel_bands = MelBands(numberBands=40)
    mel_band_energies = []

    for frame in frame_generator:
        spec = spectrum(window(frame))
        mel_band_energies.append(mel_bands(spec))

    mel_band_energies = np.array(mel_band_energies)
    return mel_band_energies

In [376]:
def run_essentia_algorithms(audio44k):
    startTime = time.perf_counter()
    _, mfcc_coeffs = MFCC(inputSize=len(audio44k))(audio44k)
    danceability_score = Danceability()(audio44k)
    loudness_score = Loudness()(audio44k)
    bpm, beat_positions, _, _, _ = RhythmExtractor2013(method="multifeature")(audio44k)
    key, scale, _ = KeyExtractor()(audio44k)
    energy_score = Energy()(audio44k)
    print(time.perf_counter() - startTime)

    ### Chord Significances
    startTime = time.perf_counter()
    _, _, _, _, chords, _, _, _, _, _, _, _ = TonalExtractor()(audio44k)
    unique_chords, counts = np.unique(chords, return_counts=True)
    chords_significance = {chord: significance for (chord, significance) in zip(unique_chords, counts)}
    print(time.perf_counter() - startTime)

    ### Inharmonicity
    startTime = time.perf_counter()
    frames = []
    frameCutter = FrameCutter()
    while True:
        frame = frameCutter(audio44k)
        if not len(frame):
            break
        frames.append(frame)
        
    spectrum_magnitudes = []
    for frame in frames:
        spectrum_magnitudes_frame = Spectrum()(frame)
        spectrum_magnitudes.append(spectrum_magnitudes_frame)
    spectrum_magnitudes = np.array(spectrum_magnitudes).flatten()
    
    frequencies, magnitudes = SpectralPeaks()(audio44k)
    hnr_score = None
    if frequencies[0]: 
        hnr_score = Inharmonicity()(frequencies, magnitudes)
    print(time.perf_counter() - startTime)
    ###
    
    startTime = time.perf_counter()
    onset_rate_score = OnsetRate()(audio44k)
    brightness_score = SpectralCentroidTime()(audio44k)
    dynamic_complexity_score, _ = DynamicComplexity()(audio44k)
    
    mel_bands = get_mel_bands(audio44k)
    novelty_curve = NoveltyCurve()(mel_bands)
    novelty_score = np.median(np.abs(np.diff(novelty_curve)))
    
    beats_loudness, beats_loudness_band_ratio = BeatsLoudness(beats=beat_positions)(audio44k)
    beatogram = Beatogram()(beats_loudness, beats_loudness_band_ratio)
    time_signature = Meter()(beatogram)
    print(time.perf_counter() - startTime)
    
    startTime = time.perf_counter()
    _, _, saliences, hum_starts, hum_ends = HumDetector()(audio44k)
    hum_intervals = [(hum_start, hum_end, salience) for hum_start, hum_end, salience in zip(hum_starts, hum_ends, saliences)]
    print(time.perf_counter() - startTime)    
    
    features = {
        'Danceability': danceability_score[0],
        'Loudness': loudness_score,
        'BPM': bpm,
        'Key': key,
        'Key Scale': scale,
        'Energy': energy_score,
        'Chords Significance': chords_significance,
        'Inharmonicity': hnr_score,
        'Timbre (MFCC Coefficients Mean)': np.mean(mfcc_coeffs),
        'Onset Rate': onset_rate_score[1],
        'Brightness': brightness_score,
        'Dynamic Complexity': dynamic_complexity_score,
        'Novelty': novelty_score,
        'Time Signature': time_signature,
        'Hum Intervals': hum_intervals
    }
    return features

In [377]:
def extract_audio_features(audio_file):
    # Load the audio file
    print(audio_file)
    audio44k = MonoLoader(filename=audio_file)()

    # Run algorithms
    startTime = time.perf_counter()
    algorithm_features = run_essentia_algorithms(audio44k)
    print(f"Algos: {time.perf_counter() - startTime}")
    
    startTime = time.perf_counter()
    spectrogram, sample_rate = mp3_to_spectrogram(audio_file)
    print(f"Spectro: {time.perf_counter() - startTime}")

    # Merge results
    features = algorithm_features | {'Spectrogram': spectrogram, 'Spectrogram Sample Rate': sample_rate}
    return features

#### Main Code

In [378]:
# Class constructed from song path
# Song path must follow this format: /some/path/(int)^(video id)^(title).mp3
#                               e.g  /some/path/0^LlWGt_84jpg^Special Breed.mp3
class SongPath:
    def __init__(self, song_path: str):
        self.path = song_path
        self.filename = os.path.basename(song_path)

        song_filename_split = self.filename.split('^')
        if len(song_filename_split) != 3:
            raise Exception("The song's filename doesn't follow the correct format: /some/path/(int)^(video id)^(title).mp3")
        
        self.index, self.video_id, self.title_with_extension = song_filename_split
        self.title = os.path.splitext(self.title_with_extension)[0]

    def __str__(self):
        return f"Idx: {self.index},  videoID: {self.video_id}, title: {self.title_with_extension}"

In [379]:
class SharedNumpyArray:
    '''
    Wraps a numpy array so that it can be shared quickly among processes,
    avoiding unnecessary copying and (de)serializing.
    '''
    def __init__(self, array):
        '''
        Creates the shared memory and copies the array therein
        '''
        # create the shared memory location of the same size of the array
        self._shared = SharedMemory(create=True, size=array.nbytes)
        
        # save data type and shape, necessary to read the data correctly
        self._dtype, self._shape = array.dtype, array.shape
        
        # create a new numpy array that uses the shared memory we created.
        # at first, it is filled with zeros
        res = np.ndarray(
            self._shape, dtype=self._dtype, buffer=self._shared.buf
        )
        
        # copy data from the array to the shared memory. numpy will
        # take care of copying everything in the correct format
        res[:] = array[:]

    def read(self):
        '''
        Reads the array from the shared memory without unnecessary copying.
        '''
        # simply create an array of the correct shape and type,
        # using the shared memory location we created earlier
        return np.ndarray(self._shape, self._dtype, buffer=self._shared.buf)

    def copy(self):
        '''
        Returns a new copy of the array stored in shared memory.
        '''
        return np.copy(self.read_array())
        
    def unlink(self):
        '''
        Releases the allocated memory. Call when finished using the data,
        or when the data was copied somewhere else.
        '''
        self._shared.close()
        self._shared.unlink()

In [380]:
class SharedPandasDataFrame:
    '''
    Wraps a pandas dataframe so that it can be shared quickly among processes,
    avoiding unnecessary copying and (de)serializing.
    '''
    def __init__(self, df):
        '''
        Creates the shared memory and copies the dataframe therein
        '''
        self._values = SharedNumpyArray(df.values)
        self._index = df.index
        self._columns = df.columns

    def read(self):
        '''
        Reads the dataframe from the shared memory
        without unnecessary copying.
        '''
        return pd.DataFrame(
            self._values.read(),
            index=self._index,
            columns=self._columns
        )
    
    def copy(self):
        '''
        Returns a new copy of the dataframe stored in shared memory.
        '''
        return pd.DataFrame(
            self._values.copy(),
            index=self._index,
            columns=self._columns
        )
        
    def unlink(self):
        '''
        Releases the allocated memory. Call when finished using the data,
        or when the data was copied somewhere else.
        '''
        self._values.unlink()

In [381]:
def process_song(args):
    song_path, shared_songs_data = args
    song = SongPath(song_path)
    songs_data_df = shared_songs_data.read()

    # Extract song features
    song_features = extract_audio_features(song.path)

    # Add results to global dataframe
    for feature, value in song_features.items():
        if feature not in songs_data_df.columns and isinstance(value, (tuple, set, list, np.ndarray, dict)):
            songs_data_df[feature] = np.nan
            songs_data_df[feature] = songs_data_df[feature].astype(object)
        print(song.index, feature, value)
        songs_data_df.at[song.index, feature] = value
        print(songs_data_df.iloc[0])
        break

    print(songs_data_df.iloc[0])
    print(shared_songs_data.read())

In [382]:
def process_songs():
    shared_songs_data = SharedPandasDataFrame(songs_data)
        
    process_times = []
    song_paths = [os.path.join(DOWNLOAD_FOLDER, song_filename) for song_filename in os.listdir(DOWNLOAD_FOLDER)]

    args = [(song_path, shared_songs_data) for song_path in song_paths]
    args = [args[0]]

    with multiprocessing.Pool(CPU_THREADS) as pool:
        pool.map(process_song, args)

    shared_songs_data.unlink()

In [383]:
process_songs()

/mnt/d/Alex Stuff/Songs/0^LlWGt_84jpg^Special Breed.mp3
6.812440928999422
1.040354507000302
3.0060924230001547
1.675882477999039
2.374679857000956
Algos: 14.964224246001322
Spectro: 9.111353979000341
0 Danceability 1.365855097770691
title           Special Breed
artist            PolyCulture
views                      34
videoID           LlWGt_84jpg
duration                  331
Danceability              NaN
Name: 0, dtype: object
title           Special Breed
artist            PolyCulture
views                      34
videoID           LlWGt_84jpg
duration                  331
Danceability              NaN
Name: 0, dtype: object
                               title  \
0                      Special Breed   
1                       Unnoticeable   
2             Time Dawdles Immersion   
3                           Justness   
4                     INTRANSIGEANCE   
...                              ...   
101040                        Misery   
101041             What Lies Beneath   
1