First we must install external audio analysis libs

In [5]:
# Install a pip package in the current Jupyter kernel
import sys
!{sys.executable} -m pip install librosa
!{sys.executable} -m pip install essentia


Collecting librosa
  Using cached librosa-0.9.1-py3-none-any.whl (213 kB)
Collecting resampy>=0.2.2
  Using cached resampy-0.2.2-py3-none-any.whl
Collecting audioread>=2.1.5
  Using cached audioread-2.1.9-py3-none-any.whl
Collecting soundfile>=0.10.2
  Using cached SoundFile-0.10.3.post1-py2.py3-none-any.whl (21 kB)
Installing collected packages: soundfile, resampy, audioread, librosa
Successfully installed audioread-2.1.9 librosa-0.9.1 resampy-0.2.2 soundfile-0.10.3.post1
Collecting essentia
  Using cached essentia-2.1b6.dev778-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.7 MB)
Installing collected packages: essentia
Successfully installed essentia-2.1b6.dev778


In [6]:
import numpy as np
import pandas as pd
import os
import essentia.standard as es
import librosa

[   INFO   ] MusicExtractorSVM: no classifier models were configured by default


In [92]:

# assign directory
directory = './sample_music'

feature_vecs = pd.DataFrame()

# iterate over files in
# that directory
for filename in os.listdir(directory):
    f = os.path.join(directory, filename)
    feature_vec = pd.DataFrame()
    # checking if it is a file
    if os.path.isfile(f):
        
        print('generating features for ' + filename)
        # Compute all features, aggregate only 'mean' and 'stdev' statistics for all low-level, rhythm and tonal frame features
        features, features_frames = es.MusicExtractor(lowlevelStats=['mean', 'stdev'])(f)
        
        #Loading into librosa
        y, sr = librosa.load(f)
        hop_length = 512
        frame_length = 1024
        n_fft = 1
        
        #generating energy and chromagram features
        rmse = librosa.feature.rms(y = y , center=True)
        chromagram = librosa.feature.chroma_stft(y=y, sr=sr, hop_length=hop_length)
        
        print('finished feature generation for ' + filename)
        print('creating feature vector')
        
        song_name = pd.DataFrame({'song': [filename]})
        feature_vec['song'] = song_name
        
        loudness = pd.DataFrame({'loudness': [features['lowlevel.loudness_ebu128.integrated']]})
        feature_vec['loudness'] = loudness
        
        energy_df = pd.DataFrame({'energy': [np.mean(rmse)]})
        feature_vec['energy'] = energy_df
        
        #condensing chromagram data
        chromagram_mean = np.mean(chromagram, axis=1)
        sdict =""
        i=1
        for s in chromagram_mean:
          sdict += "'chroma_{0}':[{1}],".format(i,str(s)) 
          i+=1

        sdict = "{"+sdict+"}"
        
        chroma_df = pd.DataFrame.from_dict(eval(sdict))
        feature_vec[chroma_df.columns] = chroma_df
        
        print('finished creating vector')
        
    print('appending feature vec and clearing for next song')
    feature_vecs = feature_vecs.append(other = feature_vec, ignore_index = True)
    del feature_vec
    
print(feature_vecs)
        

generating features for Independent Music Licensing Collective (IMLC) - Hot Sand.mp3


[   INFO   ] MusicExtractor: Read metadata
[   INFO   ] MusicExtractor: Compute md5 audio hash, codec, length, and EBU 128 loudness
[   INFO   ] MusicExtractor: Replay gain
[   INFO   ] MusicExtractor: Compute audio features
[   INFO   ] MusicExtractor: Compute aggregation
[   INFO   ] All done
  return f(*args, **kwargs)


finished feature generation for Independent Music Licensing Collective (IMLC) - Hot Sand.mp3
creating feature vector
finished creating vector
appending feature vec and clearing for next song
generating features for One Man Book - Heresy of Paraphrase.mp3


[   INFO   ] MusicExtractor: Read metadata
[   INFO   ] MusicExtractor: Compute md5 audio hash, codec, length, and EBU 128 loudness
[   INFO   ] MusicExtractor: Replay gain
[   INFO   ] MusicExtractor: Compute audio features
[   INFO   ] MusicExtractor: Compute aggregation
[   INFO   ] All done
  return f(*args, **kwargs)


finished feature generation for One Man Book - Heresy of Paraphrase.mp3
creating feature vector
finished creating vector
appending feature vec and clearing for next song
generating features for AKMV-18 - Quantum Metanoia.mp3


[   INFO   ] MusicExtractor: Read metadata
  return f(*args, **kwargs)
[   INFO   ] MusicExtractor: Compute md5 audio hash, codec, length, and EBU 128 loudness
[   INFO   ] MusicExtractor: Replay gain
[   INFO   ] MusicExtractor: Compute audio features
[   INFO   ] MusicExtractor: Compute aggregation
[   INFO   ] All done


finished feature generation for AKMV-18 - Quantum Metanoia.mp3
creating feature vector
finished creating vector
appending feature vec and clearing for next song
generating features for Metre - Viscid.mp3


[   INFO   ] MusicExtractor: Read metadata
[   INFO   ] MusicExtractor: Compute md5 audio hash, codec, length, and EBU 128 loudness
[   INFO   ] MusicExtractor: Replay gain
[   INFO   ] MusicExtractor: Compute audio features
[   INFO   ] MusicExtractor: Compute aggregation
[   INFO   ] All done
  return f(*args, **kwargs)


finished feature generation for Metre - Viscid.mp3
creating feature vector
finished creating vector
appending feature vec and clearing for next song
generating features for PSRV - Toca Raul - STUDIO RAW.mp3


[   INFO   ] MusicExtractor: Read metadata
[   INFO   ] MusicExtractor: Compute md5 audio hash, codec, length, and EBU 128 loudness
[   INFO   ] MusicExtractor: Replay gain
[   INFO   ] MusicExtractor: Compute audio features
[   INFO   ] MusicExtractor: Compute aggregation
[   INFO   ] All done
  return f(*args, **kwargs)


finished feature generation for PSRV - Toca Raul - STUDIO RAW.mp3
creating feature vector
finished creating vector
appending feature vec and clearing for next song
generating features for M33 Project - FolkPorts.mp3


[   INFO   ] MusicExtractor: Read metadata
[   INFO   ] MusicExtractor: Compute md5 audio hash, codec, length, and EBU 128 loudness
[   INFO   ] MusicExtractor: Replay gain
[   INFO   ] MusicExtractor: Compute audio features
[   INFO   ] MusicExtractor: Compute aggregation
[   INFO   ] All done
  return f(*args, **kwargs)


finished feature generation for M33 Project - FolkPorts.mp3
creating feature vector
finished creating vector
appending feature vec and clearing for next song
generating features for Audiobinger - Pre-Game Warm Up.mp3


[   INFO   ] MusicExtractor: Read metadata
[   INFO   ] MusicExtractor: Compute md5 audio hash, codec, length, and EBU 128 loudness
[   INFO   ] MusicExtractor: Replay gain
[   INFO   ] MusicExtractor: Compute audio features
[   INFO   ] MusicExtractor: Compute aggregation
[   INFO   ] All done
  return f(*args, **kwargs)


finished feature generation for Audiobinger - Pre-Game Warm Up.mp3
creating feature vector
finished creating vector
appending feature vec and clearing for next song
generating features for Mr Smith - AB-CD.mp3


[   INFO   ] MusicExtractor: Read metadata
[   INFO   ] MusicExtractor: Compute md5 audio hash, codec, length, and EBU 128 loudness
[   INFO   ] MusicExtractor: Replay gain
[   INFO   ] MusicExtractor: Compute audio features
[   INFO   ] MusicExtractor: Compute aggregation
[   INFO   ] All done
  return f(*args, **kwargs)


finished feature generation for Mr Smith - AB-CD.mp3
creating feature vector
finished creating vector
appending feature vec and clearing for next song
generating features for Lobo Loco - Technomagus Meeting (ID 502).mp3


[   INFO   ] MusicExtractor: Read metadata
[   INFO   ] MusicExtractor: Compute md5 audio hash, codec, length, and EBU 128 loudness
[   INFO   ] MusicExtractor: Replay gain
[   INFO   ] MusicExtractor: Compute audio features
[   INFO   ] MusicExtractor: Compute aggregation
[   INFO   ] All done
  return f(*args, **kwargs)


finished feature generation for Lobo Loco - Technomagus Meeting (ID 502).mp3
creating feature vector
finished creating vector
appending feature vec and clearing for next song
generating features for Blue Ducks - Floss Suffers From Gamma Radiation.mp3


[   INFO   ] MusicExtractor: Read metadata
[   INFO   ] MusicExtractor: Compute md5 audio hash, codec, length, and EBU 128 loudness
[   INFO   ] MusicExtractor: Replay gain
[   INFO   ] MusicExtractor: Compute audio features
[   INFO   ] MusicExtractor: Compute aggregation
[   INFO   ] All done
  return f(*args, **kwargs)


finished feature generation for Blue Ducks - Floss Suffers From Gamma Radiation.mp3
creating feature vector
finished creating vector
appending feature vec and clearing for next song
generating features for Jahzzar - Siesta.mp3


[   INFO   ] MusicExtractor: Read metadata
  return f(*args, **kwargs)
[   INFO   ] MusicExtractor: Compute md5 audio hash, codec, length, and EBU 128 loudness
[   INFO   ] MusicExtractor: Replay gain
[   INFO   ] MusicExtractor: Compute audio features
[   INFO   ] MusicExtractor: Compute aggregation
[   INFO   ] All done


finished feature generation for Jahzzar - Siesta.mp3
creating feature vector
finished creating vector
appending feature vec and clearing for next song
generating features for Jahzzar - Birthday Cake.mp3


[   INFO   ] MusicExtractor: Read metadata
[   INFO   ] MusicExtractor: Compute md5 audio hash, codec, length, and EBU 128 loudness
[   INFO   ] MusicExtractor: Replay gain
[   INFO   ] MusicExtractor: Compute audio features
[   INFO   ] MusicExtractor: Compute aggregation
[   INFO   ] All done
  return f(*args, **kwargs)


finished feature generation for Jahzzar - Birthday Cake.mp3
creating feature vector
finished creating vector
appending feature vec and clearing for next song
generating features for Independent Music Licensing Collective (IMLC) - Get up and Go.mp3


[   INFO   ] MusicExtractor: Read metadata
[   INFO   ] MusicExtractor: Compute md5 audio hash, codec, length, and EBU 128 loudness
[   INFO   ] MusicExtractor: Replay gain
[   INFO   ] MusicExtractor: Compute audio features
[   INFO   ] MusicExtractor: Compute aggregation
[   INFO   ] All done
  return f(*args, **kwargs)


finished feature generation for Independent Music Licensing Collective (IMLC) - Get up and Go.mp3
creating feature vector
finished creating vector
appending feature vec and clearing for next song
generating features for Audiobinger - Dream Big.mp3


[   INFO   ] MusicExtractor: Read metadata
[   INFO   ] MusicExtractor: Compute md5 audio hash, codec, length, and EBU 128 loudness
[   INFO   ] MusicExtractor: Replay gain
[   INFO   ] MusicExtractor: Compute audio features
[   INFO   ] MusicExtractor: Compute aggregation
[   INFO   ] All done
  return f(*args, **kwargs)


finished feature generation for Audiobinger - Dream Big.mp3
creating feature vector
finished creating vector
appending feature vec and clearing for next song
generating features for Windom Earle - kirblooey.mp3


[   INFO   ] MusicExtractor: Read metadata
[   INFO   ] MusicExtractor: Compute md5 audio hash, codec, length, and EBU 128 loudness
[   INFO   ] MusicExtractor: Replay gain
[   INFO   ] MusicExtractor: Compute audio features
[   INFO   ] MusicExtractor: Compute aggregation
[   INFO   ] All done
  return f(*args, **kwargs)


finished feature generation for Windom Earle - kirblooey.mp3
creating feature vector
finished creating vector
appending feature vec and clearing for next song
                                                 song   loudness    energy  \
0   Independent Music Licensing Collective (IMLC) ...  -8.784393  0.274500   
1             One Man Book - Heresy of Paraphrase.mp3 -13.526387  0.140073   
2                      AKMV-18 - Quantum Metanoia.mp3 -13.709229  0.122383   
3                                  Metre - Viscid.mp3 -13.276937  0.148825   
4                   PSRV - Toca Raul - STUDIO RAW.mp3  -0.877343  0.599803   
5                         M33 Project - FolkPorts.mp3 -11.881506  0.152173   
6                  Audiobinger - Pre-Game Warm Up.mp3 -10.930477  0.211538   
7                                Mr Smith - AB-CD.mp3 -14.197141  0.138444   
8        Lobo Loco - Technomagus Meeting (ID 502).mp3 -10.673089  0.238694   
9   Blue Ducks - Floss Suffers From Gamma Radiatio... -11.565

In [86]:
del feature_vecs

In [93]:
song_names_df = feature_vecs['song']

In [94]:
print(song_names_df)

0     Independent Music Licensing Collective (IMLC) ...
1               One Man Book - Heresy of Paraphrase.mp3
2                        AKMV-18 - Quantum Metanoia.mp3
3                                    Metre - Viscid.mp3
4                     PSRV - Toca Raul - STUDIO RAW.mp3
5                           M33 Project - FolkPorts.mp3
6                    Audiobinger - Pre-Game Warm Up.mp3
7                                  Mr Smith - AB-CD.mp3
8          Lobo Loco - Technomagus Meeting (ID 502).mp3
9     Blue Ducks - Floss Suffers From Gamma Radiatio...
10                                 Jahzzar - Siesta.mp3
11                          Jahzzar - Birthday Cake.mp3
12    Independent Music Licensing Collective (IMLC) ...
13                          Audiobinger - Dream Big.mp3
14                         Windom Earle - kirblooey.mp3
Name: song, dtype: object


In [95]:
song_feats_df = feature_vecs.iloc[:,1:15]

In [96]:
print(song_feats_df)

     loudness    energy  chroma_1  chroma_2  chroma_3  chroma_4  chroma_5  \
0   -8.784393  0.274500  0.253995  0.366471  0.470985  0.271383  0.347818   
1  -13.526387  0.140073  0.346041  0.270265  0.243985  0.499794  0.291615   
2  -13.709229  0.122383  0.348625  0.488760  0.486687  0.378523  0.417280   
3  -13.276937  0.148825  0.656225  0.571515  0.527145  0.521314  0.466644   
4   -0.877343  0.599803  0.290823  0.351870  0.447432  0.409944  0.624397   
5  -11.881506  0.152173  0.198102  0.215866  0.447313  0.221759  0.192497   
6  -10.930477  0.211538  0.351126  0.305202  0.311085  0.437736  0.678825   
7  -14.197141  0.138444  0.303869  0.371715  0.512024  0.345544  0.470262   
8  -10.673089  0.238694  0.335935  0.566485  0.886827  0.765080  0.436374   
9  -11.565697  0.257053  0.517462  0.321291  0.326017  0.420460  0.354401   
10 -11.840921  0.148653  0.292960  0.201920  0.388229  0.207586  0.375157   
11 -11.725418  0.181394  0.400483  0.375744  0.357037  0.428273  0.476233   

In [97]:
print(os.getcwd())

/home/jovyan/audio-fingerprinting-via-LSH/python/music_processing


In [102]:
song_names_df.to_csv(path_or_buf = './exports/song_names.csv', sep = ',', index = False)

In [103]:
song_feats_df.to_csv(path_or_buf = './exports/song_feats.csv', sep = ',', index = False)