In [None]:
# Create a mel-scaled spectrogram image (one PNG per WAV file) using a mel filter bank

# From Wikipedia: https://en.wikipedia.org/wiki/Mel-frequency_cepstrum
# In sound processing, the mel-frequency cepstrum (MFC) is a representation of the short-term power spectrum
# of a sound, based on a linear cosine transform of a log power spectrum on a nonlinear mel scale of frequency.

# The spectrograms are built on the short-time Fourier transform (STFT): https://librosa.github.io/librosa/generated/librosa.core.stft.html#librosa.core.stft

#%matplotlib inline    # Enable this to print inline in the notebook

import librosa
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import os
import time

# The librosa.display module needs to be explicitly imported (https://github.com/librosa/librosa/issues/441)
import librosa.display


# Batch job: for every WAV file in `files_location`, compute a log-scaled mel
# spectrogram and save it as a PNG with the same base name in
# `mel_spectrogram_location`.
files_location = "/Volumes/EXTERNAL/MusicEngine/wav/"
mel_spectrogram_location = "/Volumes/EXTERNAL/MusicEngine/mel_spectrogram/"

song_id_list = os.listdir(files_location)
#song_id_list = "dt_16bars_102rap"

# Resume hack: skip the first 310 directory entries.
# NOTE(review): os.listdir() returns entries in arbitrary order, so this slice
# is only meaningful if the listing order is the same as in the earlier run.
song_id_list = song_id_list[310:]

# savefig() does not create missing directories, so make sure the target exists.
os.makedirs(mel_spectrogram_location, exist_ok = True)

for song in song_id_list:
    # splitext() splits on the LAST dot and never raises, unlike split(".")[1],
    # which fails with IndexError on names without a dot (e.g. sub-directories)
    # and misreads names with several dots ("a.b.wav", "x.wav.bak").
    song_id, extension = os.path.splitext(song)

    # Dot-files (e.g. macOS "._name.wav" metadata files) are not audio; skip them
    # together with everything that is not a .wav file.
    if song.startswith(".") or extension != ".wav":
        continue

    # Pause kept from the original code; its purpose is not documented
    # (presumably throttling access to the external drive -- TODO confirm).
    time.sleep(1)
    print("Working on song ID: " + song_id)

    song_path = os.path.join(files_location, song)
    audio_data, sampling_rate = librosa.load(song_path,
                                             mono = True,                # When mono = True, the values appear to be averaged. They are slightly different from just taking one of the channels
                                             sr = 11025,                 # The downsampling doesn't seem to change the results
                                             res_type = "kaiser_best")   # Downsampling method. For a faster method use "kaiser_fast"

    # Calculate the Mel spectrum
    mel_spectrogram = librosa.feature.melspectrogram(
        y = audio_data,
        sr = sampling_rate,
        n_mels = 128,
        n_fft = 2048,         # Length of the window for the FFT
        hop_length = 1024)    # Number of samples between successive frames

    # Convert power to decibels, relative to the loudest bin of this song
    spectrogram = librosa.power_to_db(
        mel_spectrogram,
        ref = np.max)

    # Plot the Mel spectrogram as a bare image: axis labels, colorbar and title
    # are deliberately left out. To add them back, pass y_axis = "mel" and
    # x_axis = "time" to specshow and call plt.colorbar(format = "%+2.0f dB"),
    # plt.title('Mel spectrogram') and plt.tight_layout() before saving.
    figure = plt.figure(figsize = (12, 8))
    try:
        # NOTE(review): fmax presumably only affects frequency-axis labelling,
        # which is disabled here -- confirm before relying on it.
        librosa.display.specshow(spectrogram
                                 ,fmax = 20000
                                )
        figure.savefig(os.path.join(mel_spectrogram_location, song_id + ".png"))
    finally:
        # pyplot keeps every figure alive until it is closed; without this the
        # loop accumulates one 12x8 figure per song and memory keeps growing.
        plt.close(figure)

print("Done creating spectrograms... \nCheck the results")

In [8]:
# Debugging aid: print a table of the variables currently defined in the kernel (name, type, data/info)
%whos

Variable                   Type       Data/Info
-----------------------------------------------
audio_data                 ndarray    2664752: 2664752 elems, type `float32`, 10659008 bytes (10.16522216796875 Mb)
files_location             str        /Volumes/EXTERNAL/MusicEngine/wav/
librosa                    module     <module 'librosa' from '/<...>ges/librosa/__init__.py'>
matplotlib                 module     <module 'matplotlib' from<...>/matplotlib/__init__.py'>
mel_spectrogram            ndarray    128x2603: 333184 elems, type `float64`, 2665472 bytes (2.5419921875 Mb)
mel_spectrogram_location   str        /Volumes/EXTERNAL/MusicEngine/mel_spectrogram/
np                         module     <module 'numpy' from '/Us<...>kages/numpy/__init__.py'>
os                         module     <module 'os' from '/Users<...>nda/lib/python3.5/os.py'>
plt                        module     <module 'matplotlib.pyplo<...>es/matplotlib/pyplot.py'>
sampling_rate              int        22100
song  

In [9]:
# Run `ls` in a shell and return its output as a list of file names (here: the notebook's working directory)
%system(ls)

['0.ideas_for_classifying_songs.ipynb',
 '1.download_videos_inmp3.ipynb',
 '2.convert_mp3_to_wav.ipynb',
 '3.create_mel_spectrogram.ipynb',
 '3.extract_features_from_wav.ipynb',
 '3.fourier_transformation_implementation.ipynb',
 '4.classify_with_melspecgram_images.ipynb',
 'dt_16bars_102rap.wav',
 'find_factor.ipynb',
 'test.wav',
 'z3.extract_features_from_wav.ipynb',
 'z3.extract_features_from_wav1.ipynb']

In [11]:
# Listening test: check how a song sounds after being downsampled.
# The file is reloaded at 11025 Hz with both channels kept (mono = False) and
# written next to the original so the two versions can be compared by ear.
# Author's observation: the downsampled songs seem to have very similar quality,
# but slightly lower frequency.
original_wav_path = "/Users/valentin/Documents/MusicEngine/wav/uXtfQCNf3JA.wav"
downsampled_wav_path = "/Users/valentin/Documents/MusicEngine/wav/uXtfQCNf3JA_11025.wav"

test_audio_data, sampling_rate = librosa.load(
    path = original_wav_path,
    mono = False,
    sr = 11025)

librosa.output.write_wav(
    path = downsampled_wav_path,
    y = test_audio_data,
    sr = sampling_rate)