In [None]:
# Create a spectrogram using Mel cepstral filter

# From Wikipedia: https://en.wikipedia.org/wiki/Mel-frequency_cepstrum
# In sound processing, the mel-frequency cepstrum (MFC) is a representation of the short-term power spectrum
# of a sound, based on a linear cosine transform of a log power spectrum on a nonlinear mel scale of frequency.

# Create spectrograms with Short-term Fourier Transformation: https://librosa.github.io/librosa/generated/librosa.core.stft.html#librosa.core.stft

#%matplotlib inline    # Enable this to print inline in the notebook

import librosa
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import os
import time

# The librosa.display module needs to be explicitly imported (See https://github.com/librosa/librosa/issues/441)
import librosa.display


files_location = "/Volumes/EXTERNAL/MusicEngine/wav/non_cases/"
mel_spectrogram_location = "/Volumes/EXTERNAL/MusicEngine/mel_spectrogram/non_cases/"

# Create a list with all the WAV files in the directory and create Mel spectrograms
songs_idx = [x.split(".wav")[0] for x in os.listdir(files_location)]
converted_songs_idx = [x.split(".png")[0] for x in os.listdir(mel_spectrogram_location)]
song_id_list = [(x + ".wav") for x in (list(set(songs_idx) - set(converted_songs_idx)))]

song_counter = len(song_id_list) - 1

for song in song_id_list:
    if song.split(".")[1] == "wav":
        time.sleep(1)
        print("Working on song ID: %s" % song.split(".")[0])

        song_path = files_location + song
        audio_data, sampling_rate = librosa.load(song_path,
                                                 mono = True,                # When mono = True, the values appear to be averaged. They are slightly different from just taking one of the channels  
                                                 sr = 11025,                 # The downsampling doesn't seem to change ther results
                                                 res_type = "kaiser_best")   # Downsampling method. For a faster method use "kaiser_fast"

        # Calculate the Mel spectrum
        mel_spectrogram = librosa.feature.melspectrogram(
            y = audio_data,
            sr = sampling_rate,
            n_mels = 128,
            n_fft = 2048,         # Length of the window for the FFT
            hop_length = 1024)    # Number of samples between successive frames

        # Calculate audio dB along the song
        spectrogram = librosa.power_to_db(
            mel_spectrogram,
            ref = np.max)

        # Plot Mel spectrogram - do not print out axes names, and the 
        plt.figure(figsize = (12, 8))
        librosa.display.specshow(spectrogram
                                 ,fmax = 20000
                                 #,y_axis = "mel"
                                 #,x_axis = "time"
                                )
        #plt.colorbar(format = "%+2.0f dB")
        #plt.title('Mel spectrogram')
        #plt.tight_layout()
        plt.savefig(mel_spectrogram_location + song.split(".")[0] + ".png")
        
        print("->> %s more videos to create spectrograms for ..." % song_counter)
        song_counter -= 1

print("Done creating spectrograms... \nCheck the results")


In [8]:
%whos

Variable                   Type       Data/Info
-----------------------------------------------
audio_data                 ndarray    4823296: 4823296 elems, type `float32`, 19293184 bytes (18.3994140625 Mb)
converted_songs_idx        list       n=16
files_location             str        /Volumes/EXTERNAL/MusicEngine/wav/cases/
librosa                    module     <module 'librosa' from '/<...>ges/librosa/__init__.py'>
matplotlib                 module     <module 'matplotlib' from<...>/matplotlib/__init__.py'>
mel_spectrogram            ndarray    128x4711: 603008 elems, type `float64`, 4824064 bytes (4.6005859375 Mb)
mel_spectrogram_location   str        /Volumes/EXTERNAL/MusicEn<...>ne/mel_spectrogram/cases/
np                         module     <module 'numpy' from '/Us<...>kages/numpy/__init__.py'>
os                         module     <module 'os' from '/Users<...>nda/lib/python3.5/os.py'>
plt                        module     <module 'matplotlib.pyplo<...>es/matplotlib/pyplot.p

In [9]:
%system(ls)

['0.ideas_for_classifying_songs.ipynb',
 '1.download_videos_inmp3.ipynb',
 '2.convert_mp3_to_wav.ipynb',
 '3.create_mel_spectrogram.ipynb',
 '3.extract_features_from_wav.ipynb',
 '3.fourier_transformation_implementation.ipynb',
 '4.classify_with_melspecgram_images.ipynb',
 'dt_16bars_102rap.wav',
 'find_factor.ipynb',
 'test.wav',
 'z3.extract_features_from_wav.ipynb',
 'z3.extract_features_from_wav1.ipynb']

In [11]:
# Test the quality of the audio when the songs are downsampled
# I listened to the downsampled songs, and they seem to have very similar quality, but slighly lower frequency
test_audio_data, sampling_rate = librosa.load(path = "/Users/valentin/Documents/MusicEngine/wav/uXtfQCNf3JA.wav",
                                              sr = 11025,
                                              mono = False)
librosa.output.write_wav(y = test_audio_data,
                         sr = sampling_rate,
                         path ="/Users/valentin/Documents/MusicEngine/wav/uXtfQCNf3JA_11025.wav")

In [1]:
import librosa
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import os
import time

# The librosa.display module needs to be explicitly imported (See https://github.com/librosa/librosa/issues/441)
import librosa.display


files_location = "/Volumes/EXTERNAL/MusicEngine/wav/cases/"
mel_spectrogram_location = "/Volumes/EXTERNAL/MusicEngine/mel_spectrogram/cases/"

# Create a list with all the WAV files in the directory and create Mel spectrograms
songs_idx = [x.split(".wav")[0] for x in os.listdir(files_location)]
converted_songs_idx = [x.split(".png")[0] for x in os.listdir(mel_spectrogram_location)]
song_id_list = [(x + ".wav") for x in (list(set(songs_idx) - set(converted_songs_idx)))]

song_counter = len(song_id_list) - 1


In [2]:
len(songs_idx), len(converted_songs_idx), song_counter

(415, 16, 411)

In [6]:
for song in song_id_list:
    if song.split(".")[1] == "wav":
        time.sleep(1)
        print("Working on song ID: %s" % song.split(".")[0])
        

Working on song ID: 9cCVVEdlaEw
Working on song ID: vAHovB8wyP8
Working on song ID: BjlHwsb05No
Working on song ID: Tuwkf29xwKE
Working on song ID: inRNp5En-yw
Working on song ID: HtA5U0j5ojs
Working on song ID: 02i9rDbZ_-A
Working on song ID: Nz3qZW0Qegk
Working on song ID: -CPmjK5OxnA
Working on song ID: 5xL35Yy8-mA
Working on song ID: mSU7SoIoU9g
Working on song ID: lDKh0cOnN9c
Working on song ID: dm8b0vOoLJk


KeyboardInterrupt: 