## 1D features plus mfcc, contrast and chromagram (with windows in each segment)

2D Features:
- Contrast (mean - 7, var - 7)
- chromagram (mean - 12, var - 12)
- mfcc (mean - 20, var - 20)

1D Features:
- spectral centroid
- zero crossing
- spectral rolloff
- rms
- beat

total features = 88

Extra addition: Removing first and last 20 seconds of each song

In [1]:
import librosa
import librosa.display
import numpy as np
import os

In [2]:
# get the file names in the folders
prog_rock_directory = "C:/Users/dhruv.kushwaha/Dropbox (UFL)/CAP6610_Music_Genre_Classification/Code/Train_set/Progressive_Rock_Songs"
prog_rock_filenames = os.listdir(prog_rock_directory)

other_songs_directory = "C:/Users/dhruv.kushwaha/Dropbox (UFL)/CAP6610_Music_Genre_Classification/Code/Train_set/Other_Songs"
other_songs_filenames = os.listdir(other_songs_directory)

top_of_the_pops_directory = "C:/Users/dhruv.kushwaha/Dropbox (UFL)/CAP6610_Music_Genre_Classification/Code/Train_set/Top_Of_The_Pops"
top_of_the_pops_filenames = os.listdir(top_of_the_pops_directory)

song_dict = {
    "prog_rock": prog_rock_filenames,
    "other_songs": other_songs_filenames,
    "top_of_the_pops": top_of_the_pops_filenames
}

path_dict = {
    "prog_rock": prog_rock_directory,
    "other_songs": other_songs_directory,
    "top_of_the_pops": top_of_the_pops_directory
}

folders = ["prog_rock", "other_songs", "top_of_the_pops"]

# Params
segment_length = 5# Snippets in each track considered
# rp% of the spectral energy lies below the value of spectral rolloff
roll_percent = 0.85
sr = 44100 # Sampling rate - 22 kHz

In [3]:
# 1D features
# Init
label_list, feature_matrix_list = [], []
segment_count = 0

# Start loop
for folder in folders:
    os.chdir(path_dict[folder])
    for filename in song_dict[folder]:
        try:
            x , sr = librosa.load(filename, sr=sr)

            # compute number of audio segments for fixed segment length
            num_segments = int(len(x)/(sr*segment_length))

            # Sanity check
            #print("Number of segments: {}".format(num_segments))
            segment_count += num_segments

            segments = np.arange(4,num_segments-4,1,dtype=int)

            for segment in segments:
                x, sr = librosa.load(filename, sr=sr, offset=segment_length*segment, duration=segment_length)

                # zero crossings rate
                # gives the fraction of zero crossings in a timeframe
                zero_crossings = librosa.feature.zero_crossing_rate(x)[0]

                # spectral centroid
                # indicates where the center of mass lies, e.g. for metal songs generally towards the end
                # might be not that useful when cutting the songs into 30sec snippets
                spectral_centroids = librosa.feature.spectral_centroid(y=x, sr=sr)[0]

                # spectral rolloff
                spectral_rolloff = librosa.feature.spectral_rolloff(
                    y=x, sr=sr, roll_percent=roll_percent)[0]

                # chroma frequencies
                # x-axis = time
                # y-axis = chroma
                chromagram = librosa.feature.chroma_stft(y=x, sr=sr, S=None, n_fft=2048,
                                                         hop_length=512, win_length=None, window='hann', center=True,
                                                         pad_mode='constant', tuning=None, n_chroma=12)

                # mfccs
                mfccs = librosa.feature.mfcc(y=x, sr=sr)

                # spectral contrast
                spectral_contrast = librosa.feature.spectral_contrast(y=x)

                # root mean squared energy
                rms = librosa.feature.rms(y=x)[0]

                # beat
                beat = librosa.beat.plp(y=x)

                # Add label to each segment
                if folder == 'prog_rock':
                    label_list.append(1)
                else:
                    label_list.append(0)

                # Compute mean and variance for 1D and 2D features
                # Compute 1D features
                # Stack 1D features
                feature1d_temp = np.vstack((spectral_centroids.reshape(1, -1), zero_crossings.reshape(1, -1),
                                            spectral_rolloff.reshape(1, -1), rms.reshape(1, -1), beat.reshape(1, -1)))


                num_windows = int(len(spectral_centroids)/(38))
                list1d_temp1, list1d_temp2 = [], []

                for idx in range(0, len(spectral_centroids), num_windows):
                    feature1D_m = np.mean(feature1d_temp[:, idx:idx+num_windows-1], axis=1)
                    feature1D_v = np.var(feature1d_temp[:, idx:idx+num_windows-1], axis=1)

                    temp1d = np.hstack((feature1D_m, feature1D_v))
                    list1d_temp1.append(feature1D_m)
                    list1d_temp2.append(feature1D_v)

                # Convert list to numpy array and calculate average over windows
                feature1D_mean = np.mean(np.array(list1d_temp1), axis=0)
                feature1D_var = np.mean(np.array(list1d_temp2), axis=0)

                # Combine mean and var
                feature_matrix1D = np.hstack(
                    (feature1D_mean.reshape(-1, 1), feature1D_var.reshape(-1, 1)))

                feature_matrix1D = feature_matrix1D.flatten()

                # print("mean: {}; variance: {}".format(feature1D_mean, feature1D_var))

                # Compute 2D features
                feature2D_temp = np.vstack((mfccs.reshape(20, -1),
                                            chromagram.reshape(12, -1), spectral_contrast.reshape(7, -1)))

                # Compute mean and variance
                list2d_temp1, list2d_temp2 = [], []
                for idx in range(0, len(spectral_centroids), num_windows):
                    feature2D_m = np.mean(feature2D_temp[:, idx:idx+num_windows-1], axis=1)
                    feature2D_v = np.var(feature2D_temp[:, idx:idx+num_windows-1], axis=1)

                    list2d_temp1.append(feature2D_m)
                    list2d_temp2.append(feature2D_v)

                # Convert list to numpy array and calculate average over windows
                feature2D_mean = np.mean(np.array(list2d_temp1), axis=0)
                feature2D_var = np.mean(np.array(list2d_temp2), axis=0)

                # Combine mean and var
                feature_matrix2D = np.hstack(
                    (feature2D_mean.reshape(-1, 1), feature2D_var.reshape(-1, 1)))

                feature_matrix2D = feature_matrix2D.flatten()

                feature_matrix_temp = np.append(feature_matrix1D, feature_matrix2D)

                #print("Shape of final matrix:{}".format(feature_matrix_temp.shape))

                feature_matrix_list.append(feature_matrix_temp)

            # Remove song from list
            song_dict[folder].remove(filename)
        except:
            pass



  return f(*args, **kwargs)


In [4]:
# Sanity check
print(np.array(feature_matrix_list).shape)
print(segment_count)


(8560, 88)
9384


In [5]:
# Combine labels for final feature matrix
final_feature_matrix = np.hstack((np.array(label_list).reshape(-1, 1),np.array(feature_matrix_list)))

print("Dimension of final feature matrix: {}".format(final_feature_matrix.shape))

Dimension of final feature matrix: (8560, 89)


In [None]:
# save final dataset
os.chdir('C:/Users/dhruv.kushwaha/Dropbox (UFL)/CAP6610_Music_Genre_Classification/Code/Extracted_data')
np.savez_compressed('Feature_matrix_window', final_feature_matrix)

### If one feels like creating a pandas dataframe

In [None]:
# Column order 

# column names for feature dataframe
column_names = ([f"{feature}_{statistic}" for feature in ["centroid", "crossings", "rolloff", "rms", "beat"] for statistic in ["mean", "var"]] +
                [f"contrast_mean_{i}" for i in range(7)] + [f"contrast_var_{i}" for i in range(7)] +
                [f"chromogram_mean_{i}" for i in range(12)] + [f"chromogram_var_{i}" for i in range(12)] +
                [f"mfccs_mean_{i}" for i in range(20)] + [f"mfccs_var_{i}" for i in range(20)] + [f"label"])
