In [None]:
import librosa
import librosa.display
from pydub import AudioSegment
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
from sklearn.preprocessing import minmax_scale

In [None]:
sr = 22050

# Source folders for the three song collections
prog_rock_directory = "/Users/maltegaber/Documents/University of Florida/Machine Learning - CAP6610/Project 2/Progressive_Rock_Songs"
other_songs_directory = "/Users/maltegaber/Documents/University of Florida/Machine Learning - CAP6610/Project 2/Not_Progressive_Rock/Other_Songs"
top_of_the_songs_directory = "/Users/maltegaber/Documents/University of Florida/Machine Learning - CAP6610/Project 2/Not_Progressive_Rock/Top_Of_The_Pops"

# File names found in each folder (listed in the same order as the folders above)
prog_rock_filenames = os.listdir(prog_rock_directory)
other_songs_filenames = os.listdir(other_songs_directory)
top_of_the_songs_filenames = os.listdir(top_of_the_songs_directory)

# Keys used to look up both the directory and the file list of a collection
folders = ["prog_rock", "other_song", "top_of_the_songs"]

# folder key -> list of file names (the very same list objects as above)
song_dict = dict(zip(
    folders,
    [prog_rock_filenames, other_songs_filenames, top_of_the_songs_filenames],
))

# folder key -> directory path
path_dict = dict(zip(
    folders,
    [prog_rock_directory, other_songs_directory, top_of_the_songs_directory],
))

# Column names for the feature dataframe:
#   1) mean/var of every one-dimensional feature,
#   2) per-row mean block followed by per-row var block of every multi-dimensional feature,
#   3) the class label.
column_names = [
    f"{feature}_{statistic}"
    for feature in ("centroid", "crossings", "rolloff", "rms", "beat")
    for statistic in ("mean", "var")
]
column_names += [
    f"{prefix}_{statistic}_{i}"
    for prefix, size in (("contrast", 7), ("chromogram", 12), ("mfccs", 20))
    for statistic in ("mean", "var")
    for i in range(size)
]
column_names.append("label")

# Params
# Length in seconds of each snippet a track is cut into
segment_length = 15
# roll_percent (as a fraction) of the spectral energy lies below the spectral rolloff value
roll_percent = 0.85

# Init: accumulators filled by the extraction loop
spectrogram_list = []
label_list = []

# Number of analysis windows (frames) that make up one texture window
num_windows = 83

for folder in folders:
    # Relative file names are resolved against this folder, and the feature
    # CSVs are written into it as well.
    os.chdir(path_dict[folder])

    # Binary target: 1 for progressive rock, 0 for everything else
    label = 1 if folder == 'prog_rock' else 0

    # Iterate over a snapshot: successfully processed files are removed from
    # song_dict[folder] below, and removing from a list while iterating over
    # that same list silently skips the entry that follows.
    for filename in list(song_dict[folder]):
        # Hidden files and the CSVs written by an earlier run live in the same
        # folder but are not audio; leave them alone.
        if filename.startswith(".") or filename.lower().endswith(".csv"):
            continue

        try:
            # Load the whole track once to find out how many full snippets fit
            full_signal, sr = librosa.load(filename, sr=sr)

            # number of complete segment_length-second audio segments
            num_segments = len(full_signal) // (sr * segment_length)

            for segment in range(num_segments):
                x, sr = librosa.load(filename, sr=sr,
                                     offset=segment_length * segment,
                                     duration=segment_length)

                # spectrogram
                # x-axis = time
                # y-axis = frequency
                # value  = amplitude in dB
                X = librosa.stft(x)
                Xdb = librosa.amplitude_to_db(abs(X))

                # Keep spectrograms and labels index-aligned by appending both together
                spectrogram_list.append(Xdb)
                label_list.append(label)

                # zero crossing rate: fraction of zero crossings per frame
                zero_crossings = librosa.feature.zero_crossing_rate(x)[0]

                # spectral centroid: where the spectral "center of mass" lies
                spectral_centroids = librosa.feature.spectral_centroid(y=x, sr=sr)[0]

                # spectral rolloff
                spectral_rolloff = librosa.feature.spectral_rolloff(y=x, sr=sr, roll_percent=roll_percent)[0]

                # chroma frequencies (rows = chroma bins, columns = time)
                chromagram = librosa.feature.chroma_stft(y=x, sr=sr)

                # mfccs
                mfccs = librosa.feature.mfcc(y=x, sr=sr)

                # spectral contrast (sr passed explicitly, like the other features)
                spectral_contrast = librosa.feature.spectral_contrast(y=x, sr=sr)

                # root mean squared energy
                rms = librosa.feature.rms(y=x)[0]

                # beat: predominant local pulse curve
                beat = librosa.beat.plp(y=x, sr=sr)

                # Order must match column_names: 1-D features first, then the
                # multi-row features, then the label.
                one_dim_series = [spectral_centroids, zero_crossings, spectral_rolloff, rms, beat]
                multi_dim_series = [spectral_contrast, chromagram, mfccs]

                feature_rows = []
                number_texture_windows = len(spectral_centroids) // num_windows
                for idx in range(number_texture_windows):
                    window = slice(num_windows * idx, num_windows * (idx + 1))

                    # One row = mean and variance of every feature over this texture window
                    feature_lst = []
                    for feature in one_dim_series:
                        chunk = feature[window]
                        feature_lst += [float(chunk.mean()), float(chunk.var())]
                    for feature in multi_dim_series:
                        chunk = feature[:, window]
                        feature_lst += chunk.mean(1).tolist() + chunk.var(1).tolist()

                    # column_names ends with "label"; without this value the row is
                    # one entry short and the DataFrame construction below fails.
                    feature_lst.append(label)
                    feature_rows.append(feature_lst)

                feature_matrix = pd.DataFrame(feature_rows, columns=column_names)
                feature_matrix.to_csv(f"feature matrix {filename} {segment}.csv")

            # Whatever remains in song_dict[folder] afterwards was not processed
            song_dict[folder].remove(filename)
        except Exception as exc:
            # Best effort: report the problem and continue with the next file
            # instead of hiding it (and let KeyboardInterrupt propagate).
            print(f"Skipping {filename!r} in {folder}: {exc!r}")