# Notebook for extracting Spectrogram/MFCC features from GTZAN dataset
##### Depending on which type of features you want to extract (spectrograms or MFCCs), change the name in OUTPUT_PATH and comment out the block you do not need in the feature-calculation part (both blocks are marked directly in the code).

In [None]:
import os
import librosa
import math
import numpy as np

# Set to False to skip the (slow) feature extraction when this cell is run.
REBUILD_DATA = True

# Input dataset root and output file; name the output after the feature type
# being extracted and the number of segments per track.
DATASET_PATH = "/data/shared/GTZAN-DATASET/genres_dataset"
OUTPUT_PATH = "/data/shared/GTZAN-DATASET/spectrograms_10segments"

# Audio is loaded at SAMPLE_RATE (Hz); tracks are treated as TRACK_DURATION
# seconds long, which fixes the number of samples expected per track.
SAMPLE_RATE = 22050
TRACK_DURATION = 30
SAMPLES_PER_TRACK = TRACK_DURATION * SAMPLE_RATE


def save_features(dataset_path, output_path, n_fft=2048, hop_length=512, n_mfcc=40, num_segments=5):
    """Extracts features from a music dataset and saves them into a file along with genre labels.

    Every sub-folder found below ``dataset_path`` is treated as one genre. Each
    audio file is loaded at ``SAMPLE_RATE``, cut into ``num_segments`` equally
    sized segments, and one feature matrix is computed per segment. Segments
    whose feature matrix does not have the expected shape are reported and
    skipped. The result is a dict with the keys ``"mapping"`` (genre names),
    ``"labels"`` (index into ``"mapping"`` for every stored segment) and
    ``"features"`` (the feature matrices), written with ``np.save``.

        :param dataset_path (str): Path to dataset
        :param output_path (str): Path to output file used to save features
        :param n_fft (int): Interval we consider to apply FFT. Measured in # of samples
        :param hop_length (int): Sliding window for FFT. Measured in # of samples
        :param n_mfcc (int): Number of coefficients to extract from MFCCs.
        :param num_segments (int): Number of segments we want to divide sample tracks into
        :return: None, the data is written to ``output_path``
        """

    # dictionary to store mapping, labels, and features
    data = {
        "mapping": [],
        "labels": [],
        "features": []
    }

    samples_per_segment = SAMPLES_PER_TRACK // num_segments
    # NOTE(review): this equals the number of frames librosa produces for a
    # centered STFT unless samples_per_segment is an exact multiple of
    # hop_length - confirm before using such a combination.
    temporal_bins = math.ceil(samples_per_segment / hop_length)

    # normalise once so the root can be compared by value (also for Path objects)
    root_path = os.fspath(dataset_path)

    # loop through all genre sub-folders
    for dirpath, dirnames, filenames in os.walk(root_path):

        # os.walk yields directories in arbitrary order; sorting in place makes
        # the traversal (and therefore the label indices) reproducible
        dirnames.sort()

        # the dataset root itself is not a genre folder
        if dirpath == root_path:
            continue

        # save genre label (i.e., sub-folder name) in the mapping;
        # basename works with both "/" and "\\" path separators
        semantic_label = os.path.basename(dirpath)
        data["mapping"].append(semantic_label)
        label = len(data["mapping"]) - 1
        print("\nProcessing: {}".format(semantic_label))

        # process all audio files in genre sub-dir
        for f in sorted(filenames):

            # load audio file (resampled to SAMPLE_RATE)
            file_path = os.path.join(dirpath, f)
            signal, _ = librosa.load(file_path, sr=SAMPLE_RATE)

            # process all segments of audio file
            for d in range(num_segments):

                # calculate start and finish sample for current segment
                start = samples_per_segment * d
                finish = start + samples_per_segment
                segment = signal[start:finish]

                # NOTE: if both blocks below are left active, the MFCC block
                # runs last and its result is the one that gets stored.

                #COMMENT THIS BLOCK IF YOU AIM TO EXTRACT MFCCs!
                #calculating short-time fourier transform
                S_signal = librosa.stft(segment, n_fft=n_fft, hop_length=hop_length)
                #extracting power spectrogram - squared magnitude of stft
                Y_signal = np.abs(S_signal) ** 2
                #extracting log-amplitude spectrogram
                feature = librosa.power_to_db(Y_signal)
                y_bins = n_fft // 2 + 1


                #COMMENT THIS BLOCK IF YOU AIM TO EXTRACT SPECTROGRAMS!
                #extracting mfccs; n_fft and hop_length are forwarded so the
                #frame count agrees with temporal_bins for non-default values
                feature = librosa.feature.mfcc(y=segment, sr=SAMPLE_RATE, n_mfcc=n_mfcc,
                                               n_fft=n_fft, hop_length=hop_length)
                y_bins = n_mfcc


                #store features and label
                if np.shape(feature) == (y_bins, temporal_bins):
                    data["features"].append(feature)
                    data["labels"].append(label)
                    print("{}, segment:{}".format(file_path, d+1))
                else:
                    print("ERROR: expected shape of ({}, {}), but got {} instead".format(y_bins, temporal_bins, np.shape(feature)))
                    print("ERROR: {}, segment:{}".format(file_path, d+1))

    # save data to the file requested by the caller (np.save appends ".npy" if missing)
    np.save(output_path, data)
        
        
# Run the extraction only when a rebuild was requested; each track is split
# into 10 segments.
if REBUILD_DATA:
    save_features(
        dataset_path=DATASET_PATH,
        output_path=OUTPUT_PATH,
        num_segments=10,
    )
