# Notebook to convert the Audio data into spectrograms and folder splits
## To be used for artificially expanding the dataset

- Converting the audio files to square Mel-spectrograms, and saving them in spectrograms/halves/(BigRoom, DrumAndBass, Techno)
- Splitting into train, validate, test folders

In [None]:
# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
sns.color_palette("Blues", as_cmap=True)

# Standard Packages
import numpy as np
import warnings
import pandas as pd
import os
import shutil
import random
import time
import PIL
pd.set_option('display.max_colwidth', None)
warnings.filterwarnings("ignore")

# ML Libraries
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import keras
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential, load_model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, concatenate
from tensorflow.keras.preprocessing.image import ImageDataGenerator


#for loading and visualizing audio files
import librosa
import librosa.display
from scipy import signal
from scipy.io import wavfile
import wave
from pydub import AudioSegment

#to play audio
import IPython.display as ipd

random.seed(123)

### Getting list of genre_names and setting the audio_fpath variable

In [None]:
# Root folder of the audio dataset; the cells below read the BigRoom/,
# DrumAndBass/ and Techno/ sub-directories from it.
audio_fpath = "../data/subgenre_dataset/"

# os.listdir() returns entries in arbitrary order and also lists plain files
# (e.g. hidden OS metadata files), so keep only the sub-directories and sort
# them to get a stable list of genre names.
genre_names = sorted(
    entry for entry in os.listdir(audio_fpath)
    if os.path.isdir(os.path.join(audio_fpath, entry))
)
print(genre_names)

### Getting list of file paths for the 3 selected genres

In [None]:
# Sorted paths of every .wav file in the BigRoom folder. The extension is
# checked (case-insensitively) instead of testing for a '.wav' substring, so
# names that merely contain '.wav' (e.g. 'track.wav.asd') are not picked up.
big_room_file_paths = sorted(
    audio_fpath + 'BigRoom/' + f
    for f in os.listdir(audio_fpath + 'BigRoom/')
    if f.lower().endswith('.wav')
)
# big_room_file_paths

In [None]:
# Sorted paths of every .wav file in the DrumAndBass folder. The extension is
# checked (case-insensitively) instead of testing for a '.wav' substring, so
# names that merely contain '.wav' (e.g. 'track.wav.asd') are not picked up.
dnb_file_paths = sorted(
    audio_fpath + 'DrumAndBass/' + f
    for f in os.listdir(audio_fpath + 'DrumAndBass/')
    if f.lower().endswith('.wav')
)
# dnb_file_paths

In [None]:
# Sorted paths of every .wav file in the Techno folder. The extension is
# checked (case-insensitively) instead of testing for a '.wav' substring, so
# names that merely contain '.wav' (e.g. 'track.wav.asd') are not picked up.
techno_file_paths = sorted(
    audio_fpath + 'Techno/' + f
    for f in os.listdir(audio_fpath + 'Techno/')
    if f.lower().endswith('.wav')
)
# techno_file_paths

### Previewing one song from each genre with ipd

In [None]:
# ipd.Audio(big_room_file_paths[0], autoplay=True)

In [None]:
# ipd.Audio(dnb_file_paths[0], autoplay=True)

In [None]:
# ipd.Audio(techno_file_paths[0], autoplay=True)

## Create spectrogram dataset from the audio files

#### Function that converts an audio file to a Mel-spectrogram, splits it into two halves, saves each half as an image under ../spectrograms/halves/'genre_name', and plots it in the notebook

In [None]:
def save_two_mel_spectrograms(audio_path, *, show=True):
    """Save the two halves of a track's Mel spectrogram as square PNG images.

    The audio file is loaded with librosa, converted to a Mel spectrogram in
    decibels, and cut along the time axis into two equal-width halves. Each
    half is rendered without axes on a 6x6 inch figure and written to
    ``../spectrograms/halves/<genre>/<track>_half<1|2>.png``, where ``<genre>``
    is the name of the folder containing the audio file and ``<track>`` is the
    file name without its extension.

    Args:
        audio_path: Path to the audio file; its parent folder name is used as
            the genre sub-directory of the output.
        show: When True (default) each figure is displayed with ``plt.show()``
            after it has been saved. Pass False for batch or headless runs
            where only the image files are wanted.

    Returns:
        None. The images are written to disk as a side effect.
    """
    # Load audio file
    y, sr = librosa.load(audio_path)

    # Compute Mel spectrogram
    n_fft = 4096            # Increase this to increase fft window size --> Increase resolution. More processing time
    hop_length = 512
    n_mels = 128
    # The signal is passed by keyword: librosa >= 0.10 made the arguments of
    # melspectrogram keyword-only, so a positional `y` raises a TypeError there.
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels)

    # Convert to decibels, relative to the loudest bin of this track
    S_dB = librosa.power_to_db(S, ref=np.max)

    # Derive the output location from the path itself. splitext() removes only
    # the final extension, so dotted names such as 'My.Song.wav' keep their
    # full stem instead of collapsing to 'My' (and overwriting each other).
    genre_name = os.path.basename(os.path.dirname(audio_path))
    track_name = os.path.splitext(os.path.basename(audio_path))[0]
    out_dir = '../spectrograms/halves/' + genre_name
    os.makedirs(out_dir, exist_ok=True)

    # Display & Save Mel spectrogram
    n_halves = 2
    # Width (in spectrogram frames) of each image; with an odd number of
    # frames the last frame is left out.
    window_length = S_dB.shape[1] // n_halves
    for x in range(n_halves):
        start = x * window_length
        fig = plt.figure(figsize=(6, 6))
        try:
            librosa.display.specshow(S_dB[:, start:start + window_length], sr=sr,
                                     hop_length=hop_length, cmap='gray_r')
            plt.axis('off')
            plt.tight_layout()
            # The explicit '.png' keeps the same file name matplotlib produces
            # by default for an extension-less path, and stops a dot inside
            # the track name from being read as the image format.
            plt.savefig(os.path.join(out_dir, track_name + '_half' + str(x + 1) + '.png'),
                        bbox_inches='tight', pad_inches=0, dpi=1200)
            if show:
                plt.show()
        finally:
            # Release the figure so a long batch run does not accumulate
            # hundreds of open figures in memory.
            plt.close(fig)

    # To view colorbar and axes
    # librosa.display.specshow(S_dB, x_axis='time', y_axis='mel', sr=sr, hop_length=hop_length)
    # plt.colorbar(format='%+2.0f dB')
    # plt.title('Mel-frequency spectrogram')
    # plt.show()

# save_two_mel_spectrograms(techno_file_paths[0])


# y, sr = librosa.load(techno_file_paths[0])
# n_fft = 4096            # Increase this to increase fft window size --> Increase resolution. More processing time
# hop_length = 512
# n_mels = 128
# S = librosa.feature.melspectrogram(y, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels)
# S_dB = librosa.power_to_db(S, ref=np.max)
# plt.figure(figsize=(6, 6))
# librosa.display.specshow(S_dB, sr=sr, hop_length=hop_length, cmap='gray_r')
# plt.axis('off')
# plt.tight_layout()

#### Loop that creates the image datasets for BigRoom, DrumAndBass, and Techno

In [None]:
# Build the spectrogram images for all three genres, taking one track from
# each genre per pass and printing how long every pass took.
# NOTE: zip() stops at the shortest of the three lists, so any surplus files
# in a longer list are not converted.
start_time = time.time()
track_triples = zip(big_room_file_paths, dnb_file_paths, techno_file_paths)
for i, track_triple in enumerate(track_triples):
    iter_start_time = time.time()
    for track_path in track_triple:
        save_two_mel_spectrograms(track_path)
    print(f'Iteration {i}: {time.time() - iter_start_time}')
# Total wall-clock time of the whole conversion, in seconds
print(time.time() - start_time)

## Train, Validate, Test Split

In [None]:
# Root directory that holds one sub-directory of spectrogram images per class
root_dir = '../spectrograms/halves/'

# Destination folders of the three dataset splits, all placed under the root
train_dir, val_dir, test_dir = (
    f'{root_dir}{split_name}/' for split_name in ('train', 'validate', 'test')
)

In [None]:
###### DO NOT RE-RUN WITHOUT DELETING PREVIOUS DIRECTORIES ######
# Copies are added to the existing split folders without clearing them first,
# so images left over from an earlier run (e.g. after the source images
# changed) would stay mixed in with the new split.

# Set the names of the subdirectories for each class
class_names = ['BigRoom', 'DrumAndBass', 'Techno']

# Set the proportion of images to use for train, validation, and test.
# Train and validation sizes are rounded down; the test split receives every
# remaining image, so test_prop documents the intended share only.
train_prop = 0.8
val_prop = 0.1
test_prop = 0.1

os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# Dedicated, seeded generator: together with the sorted file list below it
# makes the split identical on every run, independent of how much the global
# `random` state has been used elsewhere in the notebook.
split_rng = random.Random(123)

# Loop over each class
for class_name in class_names:
    class_dir = os.path.join(root_dir, class_name)

    # Create subdirectories for each class in the train, validation, and test directories
    class_train_dir = os.path.join(train_dir, class_name)
    class_val_dir = os.path.join(val_dir, class_name)
    class_test_dir = os.path.join(test_dir, class_name)
    for split_dir in (class_train_dir, class_val_dir, class_test_dir):
        os.makedirs(split_dir, exist_ok=True)

    # Get the list of image filenames for the current class. os.listdir()
    # returns names in arbitrary order, so sort them before shuffling;
    # hidden entries and sub-directories are skipped.
    image_filenames = sorted(
        name for name in os.listdir(class_dir)
        if not name.startswith('.') and os.path.isfile(os.path.join(class_dir, name))
    )

    # Shuffle the list of image filenames with the seeded generator.
    # NOTE(review): the '_half1' / '_half2' images of one track are shuffled
    # independently, so they can land in different splits - consider splitting
    # by track if that leakage between train and test matters.
    split_rng.shuffle(image_filenames)

    # Split the image filenames into train, validation, and test sets
    num_images = len(image_filenames)
    num_train = int(num_images * train_prop)
    num_val = int(num_images * val_prop)

    train_filenames = image_filenames[:num_train]
    val_filenames = image_filenames[num_train:num_train + num_val]
    test_filenames = image_filenames[num_train + num_val:]

    # Copy the images to the appropriate train, validation, or test directory
    for split_dir, filenames in (
        (class_train_dir, train_filenames),
        (class_val_dir, val_filenames),
        (class_test_dir, test_filenames),
    ):
        for filename in filenames:
            src_path = os.path.join(class_dir, filename)
            dest_path = os.path.join(split_dir, filename)
            shutil.copy(src_path, dest_path)


# Increasing dataset size

Methodology
- Chopping up a song into pieces
- Maybe different window for Fourier transforms? Do research on this
- Just the drops?