In [21]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import os
from glob import glob

# Locations of the raw GTZAN audio files and the per-track key annotation files.
DATA_PREFIX = 'data/raw/'
GTZAN_PREFIX = DATA_PREFIX + 'genres/'
GTZAN_SUFFIX = '.au'
KEY_FILE_PREFIX = DATA_PREFIX + 'gtzan_key-master/gtzan_key/genres/'
KEY_FILE_SUFFIX = '.lerch.txt'

# Sample rate requested from librosa on load, and the clip length (seconds)
# every track is cut or zero-padded to.
FS = 22050
LENGTH_GTZAN = 30
# Uses FS: this line previously referenced FS_GTZAN, which is not defined in
# this cell and raises NameError on a fresh kernel.
NUM_SAMPLES_GTZAN = FS * LENGTH_GTZAN

# GENRE_SIZES[i] is the song count used for GENRES[i] when a global song index
# is mapped to a (genre, song) pair, so the two lists must stay in the same order.
GENRES = ['country', 'pop', 'hiphop', 'reggae', 'classical', 'jazz', 'rock', 'blues', 'disco', 'metal']
GENRE_SIZES = [99, 94, 81, 97, 0, 79, 98, 98, 98, 93]
TOTAL_SIZE = np.sum(GENRE_SIZES)

# Key index -> "<note>\t<mode>" label. Indices 0-11 are the major keys in
# chromatic order starting at A; indices 12-23 are the minor keys in the same order.
_NOTE_NAMES = ['A', 'A#', 'B', 'C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#']
STRING_MAP = [note + '\t' + mode for mode in ('major', 'minor') for note in _NOTE_NAMES]


In [22]:
def get_vector_from_key(key):
    """Build the 24-element score vector for a key index.

    Input:  key = key index; values below 12 take the major branch, other
                  values the minor branch, and -1 means unknown.
    Output: length-24 float vector. For key == -1 it is all zeros. Otherwise
            four entries are set (weights 1, 0.5, 0.3 and 0.2) and the whole
            vector is halved, so the entries sum to 1.
    """
    scores = np.zeros(24)
    if key == -1:  # Unknown
        return scores

    if key < 12:  # major
        fifth_idx = (key + 7) % 12          # major key a fifth above
        relative_idx = (key + 9) % 12 + 12  # relative minor
        parallel_idx = key + 12             # parallel minor
    else:  # minor
        fifth_idx = (key + 7) % 12 + 12     # minor key a fifth above
        relative_idx = (key + 3) % 12       # relative major
        parallel_idx = key - 12             # parallel major

    # Written in this order so the key's own entry is always assigned first.
    weighted_indices = ((key, 1), (fifth_idx, 0.5), (relative_idx, 0.3), (parallel_idx, 0.2))
    for index, weight in weighted_indices:
        scores[index] = weight

    # The weights add up to 2; halving normalizes the vector to sum to 1.
    return scores / 2

In [23]:
def get_string_from_vector(vector):
    """Return the STRING_MAP label at the position of the largest entry of vector."""
    best_key_idx = np.argmax(vector)
    return STRING_MAP[best_key_idx]

In [24]:
def generate_one_hot_guess(vector):
    """Return the index of the largest entry of vector.

    Despite the name, the result is the winning index itself, not a one-hot array.
    """
    winning_idx = np.argmax(vector)
    return winning_idx

In [73]:
def read_data(file):
    """Load one audio file and its ground-truth key vector.

    Input:  file = track name relative to the 'genres' directory, without the
                   audio or key-file suffix.
    Output: audio_data, y
            audio_data = [num_samples, 1] array of mono samples loaded at FS
            y = [24, 1] normalized ground-truth scoring vector for the file,
                produced by get_vector_from_key (all zeros when the key is -1)
    """
    key_path = KEY_FILE_PREFIX + file + KEY_FILE_SUFFIX
    # The context manager closes the annotation file; opening it without one
    # left a file handle dangling for every track read.
    with open(key_path, 'r') as key_file:
        key = int(key_file.read())
    y = np.reshape(get_vector_from_key(key), (24, 1))

    audio_data, _ = librosa.core.load(GTZAN_PREFIX + file + GTZAN_SUFFIX, sr=FS, mono=True)
    # asarray avoids a needless copy of the decoded samples before reshaping
    # them into a single column.
    audio_data = np.reshape(np.asarray(audio_data), (len(audio_data), 1))
    return audio_data, y

In [72]:
def cut_or_pad_to_length(vector, length):
    """Force a column vector to exactly `length` rows.

    Input:  vector = [n, 1] array, length = desired number of rows
    Output: `vector` itself when n already equals `length`; otherwise a new
            [length, 1] zero-initialized array holding the first
            min(n, length) rows of `vector` (truncated or zero-padded).
    """
    current_length = vector.shape[0]
    if current_length == length:
        return vector

    rows_to_copy = min(length, current_length)
    resized = np.zeros((length, 1))
    resized[:rows_to_copy, 0] = vector[:rows_to_copy, 0]
    return resized

In [75]:
def load_all_data(directory):
    """Read every audio file under `directory` and stack the results column-wise.

    Input:  directory = folder searched recursively for files ending in GTZAN_SUFFIX;
                        each path must contain 'genres/' so the track name can be derived.
    Output: X, Y
            X = [NUM_SAMPLES_GTZAN, num_files] matrix containing the audio data,
                cut or padded with 0s to NUM_SAMPLES_GTZAN samples
            Y = [24, num_files] matrix containing the key vector of each file
            Files whose key vector is all zeros (unknown key) are skipped, so
            num_files counts only the files that were kept.
    """
    # Sorted so the column order does not depend on the filesystem's listing order.
    file_list = sorted(
        path
        for root, _, _ in os.walk(directory)
        for path in glob(os.path.join(root, '*' + GTZAN_SUFFIX))
    )

    # Columns are collected and concatenated once at the end; appending to the
    # matrix inside the loop re-copied everything loaded so far for each file.
    # The leading empty float64 blocks keep the result's dtype and its shape
    # when no file is kept.
    x_columns = [np.zeros((NUM_SAMPLES_GTZAN, 0))]
    y_columns = [np.zeros((24, 0))]

    for file_num, file in enumerate(file_list):
        if file_num % 10 == 0:
            print('Loading File ' + str(file_num) + '/' + str(len(file_list)))

        # Take everything after the last 'genres/' so a path that contains the
        # marker more than once still resolves to the track name.
        _, marker, file_name = file.rpartition('genres/')
        if not marker:
            raise ValueError("expected 'genres/' in path: " + file)
        audio_data1, y1 = read_data(file_name[:-len(GTZAN_SUFFIX)])

        if np.sum(y1) == 0:
            print('WARNING: key unknown/modulation, skipping: file=' + file_name)
            continue

        x_columns.append(cut_or_pad_to_length(audio_data1, NUM_SAMPLES_GTZAN))
        y_columns.append(y1)

    X = np.concatenate(x_columns, axis=1)
    Y = np.concatenate(y_columns, axis=1)
    return X, Y
        

In [76]:
def write_np_data(X, Y, prefix):
    """Save X and Y, under the keys 'X' and 'Y', to the compressed archive data/working/<prefix>.npz."""
    archive_path = 'data/working/' + prefix + '.npz'
    np.savez_compressed(archive_path, X=X, Y=Y)

In [77]:
def load_np_data(prefix):
    """Load the 'X' and 'Y' arrays stored in data/working/<prefix>.npz.

    Input:  prefix = archive name without directory or '.npz' extension
    Output: X, Y = the arrays saved under the keys 'X' and 'Y'
    """
    # np.load on an .npz file returns a lazily-read archive that keeps the file
    # open; read both arrays inside the context manager so the handle is closed.
    with np.load('data/working/' + prefix + '.npz') as loaded:
        X = loaded['X']
        Y = loaded['Y']

    return X, Y

In [83]:
def process_data_into_np_files():
    """Convert every genre's raw audio into one compressed .npz archive.

    For each name in GENRES, loads the genre's folder under GTZAN_PREFIX with
    load_all_data and stores the resulting X, Y through write_np_data, using
    the genre name as the archive prefix. Prints each genre name as it starts.
    """
    # write_np_data saves into data/working/; create it up front so a missing
    # directory is not discovered only after a whole genre has been decoded.
    os.makedirs('data/working/', exist_ok=True)

    for genre in GENRES:
        print(genre)
        # GTZAN_PREFIX is the same root read_data resolves track names against,
        # so the walked folder and the files actually read cannot drift apart.
        X, Y = load_all_data(GTZAN_PREFIX + genre)
        write_np_data(X, Y, genre)

In [84]:
def get_genre_and_song_idx_given_idx(idx, inclusive=True):
    """Map a global song index to a (genre index, song index within genre) pair.

    Input:  idx = position in the concatenation of all genres, ordered as in
                  GENRES and sized by GENRE_SIZES
            inclusive = True when idx names a song; False when idx is an
                  exclusive end bound, in which case the pair for idx - 1 is
                  returned with its song index incremented by one
    Output: genre_idx, song_idx
            idx <= 0 yields (0, 0). idx >= TOTAL_SIZE is clamped to the last
            genre: its last song when inclusive, one past it otherwise.
    """
    last_genre_idx = len(GENRE_SIZES) - 1

    if idx >= TOTAL_SIZE:
        past_the_end = GENRE_SIZES[-1]
        return last_genre_idx, (past_the_end - 1 if inclusive else past_the_end)

    if idx <= 0:
        return 0, 0

    if not inclusive:
        # Resolve the last included song, then step one past it.
        genre_idx, song_idx = get_genre_and_song_idx_given_idx(idx - 1)
        return genre_idx, song_idx + 1

    # Walk the genres while tracking the global index at which each one starts.
    genre_start_idx = 0
    for genre_idx in range(len(GENRES)):
        genre_end_idx = genre_start_idx + GENRE_SIZES[genre_idx]
        if idx < genre_end_idx:
            return genre_idx, idx - genre_start_idx
        genre_start_idx = genre_end_idx

    return last_genre_idx, GENRE_SIZES[-1]

In [85]:
def load_from_range(from_idx, to_idx):
    """Load the songs from a given global index (inclusive) to a given global index (exclusive).

    Output: X, Y
            X = [NUM_SAMPLES_GTZAN, n] audio columns of the selected songs
            Y = [24, n] matching key vectors
            gathered from the per-genre archives read through load_np_data.
    """
    first_genre, first_song = get_genre_and_song_idx_given_idx(from_idx)
    last_genre, end_song = get_genre_and_song_idx_given_idx(to_idx, inclusive=False)

    X = np.zeros((NUM_SAMPLES_GTZAN, 0))
    Y = np.zeros((24, 0))

    for genre_idx in range(first_genre, last_genre + 1):
        genre_X, genre_Y = load_np_data(GENRES[genre_idx])

        # Only the first genre starts mid-way and only the last one stops early;
        # every genre in between contributes all of its columns.
        start = first_song if genre_idx == first_genre else 0
        stop = end_song if genre_idx == last_genre else genre_X.shape[1]

        X = np.concatenate((X, genre_X[:, start:stop]), axis=1)
        Y = np.concatenate((Y, genre_Y[:, start:stop]), axis=1)

    return X, Y

In [86]:
def load_song_by_idx(idx):
    """Load the single song at global index idx, as the (X, Y) pair returned by load_from_range."""
    next_idx = idx + 1
    return load_from_range(idx, next_idx)

In [88]:
# Run the full conversion: one data/working/<genre>.npz archive is written per
# entry in GENRES. Progress is printed for every 10th file of each genre.
process_data_into_np_files()

country
Loading File 0/100
Loading File 10/100
Loading File 20/100
Loading File 30/100
Loading File 40/100
Loading File 50/100
Loading File 60/100
Loading File 70/100
Loading File 80/100
Loading File 90/100
pop
Loading File 0/100
Loading File 10/100
Loading File 20/100
Loading File 30/100
Loading File 40/100
Loading File 50/100
Loading File 60/100
Loading File 70/100
Loading File 80/100
Loading File 90/100
hiphop
Loading File 0/100
Loading File 10/100
Loading File 20/100
Loading File 30/100
Loading File 40/100
Loading File 50/100
Loading File 60/100
Loading File 70/100
Loading File 80/100
Loading File 90/100
reggae
Loading File 0/100
Loading File 10/100
Loading File 20/100
Loading File 30/100
Loading File 40/100
Loading File 50/100
Loading File 60/100
Loading File 70/100
Loading File 80/100
Loading File 90/100
classical
Loading File 0/100
Loading File 10/100
Loading File 20/100
Loading File 30/100
Loading File 40/100
Loading File 50/100
Loading File 60/100


Loading File 70/100
Loading File 80/100
Loading File 90/100
jazz
Loading File 0/100
Loading File 10/100
Loading File 20/100
Loading File 30/100
Loading File 40/100
Loading File 50/100
Loading File 60/100
Loading File 70/100
Loading File 80/100
Loading File 90/100
rock
Loading File 0/100
Loading File 10/100
Loading File 20/100
Loading File 30/100
Loading File 40/100
Loading File 50/100
Loading File 60/100
Loading File 70/100
Loading File 80/100
Loading File 90/100
blues
Loading File 0/100
Loading File 10/100
Loading File 20/100
Loading File 30/100
Loading File 40/100
Loading File 50/100
Loading File 60/100
Loading File 70/100
Loading File 80/100
Loading File 90/100
disco
Loading File 0/100
Loading File 10/100
Loading File 20/100
Loading File 30/100
Loading File 40/100
Loading File 50/100
Loading File 60/100
Loading File 70/100
Loading File 80/100
Loading File 90/100
metal
Loading File 0/100
Loading File 10/100
Loading File 20/100
Loading File 30/100
Loading File 40/100
Loading File 50/1