In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import audioread

# Where the raw audio files and their key-annotation files live on disk.
DATA_PREFIX = 'data/raw/'
GTZAN_PREFIX = DATA_PREFIX + 'genres/'
GTZAN_SUFFIX = '.au'
KEY_FILE_PREFIX = DATA_PREFIX + 'gtzan_key-master/gtzan_key/genres/'
KEY_FILE_SUFFIX = '.lerch.txt'

# Tonic names in key-index order (index 0 is A, index 11 is G#).
_TONICS = ('A', 'A#', 'B', 'C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#')

# Key index -> tab-separated "tonic<TAB>mode" label.
# Entries 0-11 are the major keys, entries 12-23 the minor keys on the same tonics.
STRING_MAP = [tonic + '\t' + mode
              for mode in ('major', 'minor')
              for tonic in _TONICS]


In [2]:
def get_vector_from_key(key):
    """Build the normalized 24-element ground-truth score vector for a key index.

    Indices 0-11 are the major keys and 12-23 the minor keys on the same
    tonics. The annotated key receives the largest weight and three related
    keys receive partial credit (weights shown before normalization):

        annotated key                           1.0
        key 7 semitones up, same mode (fifth)   0.5
        relative key (major <-> minor)          0.3
        parallel key (same tonic, other mode)   0.2

    The weights sum to 2, so the vector is halved to make it sum to 1.

    Args:
        key: integer key index in the range 0..23.

    Returns:
        numpy float array of length 24 whose entries sum to 1.

    Raises:
        IndexError: if ``key`` is outside 0..23. Negative values are rejected
            explicitly because numpy would otherwise index from the end of
            the vector and silently produce scores for the wrong key.
    """
    if not 0 <= key < 24:
        raise IndexError('key index must be in 0..23, got %r' % (key,))

    is_major = key < 12
    tonic = key % 12
    same_mode = 0 if is_major else 12   # offset of the block holding key's own mode
    other_mode = 12 - same_mode         # offset of the opposite mode's block
    # Relative minor lies 9 semitones above a major tonic; relative major lies
    # 3 semitones above a minor tonic.
    relative_shift = 9 if is_major else 3

    vector = np.zeros(24)
    vector[key] = 1
    vector[(tonic + 7) % 12 + same_mode] = 0.5
    vector[(tonic + relative_shift) % 12 + other_mode] = 0.3
    vector[tonic + other_mode] = 0.2

    # Weights total 2; halve so the scores sum to 1.
    vector /= 2

    return vector

In [3]:
def get_string_from_vector(vector):
    """Return the STRING_MAP label of the highest-scoring entry in ``vector``.

    The label is the tab-separated tonic and mode stored in STRING_MAP at the
    position of the vector's maximum.
    """
    best_index = np.argmax(vector)
    return STRING_MAP[best_index]

In [4]:
def generate_one_hot_guess(vector):
    """Pick the single best guess from a score vector.

    Returns the index of the largest entry of ``vector`` (as given by
    ``np.argmax``). Note that despite the name this is an index, not a
    one-hot array.
    """
    guess_index = np.argmax(vector)
    return guess_index

In [5]:
def read_audio(infile):
    """Decode an audio file into an array of 16-bit PCM samples.

    Args:
        infile: file name (including path) to open with ``audioread``.

    Returns:
        (fs, audio_data) where
            fs         = sample rate, in Hz
            audio_data = writable 1-D int16 numpy array holding every sample
                         in the order the decoder yields it
                         (NOTE(review): for multi-channel files this is
                         presumably interleaved -- confirm against audioread).
    """
    with audioread.audio_open(infile) as f:
        fs = f.samplerate
        # Interpret each decoded buffer as little-endian signed 16-bit samples.
        # Keeping whole numpy chunks (instead of appending one Python object
        # per sample to a list) makes decoding far cheaper in time and memory.
        chunks = [np.frombuffer(buf, dtype='<i2') for buf in f]

    if chunks:
        # concatenate always allocates a fresh, writable array.
        audio_data = np.concatenate(chunks)
    else:
        # No buffers decoded: still hand back an int16 array, not float64.
        audio_data = np.empty(0, dtype='<i2')

    # Normalize to the native int16 dtype (a no-op on little-endian hosts).
    audio_data = audio_data.astype(np.int16, copy=False)
    return fs, audio_data

In [6]:
def read_data(file):
    """Load one clip together with its ground-truth key vector.

    Args:
        file: file name relative to the 'genres' directory, without
            extension (e.g. 'blues/blues.00000'), to read data from.

    Returns:
        (fs, audio_data, y) where
            fs         = sample rate, in Hz (from read_audio)
            audio_data = numpy array of 16-bit signed integer samples
                         (from read_audio)
            y          = normalized ground-truth scoring vector for the file
                         (from get_vector_from_key)
    """
    key_path = KEY_FILE_PREFIX + file + KEY_FILE_SUFFIX
    # Use a context manager so the annotation file is closed promptly instead
    # of leaking the handle until garbage collection.
    with open(key_path, 'r') as key_file:
        key = int(key_file.read())
    y = get_vector_from_key(key)

    fs, audio_data = read_audio(GTZAN_PREFIX + file + GTZAN_SUFFIX)
    return fs, audio_data, y

In [7]:
# EXAMPLE: load the clip 'blues/blues.00000' -- its sample rate, its samples,
# and the ground-truth key vector built from its key-annotation file.
fs, audio_data, y = read_data('blues/blues.00000')

In [8]:
# Sample rate of the example clip, in Hz.
fs

22050

In [9]:
# Decoded samples of the example clip (16-bit signed integers).
audio_data

array([  240,   544,   250, ..., -1822, -2001, -2103], dtype=int16)

In [10]:
# Ground-truth score vector of the example clip: the largest weight sits on the
# annotated key, smaller weights on its related keys.
y

array([0.  , 0.15, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.1 ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.25, 0.  , 0.  , 0.  , 0.  ,
       0.5 , 0.  ])