In [1]:
import glob
import pickle

import scipy.io
import scipy.signal
import librosa
import numpy as np

In [2]:
# Collect every path under data/raw whose name ends in "wav" (relative to the
# notebook's working directory); glob returns them in no particular order.
files = glob.glob('data/raw/*wav')

In [3]:
# Show the discovered paths as the cell output.
files

['data/raw/10 Cheye.wav',
 'data/raw/04 Resol.wav',
 'data/raw/06 Jammi.wav',
 'data/raw/03 Death.wav',
 'data/raw/08 Magnu.wav',
 'data/raw/06 Stars.wav',
 'data/raw/12 Snake.wav',
 "data/raw/21 Let's.wav",
 'data/raw/10 My De.wav',
 'data/raw/15 Rocki.wav',
 'data/raw/06 I Use.wav',
 "data/raw/25 Don't.wav",
 'data/raw/04 I Was.wav',
 'data/raw/04 Learn.wav',
 'data/raw/11 Lucky.wav',
 'data/raw/12 Treas.wav',
 'data/raw/04 U Got.wav',
 'data/raw/22 Caref.wav',
 'data/raw/11 Macau.wav',
 'data/raw/02 Anody.wav',
 'data/raw/02 Astro.wav',
 'data/raw/17 Your .wav',
 'data/raw/Intro - .wav',
 'data/raw/08 Allig.wav',
 'data/raw/02 Win.wav',
 'data/raw/06 Rap G.wav',
 'data/raw/05 Foxey.wav',
 'data/raw/07 Bad O.wav',
 'data/raw/07 Ripco.wav',
 'data/raw/02 Circu.wav',
 'data/raw/05 Firew.wav',
 'data/raw/12 Dodo .wav',
 'data/raw/07 The N.wav',
 'data/raw/09 UR.wav',
 'data/raw/10 I Wil.wav',
 'data/raw/05 Highw.wav',
 'data/raw/13 Play .wav',
 "data/raw/08 Goin'.wav",
 'data/raw/10 In 

In [23]:
# Length of each clip that make_features cuts from a track.
WINDOW_LENGTH = 3  # in seconds
# Sample-rate threshold in Hz: _process_audio rejects audio sampled below it.
DESIRED_SAMPLE_RATE = 44100
# Passed as hop_length to librosa.feature.melspectrogram.
HOP_LENGTH = 2048
# Number of random clips drawn from each audio file.
N_CLIPS = 20
# Directory the per-clip .npy feature files are written into.
# NOTE(review): absolute, machine-specific path -- consider making it relative
# to the project so the notebook runs on other machines.
OUTPUT_DIR = '/home/dante_gates/repos/music-rec/data/train'

In [29]:
import random
import os

def read_wav(filename):
    """Read a WAV file and return its sample rate and audio data.

    Parameters
    ----------
    filename : str
        Path of the WAV file to read.

    Returns
    -------
    tuple
        ``(sr, audio)`` as produced by ``scipy.io.wavfile.read`` and then
        passed through ``_process_audio``.

    Raises
    ------
    ValueError
        Propagated from ``_process_audio`` when it rejects the audio.
    """
    # Import the submodule explicitly: a bare ``import scipy.io`` does not
    # guarantee that ``scipy.io.wavfile`` is loaded on every SciPy version.
    from scipy.io import wavfile

    sr, audio = wavfile.read(filename)
    return _process_audio(sr, audio)

def _process_audio(sr, audio):
    """Validate the sample rate and hand ``(sr, audio)`` back unchanged.

    Raises ``ValueError`` when ``sr`` is below ``DESIRED_SAMPLE_RATE``;
    the audio itself is not modified.
    """
    rate_too_low = sr < DESIRED_SAMPLE_RATE
    if rate_too_low:
        message = 'audio has a sample rate less than %s' % DESIRED_SAMPLE_RATE
        raise ValueError(message)
    return sr, audio

def _pcm_to_float(samples):
    """Return ``samples`` as floating point, scaling integer PCM into [-1, 1)."""
    if np.issubdtype(samples.dtype, np.floating):
        return samples
    info = np.iinfo(samples.dtype)
    # Half the representable range and its centre: 32768 / 0 for int16,
    # 128 / 128 for unsigned 8-bit data.
    half_range = (info.max - info.min + 1) / 2.0
    midpoint = info.min + half_range
    return (samples.astype(np.float32) - midpoint) / half_range


# TODO: not doing anything with right channel now.
def make_features(sr, audio):
    """Cut random clips from the audio and return their flattened mel spectrograms.

    ``N_CLIPS`` distinct start offsets are drawn at random. Each clip spans
    ``WINDOW_LENGTH`` seconds and is taken from the first channel, or from the
    signal itself when the audio is mono.

    Parameters
    ----------
    sr : int
        Sample rate of ``audio`` in Hz.
    audio : numpy.ndarray
        Samples shaped ``(n_samples,)`` or ``(n_samples, n_channels)``.

    Returns
    -------
    list of numpy.ndarray
        One 1-D array per clip: the mel spectrogram reshaped with ``-1``.

    Raises
    ------
    ValueError
        If the audio is too short to offer ``N_CLIPS`` distinct start offsets.
    """
    # A mono file is 1-D; indexing it with [:, 0] would raise IndexError.
    left = audio if audio.ndim == 1 else audio[:, 0]
    window = int(sr * WINDOW_LENGTH)
    n_starts = left.shape[0] - window
    if n_starts < N_CLIPS:
        raise ValueError('audio is too short to sample %s clips of %s seconds'
                         % (N_CLIPS, WINDOW_LENGTH))
    # random.sample accepts a range directly, so the multi-million element
    # list of candidate offsets never has to be materialised.
    starts = random.sample(range(n_starts), k=N_CLIPS)
    melspecs = []
    for clip_begin in starts:
        # librosa only accepts floating-point audio, while WAV readers commonly
        # return integer PCM; converting per clip keeps the copy small.
        clip = _pcm_to_float(left[clip_begin:clip_begin + window])
        # Keyword arguments: newer librosa releases reject positional y/sr.
        melspec = librosa.feature.melspectrogram(y=clip, sr=sr,
                                                 hop_length=HOP_LENGTH)
        melspecs.append(np.reshape(melspec, -1))
    return melspecs

# Flattened feature length each clip must have to be saved.
# NOTE(review): presumably 128 mel bands x 65 frames for a 3 s clip at
# 44.1 kHz with a hop of 2048 -- confirm against the librosa defaults in use.
_expected_shape = (8320,)  # for sanity check
def create_training_data(files):
    """Extract clip features from each audio file and save them as ``.npy`` files.

    For every path in ``files`` the audio is read, features are computed with
    ``make_features``, and each feature whose shape equals ``_expected_shape``
    is written to ``OUTPUT_DIR`` as ``'<basename> - sample <i>.npy'``.
    Files that raise ``ValueError`` while being read or featurised are
    reported with ``print`` and skipped; features with an unexpected shape are
    reported and not saved.

    Parameters
    ----------
    files : iterable of str
        Paths of the WAV files to process.
    """
    # np.save does not create missing directories.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    for file in files:
        try:
            sr, audio = read_wav(file)
            # Featurisation also raises ValueError (e.g. audio too short to
            # sample from); one bad file should not abort the whole batch.
            features = make_features(sr, audio)
        except ValueError as e:
            print(file, e)
            continue
        basename = os.path.basename(file)
        for i, feature in enumerate(features, start=1):
            if feature.shape != _expected_shape:
                print('%s did not have expected shape' % file)
                continue
            saveto = '%s - sample %s.npy' % (basename, i)
            saveto = os.path.join(OUTPUT_DIR, saveto)
            np.save(saveto, feature)

In [None]:
# Build the training set: writes the per-clip .npy feature files into OUTPUT_DIR.
create_training_data(files)