In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import fastai
fastai.__version__

'1.0.37'

In [3]:
from fastai import *
from fastai_audio import *

In [4]:
import librosa
from utils import read_file
from tqdm import tqdm_notebook as tqdm

In [5]:
# Base data directory; the NSynth audio lives beneath it.
DATA = Path('data')
NSYNTH_AUDIO = DATA/'nsynth_audio' # contains train, valid and test folders (all three are read below)

In [6]:
# Label CSVs for the train / valid / test guitar subsets.
# NOTE(review): "40_88" in the file names presumably denotes the pitch range that was kept — confirm.
LABEL_DIR = Path('tmp_labels')
GUITAR_TRN = LABEL_DIR/'train_guitar_clean_40_88.csv'
GUITAR_VAL = LABEL_DIR/'valid_guitar_clean_40_88.csv'
GUITAR_TST = LABEL_DIR/'test_guitar_clean_40_88.csv'

In [7]:
# Read the three label tables in train / valid / test order, then peek at the first rows.
trn_df, val_df, tst_df = map(pd.read_csv, (GUITAR_TRN, GUITAR_VAL, GUITAR_TST))
trn_df.head(2)

Unnamed: 0,note_str,pitch
0,guitar_acoustic_001-082-050,82
1,guitar_electronic_035-062-127,62


In [8]:
# Number of labelled notes in each split (train, valid, test).
tuple(map(len, (trn_df, val_df, tst_df)))

(16915, 1112, 347)

In [9]:
# Sample rate; used below to express max_len as sr*2 samples.
# NOTE(review): presumably in Hz and matching the audio files on disk — confirm.
sr = 16000

In [10]:
def load_trim_split(df, path, suffix='.wav', max_len=32000, top_db=50, frame_len=1024):
    """Load every note listed in `df`, trim it, and cut it into fixed-length frames.

    Parameters
    ----------
    df : DataFrame
        Must provide 'note_str' (file stem) and 'pitch' columns.
    path : path-like
        Forwarded to `read_file` as the directory to read from.
    suffix : str
        File extension appended to each 'note_str'. Previously this argument
        was ignored and '.wav' was always used; the default keeps that behaviour.
    max_len : int
        Only the first `max_len` samples of each file are kept before trimming.
    top_db : float
        Threshold passed to `librosa.effects.trim` (with ref=1.0).
    frame_len : int
        Number of samples per frame. A trailing remainder shorter than
        `frame_len` is discarded.

    Returns
    -------
    DataFrame with columns ['note_str', 'pitch', 'frame'], one row per frame.
    """
    rows = []
    for _, row in tqdm(df.iterrows(), total=len(df)):
        filename = row['note_str'] + suffix
        # Second return value of read_file is unused here
        # (presumably the sample rate — confirm against utils.read_file).
        x, _ = read_file(filename, path=path)
        x = x[:max_len]
        # trim returns (trimmed_signal, index); only the signal is needed.
        x = librosa.effects.trim(x, ref=1.0, top_db=top_db)[0]

        # Floor division guarantees every slice below is exactly frame_len long,
        # so no per-frame length check is required.
        n_frames = x.shape[0] // frame_len
        for n in range(n_frames):
            frame = x[n*frame_len:(n+1)*frame_len]
            rows.append((row['note_str'], row['pitch'], frame))

    return pd.DataFrame(rows, columns=['note_str', 'pitch', 'frame'])


In [None]:
# Frame the training notes; max_len=sr*2 keeps at most the first 2*sr samples of each file before trimming.
trn_frames = load_trim_split(trn_df, NSYNTH_AUDIO/'train', max_len=sr*2)

In [None]:
# Frame the validation notes with the same 2*sr sample cap.
val_frames = load_trim_split(val_df, NSYNTH_AUDIO/'valid', max_len=sr*2)

In [None]:
# Frame the test notes with the same 2*sr sample cap.
tst_frames = load_trim_split(tst_df, NSYNTH_AUDIO/'test', max_len=sr*2)

In [None]:
# (rows, columns) of each frame table, in train / valid / test order.
tuple(frames.shape for frames in (trn_frames, val_frames, tst_frames))

In [None]:
# Stack the per-row training frames into a single array and save it.
# NOTE(review): later cells read the '*_1024' files written by save_frames_df,
# not 'trn_frames.npy' — this ad-hoc save looks superseded.
trn_frames_np = np.stack(trn_frames['frame'].values)
np.save('trn_frames.npy', trn_frames_np)

In [None]:
# Same ad-hoc save for the validation frames.
# NOTE(review): 'val_frames.npy' is not read anywhere in the visible cells.
val_frames_np = np.stack(val_frames['frame'].values)
np.save('val_frames.npy', val_frames_np)

In [None]:
# Same ad-hoc save for the test frames.
# NOTE(review): 'tst_frames.npy' is not read anywhere in the visible cells.
tst_frames_np = np.stack(tst_frames['frame'].values)
np.save('tst_frames.npy', tst_frames_np)

In [None]:
def save_frames_df(df, name):
    """Persist a frame table as two sibling files.

    `<name>.npy` receives the stacked contents of the 'frame' column and
    `<name>.csv` receives the 'note_str' and 'pitch' columns (no index column).
    """
    # np.stack needs every frame to share the same shape.
    stacked = np.stack(df['frame'].values)
    npy_path = name + '.npy'
    np.save(npy_path, stacked)

    csv_path = name + '.csv'
    labels = df[['note_str', 'pitch']]
    labels.to_csv(csv_path, index=False)

In [None]:
# Writes trn_frames_1024.npy (frames) and trn_frames_1024.csv (note_str, pitch).
save_frames_df(trn_frames, 'trn_frames_1024')

In [None]:
# Writes val_frames_1024.npy (frames) and val_frames_1024.csv (note_str, pitch).
save_frames_df(val_frames, 'val_frames_1024')

In [None]:
# Writes tst_frames_1024.npy (frames) and tst_frames_1024.csv (note_str, pitch).
save_frames_df(tst_frames, 'tst_frames_1024')

In [75]:
def load_frames_df(name):
    """Load a frame table previously written as `<name>.csv` + `<name>.npy`.

    The CSV supplies the label columns; row i of the .npy array becomes the
    'frame' value of row i in the returned DataFrame.

    Raises
    ------
    ValueError
        If the two files do not contain the same number of rows (previously
        this was silently index-aligned, yielding NaN or dropped frames).
    """
    df = pd.read_csv(name + '.csv')
    frames_np = np.load(name + '.npy')
    if frames_np.shape[0] != len(df):
        raise ValueError(
            "row count mismatch: %d label rows in %s.csv vs %d frames in %s.npy"
            % (len(df), name, frames_np.shape[0], name))
    # Iterating over the first axis yields one row per frame directly.
    # Unlike array_split + squeeze this also works for an empty array and
    # does not collapse width-1 frames to 0-d.
    df['frame'] = pd.Series(list(frames_np), index=df.index)
    return df

In [51]:
# Scratch: reload the saved validation frames.
# NOTE(review): this and the following cells appear to be exploration for load_frames_df.
frames_np = np.load('val_frames_1024.npy')

In [None]:
# Scratch: split into one chunk per row of frames_np and wrap the chunks in a Series.
s = pd.Series(np.array_split(frames_np, frames_np.shape[0]))

In [59]:
# Scratch: a small 4x12 slice for quick experiments.
fnp = frames_np[:4, :12]; fnp.shape

(4, 12)

In [60]:
# Scratch: same per-row split on the small slice (rebinds `s`).
s = pd.Series(np.array_split(fnp, fnp.shape[0]))

In [67]:
# array_split returns a plain list; each chunk keeps a leading length-1 axis, i.e. shape (1, 12).
a = np.array_split(fnp, fnp.shape[0]); type(a), a[0].shape

(list, (1, 12))

In [71]:
# Drop the length-1 axis from every chunk (rebinds `a`).
a = list(map(np.squeeze, a))

In [72]:
# After squeezing, each chunk is 1-D.
a[0].shape

(12,)

In [69]:
# NOTE: this cell was executed (In [69]) before the squeeze cell above (In [71]);
# it shows that squeezing a single (1, 12) chunk yields shape (12,).
np.squeeze(a[0]).shape

(12,)

In [76]:
# Reload the three frame tables from the '*_1024' csv/npy pairs written by save_frames_df,
# so the expensive load_trim_split cells do not have to be re-run.
trn_frames = load_frames_df('trn_frames_1024')
val_frames = load_frames_df('val_frames_1024')
tst_frames = load_frames_df('tst_frames_1024')

In [77]:
# Wrap the train / valid frame tables as item lists.
# Tables from load_frames_df have columns (note_str, pitch, frame), so position 2 is 'frame'.
# NOTE(review): confirm AudioFrameList.from_df interprets cols=2 positionally.
trn_list = AudioFrameList.from_df(trn_frames, NSYNTH_AUDIO, cols=2)
val_list = AudioFrameList.from_df(val_frames, NSYNTH_AUDIO, cols=2)

In [78]:
# Decibel conversion applied to the spectrum in batch_fft.
# NOTE(review): ref/top_db semantics come from fastai_audio.ToDecibels — confirm.
to_db = ToDecibels(ref=1.0, top_db=50.0)
# Hann window of 1024 samples with a leading axis of size 1 so it broadcasts over a batch.
# The hard-coded 1024 must match the frame_len used when the frames were created.
window = to_device(torch.hann_window(1024).unsqueeze_(0), None)

def batch_fft(inputs):
    """Batch transform: replace raw frames with their windowed spectrum in dB.

    `inputs` is an (xs, ys) pair; ys is passed through untouched.
    NOTE(review): torch.rfft is the legacy FFT API (removed in PyTorch 1.8);
    this notebook pins fastai 1.0.37, so it is kept as-is.
    """
    frames, targets = inputs
    # Legacy rfft returns real and imaginary parts in a trailing axis.
    spectrum = torch.rfft(frames * window, 1, normalized=True)
    # Square both parts in place and sum them over that trailing axis.
    power = spectrum.pow_(2.0).sum(-1)
    return to_db(power), targets

# Transforms handed to the DataBunch via tfms=; currently just the FFT step.
batch_tfms = [batch_fft]

In [79]:
# Batch size for the DataBunch.
bs = 32
# Build the DataBunch from the train / valid item lists, labelling each frame
# from the 'pitch' column and passing batch_tfms as tfms.
data = (ItemLists(NSYNTH_AUDIO, trn_list, val_list)
            .label_from_df('pitch')
            .databunch(bs=bs, tfms=batch_tfms))
# Pull one batch to sanity-check shapes; the recorded output is (32, 513) inputs and (32,) labels.
xs, ys = data.one_batch()
xs.shape, ys.shape

(torch.Size([32, 513]), torch.Size([32]))