In [1]:
# %matplotlib notebook
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt
import torch, torchvision
import torch.nn as nn
import pickle

if torch.cuda.is_available():
    computing_device = torch.device("cuda")
else:
    computing_device = torch.device("cpu")
    
# hack: change the default (master) device
torch.cuda.set_device(2)

reader = wave.open('feather.wav', 'rb')
print(reader.getframerate(), reader.getsampwidth(), reader.getnframes())
nframes = reader.getnframes()
d = np.empty(nframes)
d = np.frombuffer(reader.readframes(nframes), dtype=np.float32, count=nframes)

fs, data = wavfile.read('feather.wav')
print(data.shape, d.shape)

In [2]:
# # dataloader inits
# from util import *

# chunk_size = 20
# window_size = 2048
# window_overlap = 1024
# batch_size = 1

# train_files = ['feather.wav'] 
# # train_files = ['data/05_8K.wav']#, 'data/02.wav', 'data/03.wav']#, 'data/04_8K.wav', 'data/05_8K.wav']
# val_files = ['data/01_8K.wav']

# train_gen = DataGenerator(train_files, chunk_size, window_size, 
#                           window_overlap, batch_size, vocoder=True)
# val_gen = None

In [3]:
# import maestro
import pandas as pd
from sklearn.utils import shuffle 

maestro_root = "/imgne/maestro-v1.0.0/"

meta_df_orig = pd.read_csv(maestro_root + "maestro-v1.0.0.csv")
meta_df_orig["audio_filename"] = meta_df_orig["audio_filename"].apply(lambda x: maestro_root + x)
meta_df = meta_df_orig[meta_df_orig["year"] == 2017]

shuffle(meta_df)

train_files = meta_df[meta_df["split"] == "train"]["audio_filename"].values
val_files = meta_df[meta_df["split"] == "validation"]["audio_filename"].values
test_files = meta_df[meta_df["split"] == "test"]["audio_filename"].values

print(train_files[0])

/imgne/maestro-v1.0.0/2017/MIDI-Unprocessed_066_PIANO066_MID--AUDIO-split_07-07-17_Piano-e_3-02_wav--3.wav


In [4]:
# dataloader init

from util import *

chunk_size = 20
window_size = 2048
window_overlap = 1024
batch_size = 8

train_val_data_file = "data/maestro2017_train_val_pv.pkl"
# test_files = ['data/05_8K.wav']#, 'data/02_8K.wav', 'data/03_8K.wav', 'data/04_8K.wav', 'data/05_8K.wav']
# test_files = in_files

if os.path.exists(train_val_data_file): 
    train_gen, val_gen = pickle.load(open(train_val_data_file, 'rb'))
else:
    train_gen = DataGenerator(train_files, chunk_size, window_size, window_overlap, batch_size, vocoder=True)
    val_gen = DataGenerator(val_files, chunk_size, window_size, window_overlap, batch_size, vocoder=True)
    pickle.dump((train_gen, val_gen), open(train_val_data_file, 'wb'))

In [5]:
# model setup

from lstm import *

input_dim = train_gen.X_list[0].shape[2] #TODO
hidden_dim = 4096
num_layers = 1
print(input_dim)

# model = LSTMBasic(input_dim, hidden_dim, num_layers=num_layers, batch_size=batch_size)
# model = LSTMFC(input_dim, hidden_dim, hidden_dim, num_layers=num_layers, batch_size=batch_size, dropout_p=0.2)
model = LSTMCNN(input_dim, hidden_dim, num_layers=num_layers, batch_size=batch_size, decoder="2fc", dropout_p=0.5)

model = model.to(computing_device)
criterion = nn.MSELoss().to(computing_device)
dp = nn.DataParallel(model, dim=1, device_ids=[2,3]).to(computing_device)
m = dp.module
optimizer = torch.optim.Adam(dp.parameters(), lr=0.001)

2048


In [6]:
# trainer setup

from lstm_trainer import *
trainer = LSTMTrainer(dp, criterion, optimizer, session_name="pv_dropout")

Using cuda


In [7]:
# load trained model? 

load_model = False

if load_model:
    epochs_trained = 1000
    model_file = "models/pv_dropout/cs{}_h{}_e{}.ckpt".format(chunk_size, hidden_dim, epochs_trained)
#     model_file = "models/v_cs20_h2048_e4000.ckpt"
    print("Loading model: {}".format(model_file))
    trainer.load_model(model_file, epochs_trained)

In [None]:
%%time

# training

train_model = True
iter_epochs = 100
iters = 50
dump_epochs = 10

if train_model:
    
#     fig = plt.figure(figsize=(6,3))
#     ax = fig.add_subplot(1,1,1)
#     fig.show(); fig.canvas.draw()
    
    # train a series of models at different numbers of epochs
    curr_train_losses, curr_val_losses = [], []
    for i in range(iters):

        train_loss, val_loss = trainer.train(train_gen, val_gen, iter_epochs, 1,
                                             dump_model=True, dump_epochs=dump_epochs, dump_loss=True)
        curr_train_losses += train_loss  # train_loss is a 2D python list
        curr_val_losses += val_loss
        
        # plot loss curve
#         ax.clear()
#         ax.plot(np.array(curr_train_losses).mean(axis=1))
#         fig.canvas.draw()
        
print()

[Train] Epoch 0039 | MIDI-Unprocessed_053 | Chunk 027 [########################------]078.8% | T+5:30:18.737808

In [None]:
# import pickle
# import numpy as np

# train_loss = []
# val_loss = []
# for i in range(80):
#     t,v = pickle.load(open("models/model_h150_e{}.ckpt.loss.pkl".format((i+1)*10), 'rb'))
#     train_loss += [t]
#     val_loss += [v]
# plt.plot(np.average(np.array(train_loss).reshape((800,3799)), axis=1))
# plt.plot(np.average(np.array(val_loss).reshape((800,674)), axis=1))

# misc. tests below

In [None]:
raise Exception("STOP") # dirty way to stop the notebook

In [None]:
chunk_size = 20
window_size = 2048
#window_overlap = 1023
batch_size = 1

train_files = ['feather.wav'] 
#, 'data/02_8K.wav', 'data/03_8K.wav', 'data/04_8K.wav', 'data/05_8K.wav']

test_gen = DataGenerator(train_files, chunk_size, window_size, window_overlap, batch_size)

In [None]:
fname, X, T = test_gen[0]
X.shape

In [None]:
m.eval()

with torch.no_grad():

    m.curr_state = m.init_hidden()

    eval_output = []
    cell_states = []
    hidden_states = []
    
    # prime the model with 30 seconds of input
    primer = X[:600].to(computing_device)
    for i in range(600):
        out, states = m(primer[i:i+1], m.curr_state)
        eval_output += [out.cpu().numpy()]
        cell_states += [states[0].cpu().numpy()]
        hidden_states += [states[1].cpu().numpy()]

    # start generation 
    for i in range(200):
        print("{}/{}".format(i+1, 200), end='\r')
        out, states = m(out, m.curr_state)
        eval_output += [out.cpu().numpy()]
        cell_states += [states[0].cpu().numpy()]
        hidden_states += [states[1].cpu().numpy()]

In [None]:
eo = torch.FloatTensor(eval_output)[:, 0]
eo.shape

In [None]:
#fname, X, T = train_gen[0]
#print(X.shape)
#print(X[0:chunk_size].shape)
#chunks = torch.tensor(e[i*chunk_size*10:(i+1)*chunk_size*10])
#print(chunks.shape)
print(eo.min(), eo.max())
t,x = train_gen.reassemble_istft(eo[400:, :1])

In [None]:
import matplotlib.pyplot as plt

fs = 8000

plt.specgram(x[:1000], Fs=fs, NFFT=1024, noverlap=window_overlap)
plt.show()

In [None]:
t = np.load('biiig_test.npy')
#t = np.load('feather_stft.npy')

print(t.shape)
frames = np.fft.irfft(t).real
print(frames.shape)

In [None]:
wavy = []
for x in frames:
    # apply hanning window to frame
    print(x.shape)
    time = np.arange(window_size)
    hanning = 0.5 * (1 - np.cos(2 * np.pi * time / window_size))
    x *= hanning
    
    
    #np.clip(buffer, -1, 1, out=buffer)

    #n = buffer.shape[1]
    #frames = (buffer.T.reshape((-1,)) * 32676).astype(np.int16).tobytes()
    #self._writer.writeframes(frames) lmao

In [None]:
wavfile.write("test2.wav", fs, x)

In [None]:
cell_states = np.array(cell_states)[:, 0, 0]
hidden_states = np.array(hidden_states)[:, 0, 0]

In [None]:
plt.figure(figsize=(15,15))
plt.imshow(eo[:,0].transpose(0,1), cmap='gray')
plt.show()
plt.figure(figsize=(15,15))
plt.imshow(cell_states.transpose(), cmap='gray')
plt.show()
X.max()