# Demonstration of training process

Import packages

In [1]:
import os

# Restrict this process to the GPUs with ids 2 and 3. This runs in the first
# cell, ahead of the `import torch` in the next cell.
os.environ.update({'CUDA_VISIBLE_DEVICES': "2,3"})


In [2]:
import sys
import os
import math
import time
import glob
import datetime
import random
import pickle
import json
import numpy as np
from tqdm.notebook import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn.utils import clip_grad_norm_
from torch.utils.data import Dataset, DataLoader
from main import *

In [4]:
MODE = 'train'

###--- data ---###
# Every input file is resolved relative to path_data_root.
path_data_root = './'
path_train_data, path_test_data, path_dictionary = (
    os.path.join(path_data_root, file_name)
    for file_name in ('train.npz', 'test.npz', 'dictionary.pkl')
)

###--- training config ---###
path_exp = './exp'
batch_size = 64
init_lr = 0.0005      # learning rate handed to the optimizer
max_grad_norm = 3     # gradient-norm clipping threshold; None turns clipping off
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


Load data


In [5]:
def get_train_data():
  """Load the vocabulary dictionary and the training archive.

  Reads the pickled dictionary at ``path_dictionary`` and the NumPy archive at
  ``path_train_data`` (both are module-level settings).

  Returns:
    tuple: ``(train_data, event2word, word2event, dictionary)`` where
    ``dictionary`` is the unpickled object and ``event2word`` / ``word2event``
    are its two unpacked elements.
  """
  # NOTE(security): pickle.load and np.load(allow_pickle=True) can execute
  # arbitrary code -- only point these paths at files you trust.
  # The context manager closes the dictionary file; a bare open() left the
  # handle dangling until garbage collection.
  with open(path_dictionary, 'rb') as dict_file:
    dictionary = pickle.load(dict_file)
  event2word, word2event = dictionary
  # The object returned by np.load is handed back as-is (not closed here).
  train_data = np.load(path_train_data, allow_pickle=True)
  return train_data, event2word, word2event, dictionary
def get_test_data():
  """Load the vocabulary dictionary and the test archive.

  Reads the pickled dictionary at ``path_dictionary`` and the NumPy archive at
  ``path_test_data`` (both are module-level settings).

  Returns:
    tuple: ``(test_data, event2word, word2event, dictionary)`` where
    ``dictionary`` is the unpickled object and ``event2word`` / ``word2event``
    are its two unpacked elements.
  """
  # NOTE(security): pickle.load and np.load(allow_pickle=True) can execute
  # arbitrary code -- only point these paths at files you trust.
  # The context manager closes the dictionary file; a bare open() left the
  # handle dangling until garbage collection.
  with open(path_dictionary, 'rb') as dict_file:
    dictionary = pickle.load(dict_file)
  event2word, word2event = dictionary
  # The object returned by np.load is handed back as-is (not closed here).
  test_data = np.load(path_test_data, allow_pickle=True)
  return test_data, event2word, word2event, dictionary

In [6]:
# Fetch the training archive together with both vocabulary mappings
train_data, event2word, word2event, dictionary = get_train_data()

# config: vocabulary size of each token type, in event2word's key order
n_class = [len(vocabulary) for vocabulary in event2word.values()]

# log
print('num of classes:', n_class)

# unpack the arrays stored in the archive
train_x, train_y, train_mask, train_label = (
    train_data[field] for field in ('x', 'y', 'mask', 'label')
)
# repetition learning matrix; 'factor.npy' is written by the (commented-out)
# "Calculate repetition learning matrix" cell at the end of this notebook
factor = np.load('factor.npy')

# run
start_time = time.time()
# token order noted by the author (TODO confirm against n_class, which has one
# entry per event2word key):
#tempo chord barbeat type pitch duration velocity ins

num of classes: [56, 135, 18, 3, 87, 18, 25]


In [7]:
from collections import Counter

# How many training samples carry each value found in train_label
label_histogram = Counter(train_label)
label_histogram

Counter({2: 73774, 4: 287070, 3: 152060, 1: 27852, 0: 21807})

Load model

In [8]:
# Build the network. TransformerModel and network_paras are not defined in this
# notebook; presumably they come from `from main import *`.
net = TransformerModel(n_class)

# Optional warm start. Set info_load_model to (checkpoint_dir, loss_tag), e.g.
#   info_load_model = ("./exp/", '60')
# to load '<checkpoint_dir>/loss_<loss_tag>_params.pt' before training.
# This notebook never assigns info_load_model itself, so reading it directly
# raised NameError on a fresh kernel unless the star import happened to provide
# it. Keep whatever value already exists; otherwise default to "no checkpoint".
info_load_model = globals().get('info_load_model')

# load model
if info_load_model:
    path_ckpt = info_load_model[0]  # path to ckpt dir
    loss = info_load_model[1]       # loss tag embedded in the file name
    name = 'loss_' + str(loss)
    path_saved_ckpt = os.path.join(path_ckpt, name + '_params.pt')
    # map_location='cpu' lets a checkpoint saved on any GPU load regardless of
    # which devices are visible now; net.to(device) below moves the weights.
    state_dict = torch.load(path_saved_ckpt, map_location='cpu')
    # strict=False (as before): keys that do not match the model are skipped
    # without raising.
    net.load_state_dict(state_dict, strict=False)

# init
net = nn.DataParallel(net)
net.to(device)
net.train()
n_parameters = network_paras(net)
print('n_parameters: {:,}'.format(n_parameters))

# optimizers
optimizer = optim.Adam(net.parameters(), lr=init_lr)




>>>>>: [56, 135, 18, 3, 87, 18, 25]
n_parameters: 8,627,611


Train

In [10]:
# Number of full batches per epoch; a trailing partial batch is skipped.
num_batch = len(train_x) // batch_size
print('num_batch:', num_batch,'\ntrain_x:', train_x.shape,'\ntrain_y:', train_y.shape,'\ntrain_mask:', train_mask.shape)
if num_batch == 0:
    # Without this guard the epoch averages below divide by zero.
    raise ValueError('train_x holds fewer samples than batch_size; nothing to train on')

n_epoch = 1
# train_step returns one loss per token type followed by one extra loss, so the
# token-type count comes from n_class instead of being hard-coded as 7.
n_token_loss = len(n_class)

# train_step is called on the wrapped model rather than on the DataParallel
# wrapper. Unwrap once, ahead of the loops, instead of re-checking every batch.
# NOTE(review): from here on `net` is the bare module, so nn.DataParallel no
# longer splits batches across GPUs.
if isinstance(net, torch.nn.DataParallel):
    net = net.module

start_time = time.time()
for epoch in range(n_epoch):
    acc_loss = 0
    acc_losses = np.zeros(n_token_loss + 1)
    with tqdm(range(num_batch)) as bar:
        for bidx in bar:  # iterating the bar advances it by one per batch
            # index
            bidx_st = batch_size * bidx
            bidx_ed = batch_size * (bidx + 1)

            # unpack batch data
            batch_x = train_x[bidx_st:bidx_ed]
            batch_y = train_y[bidx_st:bidx_ed]
            batch_mask = train_mask[bidx_st:bidx_ed]
            batch_label = train_label[bidx_st:bidx_ed]
            batch_mask1 = factor[bidx_st:bidx_ed]  # repetition learning matrix

            # to tensors on the training device
            batch_x = torch.from_numpy(batch_x).long().to(device)
            batch_y = torch.from_numpy(batch_y).long().to(device)
            batch_label = torch.from_numpy(batch_label).long().reshape(len(batch_label), 1).to(device)
            batch_mask = torch.from_numpy(batch_mask).float().to(device)
            batch_mask1 = torch.from_numpy(batch_mask1).float().to(device)

            # run
            losses = net.train_step(batch_x, batch_y, batch_mask, batch_mask1, batch_label)
            # mean of the per-token-type losses, then averaged with the extra loss
            loss_1 = sum(losses[:n_token_loss]) / n_token_loss
            loss_2 = losses[n_token_loss]
            loss = (loss_1 + loss_2) / 2

            # Update
            net.zero_grad()
            loss.backward()
            if max_grad_norm is not None:
                clip_grad_norm_(net.parameters(), max_grad_norm)
            optimizer.step()

            # Pull the scalar values out once; reused for the status line and
            # for the running sums.
            loss_value = loss.item()
            loss_values = [l.item() for l in losses]

            # print ('\r' keeps the status on a single, overwritten line)
            sys.stdout.write('{}/{} | Loss: {:06f} | {}\r'.format(
                bidx, num_batch, loss_value,
                ', '.join('{:04f}'.format(value) for value in loss_values)))
            sys.stdout.flush()

            # acc
            acc_losses += np.array(loss_values)
            acc_loss += loss_value

    # Terminate the '\r' status line so the summary below does not print on top of it.
    sys.stdout.write('\n')

    # epoch loss
    runtime = time.time() - start_time  # elapsed since training started, not per epoch
    epoch_loss = acc_loss / num_batch
    acc_losses = acc_losses / num_batch
    print('------------------------------------')
    print('epoch: {}/{} | Loss: {} | time: {}'.format(
        epoch+1, n_epoch, epoch_loss, str(datetime.timedelta(seconds=runtime))))
    # Checkpoint tag: the first decimal digit of the loss times 10 (0.78 -> 70).
    fn = int(epoch_loss * 10) * 10
    # NOTE(review): this writes to './', while the example info_load_model path
    # in the "Load model" cell points at './exp/' -- move the file or adjust the path.
    torch.save(net.state_dict(), os.path.join('./', 'loss_' + str(fn)+'_params.pt'))
    
    


num_batch: 8790 
train_x: (562563, 120, 7) 
train_y: (562563, 120, 7) 
train_mask: (562563, 120)


  0%|          | 0/8790 [00:00<?, ?it/s]

------------------------------------4, 0.392686, 0.250256, 0.643726, 1.800636, 2.634145, 1.549636, 0.213727
epoch: 1/1 | Loss: 0.7842369870317674 | time: 0:08:35.870620


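Calculate the repetition learning matrix (`factor.npy`) for your own dataset. The code below is left commented out; uncomment it and run it once, before the "Load data" cell, because that cell loads `factor.npy`.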

In [11]:
# a=4
# b=2
# c=1
# label_type_matrix=np.float32([[c,c,c,c,a,c,c],
#      [c,c,c,c,a,c,c], 
#      [c,c,b,c,a,b,b], 
#      [c,c,b,c,a,b,b], 
#      [c,c,b,c,a,b,b]])

# factor=np.ones((train_x.shape[0],train_x.shape[1],train_x.shape[2]))
# for n in range(len(train_x)):
#     for k in range(7):
#         count=Counter(train_x[n][:,k])
#         for j in range(len(train_x[n])):
#             if train_x[n][j][k] !=0:
#                 factor[n,j,k] = label_type_matrix[train_label[n]][k]*(1+count[train_x[n][j][k]]/(len(train_x[n])-count[0]))
                
# np.save('factor.npy',factor)