In [1]:
import json
import torch
from gmm_model import *
import os
from sklearn.model_selection import train_test_split
from ptb_v2 import *
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pretty_midi
from IPython.display import Audio
from tqdm import tqdm
from polyphonic_event_based_v2 import *
from collections import Counter
import matplotlib.pyplot as plt
from adversarial_test import *
from polyphonic_event_based_v2 import parse_pretty_midi

None


In [2]:
from datetime import datetime

# Run configuration: every later cell reads its settings from `args`.
# Alternative config kept for reference: 'gmm_model_config.json'
with open('uni_model_config_2.json') as f:
    args = json.load(f)

# Output folders for logs and saved parameters; created only when missing.
for out_dir in ('log', 'params'):
    os.makedirs(out_dir, exist_ok=True)

# Timestamped save path built from the configured run name.
timestamp = str(datetime.now())
save_path_timing = 'params/{}.pt'.format("_".join((args['name'], timestamp)))

# model dimensions
# Sizes handed to the model constructor below; the first three are also the
# one-hot depths used when batches are prepared with convert_to_one_hot.
EVENT_DIMS = 342      # passed as roll_dims; one-hot depth of the event sequence `d`
RHYTHM_DIMS = 3       # passed as rhythm_dims; one-hot depth of `r`
NOTE_DIMS = 16        # passed as note_dims; one-hot depth of `n`
TEMPO_DIMS = 264      # passed as tempo_dims
VELOCITY_DIMS = 126   # passed as velocity_dims
CHROMA_DIMS = 24      # passed as chroma_dims

# # Model 1: hierarchical model
# save_path = "params/music_attr_vae_reg_110220.pt"
# model = MusicAttrRegVAE(roll_dims=EVENT_DIMS, rhythm_dims=RHYTHM_DIMS, note_dims=NOTE_DIMS, 
#                         tempo_dims=TEMPO_DIMS, velocity_dims=VELOCITY_DIMS, chroma_dims=CHROMA_DIMS,
#                         hidden_dims=args['hidden_dim'], z_dims=args['z_dim'], 
#                         n_step=args['time_step'])
# model.load_state_dict(torch.load(save_path))

# Model 2: Gaussian-mixture VAE (MusicAttrRegGMVAE) with two mixture components
ckpt_path = "params/music_attr_vae_reg_gmm_v3.8.pt"
model = MusicAttrRegGMVAE(roll_dims=EVENT_DIMS, rhythm_dims=RHYTHM_DIMS, note_dims=NOTE_DIMS,
                        tempo_dims=TEMPO_DIMS, velocity_dims=VELOCITY_DIMS, chroma_dims=CHROMA_DIMS,
                        hidden_dims=args['hidden_dim'], z_dims=args['z_dim'],
                        n_step=args['time_step'],
                        n_component=2)
# Announce the load before it happens so a failure is attributed to the right file.
print("Loading {}...".format(ckpt_path))
# map_location="cpu" lets a checkpoint saved from a GPU be deserialised on a
# CPU-only host; the weights are moved to the GPU below when one is available.
model.load_state_dict(torch.load(ckpt_path, map_location="cpu"))

if torch.cuda.is_available():
    print('Using: ', torch.cuda.get_device_name(torch.cuda.current_device()))
    model.cuda()
else:
    # NOTE(review): later cells still call .cuda() directly, so a CPU-only run
    # will stop there even though the checkpoint now loads.
    print('CPU mode')

# Bookkeeping values; `step` / `pre_epoch` are not read by the cells visible in this notebook.
step, pre_epoch = 0, 0
batch_size = args["batch_size"]
print(batch_size)
# model.train()

# dataloaders: one dataset + one sequential (unshuffled) loader per split
data_lst, rhythm_lst, note_density_lst, chroma_lst = get_classic_piano()
tlen, vlen = int(0.8 * len(data_lst)), int(0.9 * len(data_lst))
train_ds_dist, val_ds_dist, test_ds_dist = (
    MusicAttrDataset2(data_lst, rhythm_lst, note_density_lst, chroma_lst, mode=split)
    for split in ("train", "val", "test")
)
train_dl_dist, val_dl_dist, test_dl_dist = (
    DataLoader(ds, batch_size=batch_size, shuffle=False, num_workers=0)
    for ds in (train_ds_dist, val_ds_dist, test_ds_dist)
)
dl = train_dl_dist
print(len(train_ds_dist), len(val_ds_dist), len(test_ds_dist))

# vgmidi dataloaders
# Note: this rebinds data_lst / rhythm_lst / note_density_lst / chroma_lst, which
# held the classic-piano lists until here.
print("Loading VGMIDI...")
data_lst, rhythm_lst, note_density_lst, arousal_lst, valence_lst, chroma_lst = get_vgmidi()
vgm_train_ds_dist, vgm_val_ds_dist, vgm_test_ds_dist = (
    MusicAttrDataset3(data_lst, rhythm_lst, note_density_lst,
                      chroma_lst, arousal_lst, valence_lst, mode=split)
    for split in ("train", "val", "test")
)
vgm_train_dl_dist, vgm_val_dl_dist, vgm_test_dl_dist = (
    DataLoader(ds, batch_size=32, shuffle=False, num_workers=0)
    for ds in (vgm_train_ds_dist, vgm_val_ds_dist, vgm_test_ds_dist)
)
print(len(vgm_train_ds_dist), len(vgm_val_ds_dist), len(vgm_test_ds_dist))
print()


# Flags forwarded to the model's forward call in the extraction loop.
is_class = args["is_class"]
is_res = args["is_res"]

Loading params/music_attr_vae_reg_gmm_v3.8.pt...
Using:  Tesla V100-DGXS-32GB


  7%|▋         | 7123/103998 [00:00<00:01, 71225.48it/s]

128
1703


100%|██████████| 103998/103998 [00:01<00:00, 81502.11it/s]


Note density 13 0
(103934, 100) (103934, 16) (103934, 16) (103934, 24)
2.2783316816441204 0.8853457868971283
83147 10393 10394
Loading VGMIDI...
911 51 51



In [3]:
def convert_to_one_hot(input, dims):
    """One-hot encode an integer index tensor along a new trailing axis.

    Args:
        input: integer (long) tensor of class indices, any rank.
        dims: number of classes, i.e. the size of the new last axis.

    Returns:
        Float tensor of shape ``input.shape + (dims,)`` holding 1.0 at each
        index and 0.0 elsewhere, allocated on the same device as ``input``.
    """
    # Allocate on the input's device rather than forcing CUDA, so GPU callers
    # get the same result as before and CPU tensors work as well.
    input_oh = torch.zeros(*input.shape, dims, device=input.device)
    return input_oh.scatter_(-1, input.unsqueeze(-1), 1.)

def clean_output(out):
    """Turn decoder scores into a trimmed token-index sequence.

    Takes the argmax over the last axis, drops singleton axes, strips leading
    and trailing zeros, and cuts the sequence just before the first token equal
    to 1 (treated here as a terminator — presumably end-of-sequence; confirm
    against the tokenizer).

    Args:
        out: tensor of per-step scores whose last axis indexes tokens; after
            ``squeeze`` the argmax must be at most one-dimensional.

    Returns:
        1-D numpy array of token indices.
    """
    tokens = torch.argmax(out, dim=-1).cpu().detach().numpy().squeeze()
    # atleast_1d: a single decoded step squeezes to a 0-d array, which cannot be trimmed.
    recon = np.trim_zeros(np.atleast_1d(tokens))
    terminators = np.flatnonzero(recon == 1)
    if terminators.size:
        # Everything from the first terminator on is discarded, so there is no
        # need to overwrite the later terminators first.
        recon = recon[:terminators[0]]
    return recon

def repar(mu, stddev, sigma=1):
    """Draw ``z = mu + stddev * eps`` with ``eps ~ N(0, sigma^2)`` (reparameterization trick).

    Args:
        mu: tensor of means.
        stddev: floating-point tensor of standard deviations; the noise takes
            its shape, dtype and device.
        sigma: scale of the base noise (default 1).

    Returns:
        Sampled tensor, differentiable with respect to ``mu`` and ``stddev``.
    """
    # Sample directly on stddev's device instead of drawing on the CPU and
    # copying to CUDA; this also removes the dependency on a star-imported
    # `Normal`. Note the draws now come from that device's RNG stream.
    eps = torch.randn_like(stddev) * sigma
    return mu + stddev * eps
    

In [4]:
# Arousal classifier (weights read from lc_params/arousal_clf.pt); it is applied to
# one-hot event sequences in later cells.
from arousal_clf import Discriminator
clf = Discriminator().cuda()
# Switch to inference mode before loading; load_state_dict stays the last
# expression so its key-matching report is what the cell displays.
clf.eval()
clf.load_state_dict(torch.load("lc_params/arousal_clf.pt"))


<All keys matched successfully>

In [None]:
# Run every batch of the chosen split through the model and the arousal
# classifier, collecting latents, cluster assignments and labels (all on CPU)
# for the plotting cells below.
dl = test_dl_dist
r_mean, n_mean, t_mean, v_mean = [], [], [], []
c_r_lst, c_n_lst, c_t_lst, c_v_lst = [], [], [], []
z_r_lst, z_n_lst = [], []
r_density_lst, n_density_lst = [], []
a_lst = []
y_r_lst = []
y_n_lst = []

infer_a_lst = []

# NOTE(review): model.eval() is only called in a later cell, so the model is
# still in its default training mode here — confirm that is intended.
# Nothing collected here is back-propagated, so skip building autograd graphs.
with torch.no_grad():
    for j, x in tqdm(enumerate(dl), total=len(dl)):
        d, r, n, c, a, v, r_density, n_density = x
        d, r, n, c = d.cuda().long(), r.cuda().long(), \
                     n.cuda().long(), c.cuda().float()

        r_density_lst.append(r_density)
        n_density_lst.append(n_density)

        d_oh = convert_to_one_hot(d, EVENT_DIMS)
        r_oh = convert_to_one_hot(r, RHYTHM_DIMS)
        n_oh = convert_to_one_hot(n, NOTE_DIMS)

        res = model(d_oh, r_oh, n_oh, c, None, None, is_class=is_class, is_res=is_res)

        # Binarise the classifier output at 0.5. reshape(-1) (rather than squeeze)
        # keeps a 1-D tensor even for a batch of one, so torch.cat works later.
        # Assumes clf returns one score per sample — TODO confirm.
        infer_a = (clf(d_oh).detach().cpu().reshape(-1) > 0.5).float()

        # package output
        output, dis, z_out, logLogit_out, qy_x_out, y_out = res
        out, r_out, n_out, _, _ = output
        z_r, z_n = z_out
        dis_r, dis_n = dis

        z_r_lst.append(z_r.cpu().detach())
        z_n_lst.append(z_n.cpu().detach())
        y_r_lst.append(y_out[0].cpu().detach())
        y_n_lst.append(y_out[1].cpu().detach())
        infer_a_lst.append(infer_a)

        r_mean.append(dis_r.mean.cpu().detach())
        n_mean.append(dis_n.mean.cpu().detach())

        # Labels are only stored, so cast them without a GPU round trip.
        a_lst.append(a.long().cpu())

In [None]:
# Stack the per-batch tensors collected by the loop into one numpy array each.
# The names are rebound from lists to arrays, so this cell can only run once
# per run of the collection loop.
r_mean = torch.cat(r_mean, dim=0).detach().cpu().numpy()
n_mean = torch.cat(n_mean, dim=0).detach().cpu().numpy()
# c_r_lst = torch.cat(c_r_lst, dim=0).cpu().detach().numpy()
# c_n_lst = torch.cat(c_n_lst, dim=0).cpu().detach().numpy()
z_r_lst = torch.cat(z_r_lst, dim=0).detach().cpu().numpy()
z_n_lst = torch.cat(z_n_lst, dim=0).detach().cpu().numpy()
r_density_lst = torch.cat(r_density_lst, dim=0).detach().cpu().numpy()
n_density_lst = torch.cat(n_density_lst, dim=0).detach().cpu().numpy()
a_lst = torch.cat(a_lst, dim=0).detach().cpu().numpy()
y_r_lst = torch.cat(y_r_lst, dim=0).detach().cpu().numpy()
y_n_lst = torch.cat(y_n_lst, dim=0).detach().cpu().numpy()
infer_a_lst = torch.cat(infer_a_lst, dim=0).numpy()

# Split each latent into its first dimension and the remaining dimensions.
z_r_0_lst, z_r_rest_lst = z_r_lst[:, 0], z_r_lst[:, 1:]
z_n_0_lst, z_n_rest_lst = z_n_lst[:, 0], z_n_lst[:, 1:]

In [None]:
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
sns.set()

# Project the z_n latents to 2-D for the scatter plots below.
# random_state pins t-SNE's random initialisation so the embedding (and every
# figure derived from it) is the same on each run.
tsne = TSNE(n_components=2, verbose=3, random_state=0)  #metric='manhattan'
tsne_features = tsne.fit_transform(z_n_lst)

In [None]:
# Keep only the samples whose note density is below 5, then colour the
# embedding by that density.
# Caution: n_density_lst and tsne_features are overwritten with their filtered
# versions, and later cells reuse `idx` on the still-unfiltered arrays, so
# re-running this cell changes what `idx` refers to.
idx = np.where(n_density_lst < 5)
n_density_lst, tsne_features = n_density_lst[idx], tsne_features[idx]
color = n_density_lst
cmap = sns.cubehelix_palette(as_cmap=True)
fig, ax = plt.subplots(figsize=(8, 8))
points = ax.scatter(tsne_features[:, 0], tsne_features[:, 1], c=color, s=50, cmap=cmap)
fig.colorbar(points, ax=ax)
plt.show()

In [None]:
# Colour the same embedding by the first dimension of z_n.
# z_n_0_lst is overwritten with its filtered version, so this cell is meant to
# run once after the filtering cell.
z_n_0_lst = z_n_0_lst[idx]
color = z_n_0_lst
cmap = sns.cubehelix_palette(as_cmap=True)
fig, ax = plt.subplots(figsize=(8, 8))
points = ax.scatter(tsne_features[:, 0], tsne_features[:, 1], c=color, s=50, cmap=cmap)
fig.colorbar(points, ax=ax)
plt.show()

In [None]:
# Colour the embedding by the classifier-predicted arousal label.
# tsne_features has already been restricted with `idx` by the density-filter
# cell, so the labels need the same selection to line up point for point.
color = infer_a_lst[idx]
assert len(color) == len(tsne_features), "labels and embedding are out of sync"
palette = sns.color_palette("bright", len(set(color)))
plt.figure(figsize=(8,8))
# x / y are passed by keyword: current seaborn releases no longer accept them positionally.
sns.scatterplot(x=tsne_features[:,0], y=tsne_features[:,1], palette=palette, hue=color, legend='full')
plt.show()

In [None]:
# Colour the embedding by the dataset's arousal label.
# tsne_features has already been restricted with `idx` by the density-filter
# cell, so the labels need the same selection to line up point for point.
color = a_lst[idx]
assert len(color) == len(tsne_features), "labels and embedding are out of sync"
palette = sns.color_palette("bright", len(set(color)))
plt.figure(figsize=(8,8))
# x / y are passed by keyword: current seaborn releases no longer accept them positionally.
sns.scatterplot(x=tsne_features[:,0], y=tsne_features[:,1], palette=palette, hue=color, legend='full')
plt.show()

In [None]:
# Colour the embedding by the mixture component assigned to z_n.
# y_n_lst is overwritten with its filtered version (as in the original cell),
# so run this once after the density-filter cell.
y_n_lst = y_n_lst[idx]
color = y_n_lst
palette = sns.color_palette("bright", len(set(color)))
plt.figure(figsize=(8,8))
# x / y are passed by keyword: current seaborn releases no longer accept them positionally.
sns.scatterplot(x=tsne_features[:,0], y=tsne_features[:,1], palette=palette, hue=color, legend='full')
plt.show()

In [11]:
# Read the learned mean and variance (exp of the stored log-variance) of both
# mixture components, for the r and n latents, as detached CPU tensors.
mu_r_lst, var_r_lst, mu_n_lst, var_n_lst = [], [], [], []
for k_i in torch.arange(0, 2):
    k_gpu = k_i.cuda()
    mu_r_lst.append(model.mu_r_lookup(k_gpu).detach().cpu())
    var_r_lst.append(model.logvar_r_lookup(k_gpu).exp_().detach().cpu())
    mu_n_lst.append(model.mu_n_lookup(k_gpu).detach().cpu())
    var_n_lst.append(model.logvar_n_lookup(k_gpu).exp_().detach().cpu())

# Offsets between the two component means, in both directions.
r_low_to_high, r_high_to_low = mu_r_lst[1] - mu_r_lst[0], mu_r_lst[0] - mu_r_lst[1]
n_low_to_high, n_high_to_low = mu_n_lst[1] - mu_n_lst[0], mu_n_lst[0] - mu_n_lst[1]

In [26]:
# Indices, among the first 500 VGMIDI training samples, whose arousal label
# (fifth field of each sample) equals 1.
lst = [i for i in range(500) if vgm_train_ds_dist[i][4] == 1]
print(lst)

[9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 55, 56, 57, 58, 59, 60, 61, 62, 63, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 27

In [215]:
# Pick VGMIDI training sample 360 and prepare it for the model:
# `c` gets a leading batch axis on the GPU, `d_oh` is the one-hot event sequence.
d, r, n, c, a, v, r_density, n_density = vgm_train_ds_dist[360]
c = torch.Tensor(c).cuda().unsqueeze(0)
d_oh = convert_to_one_hot(torch.Tensor(d).cuda().long(), EVENT_DIMS)
# Strip zero padding and the last two tokens before decoding to MIDI.
# NOTE(review): the last two tokens are presumably end markers — confirm.
pm = magenta_decode_midi(np.trim_zeros(d.long().numpy())[:-2])
a_1 = pm.fluidsynth()
# Last expression: renders an audio player for the original sample.
Audio(a_1, rate=44100)

In [216]:
# Encode the selected sample, report which mixture component each latent falls
# in, then decode the unmodified latents as a reference reconstruction.
model.eval()
dis_r, dis_n = model.encode(d_oh.unsqueeze(0))
# rsample() draws a random latent, so z_r / z_n differ between runs unless a seed is set.
z_r = dis_r.rsample()
z_n = dis_n.rsample()

# Component index = argmax over axis 1 of the second value returned by approx_qy_x.
logLogit_qy_x_r, qy_x_r = model.approx_qy_x(z_r, model.mu_r_lookup, model.logvar_r_lookup, n_component=model.n_component)
_, y_r = torch.max(qy_x_r, dim=1)

logLogit_qy_x_n, qy_x_n = model.approx_qy_x(z_n, model.mu_n_lookup, model.logvar_n_lookup, n_component=model.n_component)
_, y_n = torch.max(qy_x_n, dim=1)

print(y_r.item(), y_n.item())


# The decoder input is the two latents followed by `c`, joined along axis 1.
z = torch.cat([z_r, z_n, c], dim=1)        
out = model.global_decoder(z, steps=100)
print(clean_output(out))

pm = magenta_decode_midi(clean_output(out))
a_1 = pm.fluidsynth()
# Last expression: renders an audio player for the reconstruction.
Audio(a_1, rate=44100)

1 1
[341  30  42  46  49  54 216 118 130 134 137 142 185  30  42  51 331  54
 188 139 142 178  47 189 118 130 135 183 341  42  47 331  51  56 188 130
 135 139 144 178 323  42  47 341  49  56 216 130 135 137 144 185 323  42
  46 341  54  58 216 130 134 142 146 185 323  41  44 341  53  56  65 216
 129 132 141 144 153 185  18  30  37 331  54 189 142 178  51 188 139 178
  51 183 139 178  53 181 106 115 125 185]


In [217]:
# Shift both latents by lmbda times the offset between the two component means
# (component 0 mean minus component 1 mean), then decode the shifted latents.
lmbda = 1
z_r_new = z_r + lmbda*torch.Tensor(r_high_to_low).cuda()
z_n_new = z_n + lmbda*torch.Tensor(n_high_to_low).cuda()
# Opposite direction (component 1 mean minus component 0 mean):
# z_r_new = z_r + lmbda*torch.Tensor(r_low_to_high).cuda()
# z_n_new = z_n + lmbda*torch.Tensor(n_low_to_high).cuda()

# Alternative: sample fresh latents around component 0 instead of shifting.
# z_r_new = Normal(mu_r_lst[0], var_r_lst[0]).sample().unsqueeze(0).cuda()
# z_n_new = Normal(mu_n_lst[0], var_n_lst[0]).sample().unsqueeze(0).cuda()

z = torch.cat([z_r_new, z_n_new, c], dim=1)        
out = model.global_decoder(z, steps=100)
print(out.shape)
print(clean_output(out))

# Note: pm / a_1 / out from the reconstruction cell are overwritten here.
pm = magenta_decode_midi(clean_output(out))
a_1 = pm.fluidsynth()
Audio(a_1, rate=44100)

torch.Size([1, 100, 342])
[341  30  42  46  49  54 277 183 130 134 137 142 188  39  42  47  51 277
 216 118 130 135 139  30  42  49  59 277 265 118 130 137]


In [219]:
# Save whichever MIDI object `pm` currently holds (set by the most recently run decode cell).
pm.write("360.mid")

In [218]:
# Score the decoded sequence with the arousal classifier.
# Decode the tokens once and reuse them for both the length and the padding.
tokens = clean_output(out)
# NOTE(review): 200 is presumably the sequence length the classifier expects — confirm.
new_out = np.pad(tokens, (0, 200 - len(tokens)), 'constant', constant_values=0)
new_out = torch.Tensor(new_out).cuda().long()
# EVENT_DIMS (342) is the same one-hot depth used everywhere else in the notebook.
new_out = convert_to_one_hot(new_out, EVENT_DIMS).unsqueeze(0)
clf(new_out)

tensor([[1.0000]], device='cuda:0', grad_fn=<SigmoidBackward>)

In [None]:
# NOTE(review): neither `lmdda_lst` nor `output_lst` is defined in any cell visible
# in this notebook, so this fails on a fresh kernel — restore the cell that built
# them (and check the `lmdda` spelling) or remove this plot.
plt.plot(lmdda_lst, output_lst)

In [None]:
# Report which mixture component each shifted latent now falls in.
# The r result gets its own name and is the one printed; previously it was
# stored under qy_x_n and the stale pre-shift qy_x_r was printed instead.
_, qy_x_r_new = model.approx_qy_x(z_r_new, model.mu_r_lookup, model.logvar_r_lookup, n_component=model.n_component)
print("new cluster r:", torch.max(qy_x_r_new, dim=-1)[-1].item())
# qy_x_n is rebound to the shifted result, as in the original cell.
_, qy_x_n = model.approx_qy_x(z_n_new, model.mu_n_lookup, model.logvar_n_lookup, n_component=model.n_component)
print("new cluster n:", torch.max(qy_x_n, dim=-1)[-1].item())

In [None]:
# Decode and play whatever `out` currently holds (same steps as the decode cells above).
pm = magenta_decode_midi(clean_output(out))
a_1 = pm.fluidsynth()
Audio(a_1, rate=44100)

In [None]:
# Display the z_r latent sampled in the encoding cell.
z_r

In [None]:
# NOTE(review): `z_r_gen` is not defined in any cell visible in this notebook;
# this raises NameError on a fresh kernel.
z_r_gen

In [None]:
# Prepare and play sample 483 of the classic-piano training set.
d, r, n, c, a, v, r_density, n_density = train_ds_dist[483]
c = torch.Tensor(c).cuda().unsqueeze(0)
d_oh = convert_to_one_hot(torch.Tensor(d).cuda().long(), EVENT_DIMS)
# NOTE(review): here `d` goes to np.trim_zeros directly and one trailing token is
# dropped ([:-1]); the VGMIDI sample cell converts with d.long().numpy() and drops
# two ([:-2]) — confirm the difference is intended.
pm = magenta_decode_midi(np.trim_zeros(d)[:-1])
a_1 = pm.fluidsynth()
Audio(a_1, rate=44100)