In [1]:
import os
import numpy as np
from pathlib import Path
from music21 import *

from tqdm import tqdm
import pypianoroll
from pypianoroll import Track, Multitrack, BinaryTrack, StandardTrack

In [2]:
# Data
n_tracks = 5  # number of tracks
n_pitches = 84  # number of pitches
lowest_pitch = 24  # MIDI note number of the lowest pitch
n_samples_per_song = 4  # number of samples to extract from each song in the datset
n_measures = 4  # number of measures per sample
beat_resolution = 4  # temporal resolution of a beat (in timestep)
programs = [0, 0, 25, 33, 48]  # program number for each track
is_drums = [True, False, False, False, False]  # drum indicator for each track
track_names = ['Drums', 'Piano', 'Guitar', 'Bass', 'Strings']  # name of each track
tempo = 120

In [3]:
measure_resolution = 4 * beat_resolution
tempo_array = np.full((4 * 4 * measure_resolution, 1), tempo)
assert 24 % beat_resolution == 0, (
    "beat_resolution must be a factor of 24 (the beat resolution used in "
    "the source dataset)."
)
assert len(programs) == len(is_drums) and len(programs) == len(track_names), (
    "Lengths of programs, is_drums and track_names must be the same."
) 

In [4]:
!mkdir -p data

!gdown -O data/lpd_5_cleansed.tar.gz https://drive.google.com/uc?id=1yz0Ma-6cWTl6mhkrLnAVJ7RNzlQRypQ5
!gdown -O data/id_lists_amg.tar.gz https://drive.google.com/uc?id=1hp9b_g1hu_dkP4u8h46iqHeWMaUoI07R
!gdown -O data/id_lists_lastfm.tar.gz https://drive.google.com/uc?id=1mpsoxU2fU1AjKopkcQ8Q8V6wYmVPbnPO

!tar zxf data/lpd_5_cleansed.tar.gz -C data/
!tar zxf data/id_lists_amg.tar.gz -C data/
!tar zxf data/id_lists_lastfm.tar.gz -C data/

Downloading...
From: https://drive.google.com/uc?id=1yz0Ma-6cWTl6mhkrLnAVJ7RNzlQRypQ5
To: /workspace/MuseGAN/data/lpd_5_cleansed.tar.gz
100%|████████████████████████████████████████| 555M/555M [00:19<00:00, 28.5MB/s]
Downloading...
From: https://drive.google.com/uc?id=1hp9b_g1hu_dkP4u8h46iqHeWMaUoI07R
To: /workspace/MuseGAN/data/id_lists_amg.tar.gz
100%|███████████████████████████████████████| 63.9k/63.9k [00:00<00:00, 959kB/s]
Downloading...
From: https://drive.google.com/uc?id=1mpsoxU2fU1AjKopkcQ8Q8V6wYmVPbnPO
To: /workspace/MuseGAN/data/id_lists_lastfm.tar.gz
100%|████████████████████████████████████████| 668k/668k [00:00<00:00, 3.02MB/s]


In [5]:
dataset_root = Path("data/lpd_5/lpd_5_cleansed/")

id_list = []
for path in os.listdir("data/amg"):
    filepath = os.path.join("data/amg", path)
    # print(path)
    
    if os.path.isfile(filepath):
        with open(filepath) as f:
            id_list.extend([line.rstrip() for line in f])
id_list = list(set(id_list))

In [6]:
def msd_id_to_dirs(msd_id):
    """Given an MSD ID, generate the path prefix.
    E.g. TRABCD12345678 -> A/B/C/TRABCD12345678"""
    return os.path.join(msd_id[2], msd_id[3], msd_id[4], msd_id)

In [7]:
data = []
max_num = 500 
file_idx = 0
bar_num = 4

for msd_id in tqdm(id_list):
    song_dir = dataset_root / msd_id_to_dirs(msd_id)
    multitrack = pypianoroll.load(song_dir / os.listdir(song_dir)[0])
    
    # (shape: n_timesteps x n_pitches)
    multitrack.set_resolution(beat_resolution)
    # Stack the pianoroll (shape: n_tracks x n_timesteps x n_pitches)
    pianoroll = multitrack.stack()
    
    pianoroll = pianoroll[:, :, lowest_pitch:lowest_pitch + n_pitches]
    
    # pianoroll = np.pad(pianoroll, ((0, 0), (0,0), (lowest_pitch, 128 - lowest_pitch - n_pitches)))
    n_total_measures = multitrack.get_max_length() // measure_resolution
    candidate = n_total_measures - n_measures
    target_n_samples = min(n_total_measures // n_measures, n_samples_per_song)
    
    for idx in range(candidate):
        start = idx * measure_resolution
        end = (idx + n_measures * 6) * measure_resolution
        
        if (pianoroll.sum(axis=(1, 2)) < 10).any() or pianoroll[:, start:end].shape[1] < 384: # 96*4
            continue
        
        #tracks = []
        #for trk, (program, is_drum, track_name) in enumerate(zip(programs, is_drums, track_names)):
        #    tracks.append(StandardTrack(name=track_name, program=program, is_drum=is_drum, pianoroll=(pianoroll[trk, start:end] > 0.5)))
        #multi = Multitrack(tracks=tracks, tempo=tempo_array, resolution=beat_resolution)
        # print(pianoroll[:, start:end].shape)

        # print(pianoroll[:, start:end].reshape(n_tracks, 4, -1, n_pitches).shape)
        
        data.append(pianoroll[:, start:end].reshape(n_tracks, bar_num, -1, n_pitches)) # reshape 48 * 16
        #multi.write(f'./temp_midi_data/dataset_{file_idx}.mid')
    file_idx += 1
    if file_idx > max_num-1:
        print(file_idx)
        break

data = np.stack(data)
print(f"Successfully collect {len(data)} samples from {len(id_list)} songs")
print(f"Data shape : {data.shape}")
dataset = data.transpose(0, 2, 3, 4, 1)
print(f'finally dataset {dataset.shape}')

  7%|▋         | 499/7323 [01:16<17:28,  6.51it/s]

500





Successfully collect 18000 samples from 7323 songs
Data shape : (18000, 5, 4, 96, 84)
finally dataset (18000, 4, 96, 84, 5)


In [8]:
import os
import matplotlib.pyplot as plt
import numpy as np
from museGAN2 import MuseGAN

2021-10-23 13:31:01.325960: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


In [9]:
dataset.shape

(18000, 4, 96, 84, 5)

In [10]:
dataset_1 = dataset[:, :3, :32, :, :]

In [11]:
dataset_1.shape

(18000, 3, 32, 84, 5)

In [12]:
# run params
SECTION = 'compose'
RUN_ID = '005'
DATA_NAME = 'lpd_5'
RUN_FOLDER = f'run/{SECTION}/'
RUN_FOLDER += '_'.join([RUN_ID, DATA_NAME])

if not os.path.exists(RUN_FOLDER):
    os.makedirs(RUN_FOLDER)
    os.makedirs(os.path.join(RUN_FOLDER, 'viz'))
    os.makedirs(os.path.join(RUN_FOLDER, 'images'))
    os.makedirs(os.path.join(RUN_FOLDER, 'weights'))
    os.makedirs(os.path.join(RUN_FOLDER, 'samples'))
    os.makedirs(os.path.join(RUN_FOLDER, 'dataset'))

mode =  'build' # ' 'load' #

In [13]:
BATCH_SIZE = 64
n_bars = 3
n_steps_per_bar = 32
n_pitches = 84
n_tracks = 5

In [14]:
gan = MuseGAN(input_dim = (3, 32, 84, 5)
        , critic_learning_rate = 0.001
        , generator_learning_rate = 0.001
        , optimiser = 'adam'
        , grad_weight = 10
        , z_dim = 32
        , batch_size = BATCH_SIZE
        , n_tracks = n_tracks
        , n_bars = n_bars
        , n_steps_per_bar = n_steps_per_bar
        , n_pitches = n_pitches
        )

if mode == 'build':
    gan.save(RUN_FOLDER)
else:                 
    gan.load_weights(RUN_FOLDER)

2021-10-23 13:31:47.300089: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-10-23 13:31:47.301517: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2021-10-23 13:31:47.351721: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-10-23 13:31:47.352964: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:00:05.0 name: Tesla V100-SXM2-32GB computeCapability: 7.0
coreClock: 1.53GHz coreCount: 80 deviceMemorySize: 31.72GiB deviceMemoryBandwidth: 836.37GiB/s
2021-10-23 13:31:47.353074: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node ze

gen_output: (None, 3, 32, 84, 5)
('Failed to import pydot. You must `pip install pydot` and install graphviz (https://graphviz.gitlab.io/download/), ', 'for `pydotprint` to work.')
('Failed to import pydot. You must `pip install pydot` and install graphviz (https://graphviz.gitlab.io/download/), ', 'for `pydotprint` to work.')
('Failed to import pydot. You must `pip install pydot` and install graphviz (https://graphviz.gitlab.io/download/), ', 'for `pydotprint` to work.')


In [15]:
gan.chords_tempNetwork.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
temporal_input (InputLayer)  [(None, 32)]              0         
_________________________________________________________________
reshape (Reshape)            (None, 1, 1, 32)          0         
_________________________________________________________________
conv2d_transpose (Conv2DTran (None, 2, 1, 1024)        66560     
_________________________________________________________________
batch_normalization_7 (Batch (None, 2, 1, 1024)        4096      
_________________________________________________________________
activation (Activation)      (None, 2, 1, 1024)        0         
_________________________________________________________________
conv2d_transpose_1 (Conv2DTr (None, 3, 1, 32)          65568     
_________________________________________________________________
batch_normalization_8 (Batch (None, 3, 1, 32)          128 

In [16]:
gan.generator.summary()

Model: "model_7"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
melody_input (InputLayer)       [(None, 5, 32)]      0                                            
__________________________________________________________________________________________________
chords_input (InputLayer)       [(None, 32)]         0                                            
__________________________________________________________________________________________________
lambda (Lambda)                 (None, 32)           0           melody_input[0][0]               
__________________________________________________________________________________________________
lambda_1 (Lambda)               (None, 32)           0           melody_input[0][0]               
____________________________________________________________________________________________

In [17]:
gan.critic.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
critic_input (InputLayer)    [(None, 3, 32, 84, 5)]    0         
_________________________________________________________________
conv3d (Conv3D)              multiple                  1408      
_________________________________________________________________
leaky_re_lu (LeakyReLU)      multiple                  0         
_________________________________________________________________
conv3d_1 (Conv3D)            multiple                  32896     
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    multiple                  0         
_________________________________________________________________
conv3d_2 (Conv3D)            multiple                  196736    
_________________________________________________________________
leaky_re_lu_2 (LeakyReLU)    multiple                  0     

In [19]:
dataset_1.shape

(18000, 3, 32, 84, 5)

In [20]:
gan.critic_model.summary()

Model: "model_8"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
chords_input (InputLayer)       [(None, 32)]         0                                            
__________________________________________________________________________________________________
style_input (InputLayer)        [(None, 32)]         0                                            
__________________________________________________________________________________________________
melody_input (InputLayer)       [(None, 5, 32)]      0                                            
__________________________________________________________________________________________________
groove_input (InputLayer)       [(None, 5, 32)]      0                                            
____________________________________________________________________________________________

In [21]:
EPOCHS = 4000
PRINT_EVERY_N_BATCHES = 10
gan.epoch = 0

In [22]:
gan.train(dataset_1, batch_size=BATCH_SIZE, epochs=EPOCHS, run_folder=RUN_FOLDER, print_every_n_batches=PRINT_EVERY_N_BATCHES)

randAvgLayer: []
randAvgLayer input: [<tf.Tensor 'real_img_input:0' shape=(None, 3, 32, 84, 5) dtype=float32>, <tf.Tensor 'model_7/concat_bars/concat:0' shape=(None, 3, 32, 84, 5) dtype=float32>]
randAvgLayer output: Tensor("random_weighted_average/add:0", shape=(64, 3, 32, 84, 5), dtype=float32)


2021-10-23 13:38:23.938873: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10
2021-10-23 13:38:24.326515: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.7
2021-10-23 13:38:26.520300: W tensorflow/stream_executor/gpu/asm_compiler.cc:63] Running ptxas --version returned 256
2021-10-23 13:38:26.721804: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] Internal: ptxas exited with non-zero error code 256, output: 
Relying on driver to perform ptx compilation. 
Modify $PATH to customize ptxas location.
This message will be only logged once.


InvalidArgumentError: 2 root error(s) found.
  (0) Invalid argument: Incompatible shapes: [192,3,32,84,5] vs. [64,1,1,1,1]
	 [[{{node random_weighted_average/mul_1}}]]
	 [[loss/Identity_1/_4559]]
  (1) Invalid argument: Incompatible shapes: [192,3,32,84,5] vs. [64,1,1,1,1]
	 [[{{node random_weighted_average/mul_1}}]]
0 successful operations.
0 derived errors ignored.