# Getting to know the dataset and format

In [1]:
import pypianoroll
import glob

In [2]:
# get a list of the possible filepaths for the npz files
files = glob.glob('../raw_data/lpd_5/lpd_5_cleansed/**/**/**/**/*.npz')

In [3]:
type(files)

list

In [4]:
len(files)

21425

In [5]:
files[0]

'../raw_data/lpd_5/lpd_5_cleansed/J/J/J/TRJJJCW128F42218C7/988d41b1d934d6875e9d05337d83dfa0.npz'

In [6]:
# Load n files from the dataset
n = 5
multitracks = [pypianoroll.load(fp) for fp in files[0:n]]

In [7]:
len(multitracks)

5

In [8]:
multitracks[0].tracks

[StandardTrack(name='Drums', program=0, is_drum=True, pianoroll=array(shape=(11160, 128), dtype=uint8)),
 StandardTrack(name='Piano', program=0, is_drum=False, pianoroll=array(shape=(11160, 128), dtype=uint8)),
 StandardTrack(name='Guitar', program=24, is_drum=False, pianoroll=array(shape=(11160, 128), dtype=uint8)),
 StandardTrack(name='Bass', program=32, is_drum=False, pianoroll=array(shape=(11160, 128), dtype=uint8)),
 StandardTrack(name='Strings', program=48, is_drum=False, pianoroll=array(shape=(11160, 128), dtype=uint8))]

In [9]:
# Extract only the piano tracks from the multitracks (piano is the second track, hence, index = 1)
piano_tracks = []
for i in range(n):
    piano_tracks.append(multitracks[i].tracks[1])

In [11]:
piano_tracks[0]

StandardTrack(name='Piano', program=0, is_drum=False, pianoroll=array(shape=(11160, 128), dtype=uint8))

In [12]:
piano_tracks[0].pianoroll.shape

(11160, 128)

In [16]:
# Saves a midi file for all of the multitracks loaded
for i in range(n):
    pypianoroll.write(f'../raw_data/midi/track{i}.midi', multitracks[i])

# Testing the function to load and save the midi files

In [1]:
from music_generator.data import get_npz_data, multitrack_to_midi

In [4]:
m = get_npz_data(5, '../raw_data/lpd_5/lpd_5_cleansed')

In [6]:
len(m)

5

In [8]:
multitrack_to_midi(m, '../raw_data/midi')

'midi files saved successfully'