forked from akanametov/musegan
-
Notifications
You must be signed in to change notification settings - Fork 0
/
prepare_data.py
127 lines (110 loc) · 4.21 KB
/
prepare_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
"""Load and save an array to shared memory."""
""" comments made by clifford"""
import argparse
import os.path
from os.path import join
import sys
import numpy as np
import SharedArray as sa
def parse_arguments(argv=None):
    """Parse and return the command line arguments.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) the process command line is parsed, which is the
            original behaviour of this function.

    Returns:
        A ``(filepath, name, prefix, dtype)`` tuple taken from the parsed
        arguments.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("filepath", help="Path to the data file.")
    parser.add_argument(
        "--name",
        default="train_x_lpd_5",
        # The help text previously claimed the default was the original file
        # name, which does not match the hard-coded default above.
        help="File name to save in SharedArray. Defaults to 'train_x_lpd_5'.",
    )
    # NOTE(review): `main` only applies the prefix when `name` is None. Since
    # `--name` always has a default, the prefix is never used when arguments
    # come from the command line -- confirm whether that is intended.
    parser.add_argument(
        "--prefix",
        default=".npz",
        help="Prefix to the file name to save in SharedArray. Only effective when "
        "`name` is not given.",
    )
    parser.add_argument(
        "--dtype", default="bool", help="Datatype of the array. Defaults to bool."
    )
    args = parser.parse_args(argv)
    return args.filepath, args.name, args.prefix, args.dtype
def create_shared_array(name, shape, dtype):
    """Create and return a shared array, asking before replacing an old one.

    If an array called `name` already exists, the user is prompted on stdin
    until they answer y/n/yes/no (case-insensitive). A negative answer exits
    the process with status 0; a positive answer deletes the existing array
    and creates a new one in its place.
    """
    try:
        return sa.create(name, shape, dtype)
    except FileExistsError:
        accepted = ("y", "n", "yes", "no")
        # Keep asking until one of the accepted answers is typed.
        while True:
            answer = input(
                "Existing array (also named " + name + ") was found. Replace it? (y/n) "
            )
            if answer.lower() in accepted:
                break
        if answer.lower() in ("n", "no"):
            sys.exit(0)
        sa.delete(name)
        return sa.create(name, shape, dtype)
def _load_npy(filepath, dtype):
    """Load a dense ``.npy`` array, flatten 6-D input to 5-D, cast to `dtype`."""
    # Hard-coded target layout for 6-D input.
    # NOTE(review): presumably (phrase, bar, timestep, pitch, track) with
    # timestep = 4 beats * 24 steps per beat = 96 -- confirm against the data.
    # These values differ from the example dataset described in `main`
    # (48 steps, 5 tracks).
    num_bar, num_timestep, num_pitch, num_track = 4, 96, 84, 8
    data = np.load(filepath)
    if data.ndim == 6:
        data = data.reshape(-1, num_bar, num_timestep, num_pitch, num_track)
    return data.astype(dtype)


def main():
    """Load an array, transpose it to track-first layout and save it as .npz.

    The array is read from the path given on the command line: either a
    dense ``.npy`` file, or an archive holding ``shape`` and ``nonzero``
    entries. It is staged in a shared array, transposed with axes
    ``(0, 4, 1, 2, 3)`` and written to ``processed_dense_array/<name>.npz``.
    The shared array is deleted afterwards, even if a step fails.

    Raises:
        ValueError: If the loaded array is not 5-dimensional.
    """
    filepath, name, prefix, dtype = parse_arguments()

    if name is None:
        name = os.path.splitext(os.path.basename(filepath))[0]
        if prefix is not None:
            name = prefix + "_" + name

    print("Loading data from '{}'.".format(filepath))
    if filepath.endswith(".npy"):
        data = _load_npy(filepath, dtype)
        nonzero = None
        shape, array_dtype = data.shape, data.dtype
    else:
        data = None
        with np.load(filepath) as loaded:
            shape = loaded["shape"]
            # Index with a tuple of per-axis index arrays. A list of arrays is
            # not treated as a multidimensional index by recent NumPy
            # releases, so the previous list-based index set the wrong
            # elements or failed.
            # NOTE(review): this is presumably why the original
            # train_x_lpd_5.phr.npz data "did not work" before -- confirm.
            nonzero = tuple(loaded["nonzero"])
        array_dtype = dtype

    sa_array = create_shared_array(name, shape, array_dtype)
    try:
        print("Saving data to shared memory...")
        if data is not None:
            np.copyto(sa_array, data)
        else:
            sa_array[nonzero] = 1

        print(
            "original shape: (name='{}', shape={}, dtype={})".format(
                name, sa_array.shape, sa_array.dtype
            )
        )

        # Given a piano-roll dataset with shape
        #   (num phrases, num bars, time resolution, pitch range, num tracks)
        #   e.g. (102378, 4, 48, 84, 5),
        # transpose it to match the Generator/Critic tensor dimensions
        #   (batch_size, n_tracks, n_bars, n_steps_per_bar, n_pitches)
        #   i.e. (102378, 4, 48, 84, 5) -> (102378, 5, 4, 48, 84).
        if sa_array.ndim != 5:
            raise ValueError(
                "Expected a 5-dimensional array to transpose, got shape {}.".format(
                    sa_array.shape
                )
            )
        sa_array = sa_array.transpose(0, 4, 1, 2, 3)
        print(
            "Transposed shape: (name='{}', shape={}, dtype={})".format(
                name, sa_array.shape, sa_array.dtype
            )
        )

        os.makedirs('processed_dense_array', exist_ok=True)
        np.savez(join('processed_dense_array', name) + ".npz", sa_array)
    finally:
        # Always release the shared-memory segment, even when a step above
        # fails, so a later run does not find a stale array.
        sa.delete(name)
# Run the conversion only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()