This repository has been archived by the owner on Mar 31, 2018. It is now read-only.
/
data_handler.py
149 lines (139 loc) · 4.72 KB
/
data_handler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# data_handler: Interface for instrument ID neural networks, providing file I/O.
import scipy.io.wavfile as wav
import numpy as np
import json
# Default settings, used whenever a caller does not pass its own value.
# Window size in samples (presumably one second at 44.1 kHz -- TODO confirm).
window_length_default = 44100
# JSON database that maps each sample name to its kind.
database_file_default = "data/database.json"
# Module-level cursor advanced by get_next_sample_information; do not modify
# it from outside this module.
counter = 0
def read_sample(sample, window_length=window_length_default):
    """Read a WAV file and cut it into consecutive, non-overlapping windows.

    Args:
        sample: Path (or open file) accepted by ``scipy.io.wavfile.read``.
        window_length: Number of frames per window; must be at least 1.

    Returns:
        A list of slices of the file's sample data, in file order, each
        exactly ``window_length`` frames long. A trailing remainder shorter
        than ``window_length`` is dropped, so a file shorter than one window
        yields an empty list.

    Raises:
        ValueError: If ``window_length`` is smaller than 1.
    """
    if window_length < 1:
        raise ValueError(
            "window_length must be at least 1, got {!r}".format(window_length)
        )
    # wav.read returns (sample_rate, data); only the data is needed here.
    samples = wav.read(sample)[1]
    # Derive the window count from window_length itself rather than from a
    # fixed 44100 rate, so the whole file is covered and every window is
    # full-length (downstream code reshapes windows to window_length).
    num_windows = len(samples) // window_length
    return [
        samples[index * window_length:(index + 1) * window_length]
        for index in range(num_windows)
    ]
def get_sample(sample, kind, window_length=window_length_default):
    """Load the cached WAV of ``sample`` and label each of its windows.

    The file is read from ``cache/<kind>/<sample>.wav`` and split into
    windows by ``read_sample``.

    Returns:
        A ``(windows, labels)`` pair, where ``labels`` is a list containing
        ``kind`` once for every window.
    """
    wav_path = "cache/{}/{}.wav".format(kind, sample)
    windows = read_sample(wav_path, window_length)
    labels = [kind] * len(windows)
    return windows, labels
def get_next_sample_information(database_file=database_file_default):
    """Generator that endlessly yields ``(name, kind)`` pairs from the database.

    The JSON file at ``database_file`` is loaded when the generator is first
    advanced. The position is kept in the module-level ``counter`` rather
    than in the generator, so a freshly created generator continues where
    the previous one stopped. The counter is advanced before each yield and
    wraps around at the end of the database.
    """
    global counter  # shared on purpose: the position must outlive this generator
    with open(database_file) as raw_db:
        entries = list(json.load(raw_db).items())
    while True:
        counter = (counter + 1) % len(entries)
        try:
            yield entries[counter]
        except GeneratorExit:
            # The consumer closed the generator; finish quietly.
            return
def convert_kind(kind):
    """Convert a kind name into a one-hot array of shape (1, 4).

    Slot 1 is "drum", slot 2 is "guitar" and slot 3 is "vocal"; any other
    value falls into slot 0.
    """
    hot_index = 0  # slot 0 collects everything that is not a known kind
    for position, label in enumerate(("drum", "guitar", "vocal"), start=1):
        if kind == label:
            hot_index = position
            break
    one_hot = [0] * 4
    one_hot[hot_index] = 1
    return np.asarray(one_hot).reshape(1, 4)
def deconvert_kind(kind):
    """Convert a one-hot kind back into its name.

    Args:
        kind: One-hot encoding with four entries. Both a flat sequence and
            the (1, 4) array returned by ``convert_kind`` are accepted; the
            input is flattened before it is inspected.

    Returns:
        "other", "drum", "guitar" or "vocal" for the first slot equal to 1,
        or an empty string when no slot equals 1.

    Raises:
        IndexError: If the flattened input has fewer than four entries and
            none of the available ones equals 1.
    """
    # Flatten first: indexing a (1, 4) array directly yields a whole row,
    # whose comparison with 1 cannot be used as an ``if`` condition.
    flat = np.asarray(kind).ravel()
    for index, label in enumerate(("other", "drum", "guitar", "vocal")):
        if flat[index] == 1:
            return label
    return ""
def feed_single_samples(window_length=window_length_default, database_file=database_file_default):
    """Generator that endlessly yields one ``(window, one_hot_kind)`` pair.

    Files are taken one after another from ``get_next_sample_information``;
    every window of a file is yielded, starting with the first, before the
    next file is loaded.

    Args:
        window_length: Number of samples per yielded window.
        database_file: JSON database handed to ``get_next_sample_information``.

    Yields:
        ``(window, kind)`` where ``window`` has shape (1, window_length) and
        ``kind`` is the (1, 4) one-hot array from ``convert_kind``.

    Note:
        Errors raised while loading a file propagate to the consumer. If no
        file in the database provides a full window, the generator keeps
        loading files without ever yielding.
    """
    sample_generator = get_next_sample_information(database_file=database_file)
    while True:
        name, kind = next(sample_generator)
        windows, _ = get_sample(name, kind, window_length=window_length)
        for window in windows:
            window = np.asarray(window)
            # Multi-channel WAV data is laid out as (frames, channels); keep
            # channel 1 only. Reshaping to (2, window_length) would split the
            # interleaved data in half instead of separating the channels.
            if window.ndim == 2 and window.shape[1] > 1:
                window = window[:, 1]
            # Skip windows that cannot be shaped to (1, window_length),
            # e.g. a truncated window at the end of a file.
            if window.size != window_length:
                continue
            yield (window.reshape(1, window_length), convert_kind(kind))
def feed_samples(window_length=window_length_default, database_file=database_file_default, samples_in_parallel=4):
    """Main generator: interleave windows from several files at once.

    Up to four files are kept open in "slots" that are visited round-robin,
    so consecutive yields come from different files. When a slot runs out of
    windows it is refilled with the next file from the database; refilling
    uses up that slot's turn without yielding.

    Args:
        window_length: Number of samples per yielded window.
        database_file: JSON database handed to ``get_next_sample_information``.
        samples_in_parallel: Number of slots; clamped to the range 1..4.

    Yields:
        ``(window, kind)`` where ``window`` has shape (1, window_length) and
        ``kind`` is the (1, 4) one-hot array from ``convert_kind``.

    Note:
        Errors raised while loading a file propagate to the consumer. If no
        file in the database provides a full window, the generator keeps
        loading files without ever yielding.
    """
    samples_in_parallel = max(1, min(4, samples_in_parallel))
    # One database generator serves all slots, so the JSON file is not
    # re-read every time a slot needs a new file.
    sample_generator = get_next_sample_information(database_file=database_file)
    windows = [[] for _ in range(samples_in_parallel)]
    positions = [0] * samples_in_parallel
    kinds = [""] * samples_in_parallel
    slot = 0
    while True:
        slot = (slot + 1) % samples_in_parallel  # select which slot to use
        if positions[slot] >= len(windows[slot]):
            # Slot is empty or exhausted: load the next file and start at
            # its first window on this slot's next turn.
            name, kinds[slot] = next(sample_generator)
            windows[slot], _ = get_sample(name, kinds[slot], window_length=window_length)
            positions[slot] = 0
            continue
        window = np.asarray(windows[slot][positions[slot]])
        positions[slot] += 1
        # Multi-channel WAV data is laid out as (frames, channels); keep
        # channel 1 only. Reshaping to (2, window_length) would split the
        # interleaved data in half instead of separating the channels.
        if window.ndim == 2 and window.shape[1] > 1:
            window = window[:, 1]
        # Skip windows that cannot be shaped to (1, window_length),
        # e.g. a truncated window at the end of a file.
        if window.size != window_length:
            continue
        yield (window.reshape(1, window_length), convert_kind(kinds[slot]))
def get_samples_from_file(file_to_read, window_length=window_length_default):
    """Read a WAV file and return its windows shaped for the network.

    Args:
        file_to_read: Path of the WAV file, passed on to ``read_sample``.
        window_length: Number of samples per window.

    Returns:
        A list with one array of shape (1, window_length) per window of the
        file, in file order. Windows that do not hold exactly
        ``window_length`` samples after channel selection are left out.
    """
    out_samples = []
    for window in read_sample(file_to_read, window_length=window_length):
        window = np.asarray(window)
        # Multi-channel WAV data is laid out as (frames, channels); keep
        # channel 1 only. Reshaping to (2, window_length) would split the
        # interleaved data in half instead of separating the channels.
        if window.ndim == 2 and window.shape[1] > 1:
            window = window[:, 1]
        # Skip windows that cannot be shaped to (1, window_length),
        # e.g. a truncated window at the end of the file.
        if window.size != window_length:
            continue
        out_samples.append(window.reshape(1, window_length))
    return out_samples