# map label to a vector

In [2]:
import numpy as np
from tqdm import tqdm_notebook

BIG = 10000000
lab = np.random.randint(5, size=BIG)

dict = {}
dict[0] = np.array([0,0,0,0,1])
dict[1] = np.array([0,0,0,1,0])
dict[2] = np.array([0,0,1,0,0])
dict[3] = np.array([0,1,0,0,0])
dict[4] = np.array([1,0,0,0,0])

print(lab.shape)

(10000000,)


In [3]:
%%timeit
# ext = np.array([dict[x] for x in tqdm_notebook(lab)])
ext = np.array([dict[x] for x in lab])

1 loop, best of 3: 5.1 s per loop


In [4]:
%%timeit
ext = np.empty((BIG,5), dtype=int)
for k,v in (dict.items()):
    ext[lab == k] = v

# cf. http://bit.ly/2Qvp3dx
#In [42]: result = np.empty(data.size, dtype=int)
#In [43]: for key, val in mapping.items():
#   ....:     result[data == key] = val
#   ....:     
#In [44]: result
#Out[44]: array([9, 0, 9, 9, 5, 0])

1 loop, best of 3: 607 ms per loop


In [5]:
%%timeit
ext = np.empty((BIG,5), dtype=int)

def func(k):
    ext[lab == k] = dict[k]

from concurrent.futures import ThreadPoolExecutor
with ThreadPoolExecutor(max_workers=5) as executor:
        res = executor.map(func, [0,1,2,3,4])

1 loop, best of 3: 339 ms per loop


# fast way of generating audio feature inputs

In [12]:
BIG = 1000
af = {k: np.random.rand(50, 20) for k in range(BIG)}

In [22]:
%%timeit
count = 0
for k in af:
    if count == 0:
        count = 1
        af_conc = af[k]
    else:
        af_conc = np.concatenate([af_conc, af[k]], axis=0)
print(af_conc.shape)

(50000, 20)
(50000, 20)
(50000, 20)
(50000, 20)
1 loop, best of 3: 2.46 s per loop


In [27]:
%%timeit
# http://bit.ly/2Prdl6Y
# af[k] equal length
af_conc = np.array([af[k] for k in af])
af_conc = np.reshape(af_conc, (-1, 20))

100 loops, best of 3: 2.14 ms per loop


In [28]:
%%timeit
af_conc = np.concatenate([af[k] for k in af], axis=0)

100 loops, best of 3: 1.96 ms per loop


In [29]:
af_conc.shape

(50000, 20)

In [16]:
def context_window(fea,left=5,right=5):
 
 N_row = fea.shape[0]
 N_fea = fea.shape[1]
 frames = np.empty((N_row-left-right, N_fea*(left+right+1)))
 
 for frame_index in range(left,N_row-right):
  right_context=fea[frame_index+1:frame_index+right+1].flatten() # right context
  left_context=fea[frame_index-left:frame_index].flatten() # left context
  current_frame=np.concatenate([left_context,fea[frame_index],right_context])
  frames[frame_index-left]=current_frame

 return frames

In [19]:
%%timeit
frames = context_window(af_conc)

1 loop, best of 3: 187 ms per loop


In [18]:
frames.shape

(49990, 220)