In [5]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

In [76]:
# Read the 'features' array from the aud_feats archive of fireworks clip 54.
aud_path = 'data/fireworks/aud_feats/video_00054.npz'
aud = np.load(aud_path)['features']

In [80]:
# Mean over axis 0, reshaped to a single column; the cell displays only the resulting shape.
aud.mean(axis=0).reshape(-1, 1).shape

(159744, 1)

In [72]:
# Read the 'features' array from the vid_feats archive of fireworks clip 54.
# NOTE(review): this path starts with 'audio/' while the other cells read from
# 'data/' - confirm the directory is intended.
vid_path = 'audio/fireworks/vid_feats/video_00054.npz'
vid = np.load(vid_path)['features']

In [73]:
# Display the shape of the loaded video features (the recorded output is (156, 4096)).
vid.shape

(156, 4096)

In [27]:
def np_to_tfrecords(X, Y, file_path_prefix, verbose=True):
    """Serialize a 2-D array (and optional labels) into a single TFRecord file.

    Every row of ``X`` becomes one ``tf.train.Example`` whose feature ``'X'``
    holds that row. When ``Y`` is given, the matching row of ``Y`` is stored
    under the feature key ``'Y'``. Records are written to
    ``file_path_prefix + '.tfrecord'``.

    Args:
        X: 2-D ``np.ndarray`` of dtype float32, float64 or int64. Arrays of
            higher rank must be reshaped by the caller first.
        Y: ``None``, or a 2-D ``np.ndarray`` with as many rows as ``X`` and
            one of the dtypes listed above.
        file_path_prefix: output path without the ``.tfrecord`` suffix.
        verbose: when true, print one line before and one line after writing.

    Raises:
        AssertionError: if ``X``/``Y`` are not ndarrays of rank 2, or their
            row counts differ.
        ValueError: if ``X`` or ``Y`` has an unsupported dtype.

    Note:
        Float data goes into a ``FloatList``, which holds single-precision
        values, so float64 input is stored as 32-bit floats.
    """
    def _dtype_feature(ndarray):
        """Return a callable that wraps one row in the Feature type matching the dtype."""
        assert isinstance(ndarray, np.ndarray)
        dtype_ = ndarray.dtype
        if dtype_ == np.float64 or dtype_ == np.float32:
            return lambda array: tf.train.Feature(float_list=tf.train.FloatList(value=array))
        elif dtype_ == np.int64:
            return lambda array: tf.train.Feature(int64_list=tf.train.Int64List(value=array))
        else:
            # The argument is an ndarray at this point; the dtype is what is unsupported.
            raise ValueError(
                "Unsupported dtype {}; expected float32, float64 or int64.".format(ndarray.dtype))

    assert isinstance(X, np.ndarray)
    assert len(X.shape) == 2  # If X has a higher rank,
                               # it should be reshaped before being fed to this function.
    assert isinstance(Y, np.ndarray) or Y is None

    # Pick the appropriate tf.train.Feature wrapper for each array's dtype.
    # This runs before the output file is opened, so bad input creates no file.
    dtype_feature_x = _dtype_feature(X)
    if Y is not None:
        assert X.shape[0] == Y.shape[0]
        assert len(Y.shape) == 2
        dtype_feature_y = _dtype_feature(Y)

    # Generate the tfrecord writer.
    result_tf_file = file_path_prefix + '.tfrecord'
    writer = tf.python_io.TFRecordWriter(result_tf_file)
    try:
        if verbose:
            # Single-argument print(...) emits the same text on Python 2 and 3.
            print("Serializing {:d} examples into {}".format(X.shape[0], result_tf_file))

        # Iterate over each sample and serialize it as a protobuf Example.
        for idx in range(X.shape[0]):
            d_feature = {'X': dtype_feature_x(X[idx])}
            if Y is not None:
                d_feature['Y'] = dtype_feature_y(Y[idx])

            features = tf.train.Features(feature=d_feature)
            example = tf.train.Example(features=features)
            writer.write(example.SerializeToString())
    finally:
        # Always close the writer so buffered records reach disk and the
        # file handle is released, even if serialization fails midway.
        writer.close()

    if verbose:
        print("Writing {} done!".format(result_tf_file))

In [28]:
dataset = 'drums'
num_to_read = 2606

# Every shard holds exactly `batch_size` clips. Floor division keeps the shard
# count an integer under true division as well (Python 3 or
# `from __future__ import division`). The `num_to_read % batch_size` clips
# beyond the last full shard are not written.
batch_size = 64
num_batches = num_to_read // batch_size

# Video example: rows 32..47 of the per-clip feature array, flattened to
# 16 * 4096 values.
vid_rows = slice(32, 48)
vid_dim = 16 * 4096

# Audio example: the third 16384-value window of the features averaged over axis 0.
aud_len = 16384
aud_window = slice(2 * aud_len, 3 * aud_len)


def _load_features(path):
    """Return the 'features' array from the .npz archive at ``path`` and close the archive."""
    with np.load(path) as archive:
        return archive['features']


# Pass 1: one video_XX.tfrecord shard per batch of clips.
for i in range(num_batches):
    batch = np.zeros((batch_size, vid_dim))
    for j in range(batch_size):
        vid = _load_features('data/'+dataset+'/vid_feats/video_%05d.npz'%(batch_size*i+j))
        batch[j] = vid[vid_rows].ravel()
    np_to_tfrecords(batch, None, './data/'+dataset+'/tfr/video_%02d'%i, verbose=True)

# Pass 2: the matching audio_XX.tfrecord shards, covering the same clip indices.
for i in range(num_batches):
    batch = np.zeros((batch_size, aud_len))
    for j in range(batch_size):
        aud = _load_features('data/'+dataset+'/aud_feats/video_%05d.npz'%(batch_size*i+j))
        batch[j] = aud.mean(0)[aud_window]
    np_to_tfrecords(batch, None, './data/'+dataset+'/tfr/audio_%02d'%i, verbose=True)

Serializing 64 examples into ./data/drums/tfr/video_00.tfrecord
Writing ./data/drums/tfr/video_00.tfrecord done!
Serializing 64 examples into ./data/drums/tfr/video_01.tfrecord
Writing ./data/drums/tfr/video_01.tfrecord done!
Serializing 64 examples into ./data/drums/tfr/video_02.tfrecord
Writing ./data/drums/tfr/video_02.tfrecord done!
Serializing 64 examples into ./data/drums/tfr/video_03.tfrecord
Writing ./data/drums/tfr/video_03.tfrecord done!
Serializing 64 examples into ./data/drums/tfr/video_04.tfrecord
Writing ./data/drums/tfr/video_04.tfrecord done!
Serializing 64 examples into ./data/drums/tfr/video_05.tfrecord
Writing ./data/drums/tfr/video_05.tfrecord done!
Serializing 64 examples into ./data/drums/tfr/video_06.tfrecord
Writing ./data/drums/tfr/video_06.tfrecord done!
Serializing 64 examples into ./data/drums/tfr/video_07.tfrecord
Writing ./data/drums/tfr/video_07.tfrecord done!
Serializing 64 examples into ./data/drums/tfr/video_08.tfrecord
Writing ./data/drums/tfr/video_0

Writing ./data/drums/tfr/audio_32.tfrecord done!
Serializing 64 examples into ./data/drums/tfr/audio_33.tfrecord
Writing ./data/drums/tfr/audio_33.tfrecord done!
Serializing 64 examples into ./data/drums/tfr/audio_34.tfrecord
Writing ./data/drums/tfr/audio_34.tfrecord done!
Serializing 64 examples into ./data/drums/tfr/audio_35.tfrecord
Writing ./data/drums/tfr/audio_35.tfrecord done!
Serializing 64 examples into ./data/drums/tfr/audio_36.tfrecord
Writing ./data/drums/tfr/audio_36.tfrecord done!
Serializing 64 examples into ./data/drums/tfr/audio_37.tfrecord
Writing ./data/drums/tfr/audio_37.tfrecord done!
Serializing 64 examples into ./data/drums/tfr/audio_38.tfrecord
Writing ./data/drums/tfr/audio_38.tfrecord done!
Serializing 64 examples into ./data/drums/tfr/audio_39.tfrecord
Writing ./data/drums/tfr/audio_39.tfrecord done!


In [140]:
# Keep rows 16..31 of the 'features' array stored for snoring clip 55.
snoring_vid_feats = np.load('data/snoring/vid_feats/video_00055.npz')['features']
vid = snoring_vid_feats[16:32]

In [8]:
from scipy.io import wavfile

In [23]:
# Take element 0 of the 'features' array for snoring clip 60, cut out the
# values at indices 16384..32767, and write them to test.wav as float32
# samples at a 16000 Hz rate.
snoring_aud_feats = np.load('data/snoring/aud_feats/video_00060.npz')['features']
aud = snoring_aud_feats[0][16384:2*16384]
aud_float32 = aud.astype(np.float32)
wavfile.write('test.wav', 16000, aud_float32)

In [2]:
# Parse the comma-separated values in generated_at_epoch3200.csv into an array.
generated_csv = 'generated_at_epoch3200.csv'
test = np.loadtxt(generated_csv, delimiter=',')

In [3]:
# Display the shape of the first row of the loaded CSV (the recorded output is (16384,)).
test[0].shape

(16384,)

In [9]:
# Write the first row of `test` to test_drum2.wav as float32 samples at a 16000 Hz rate.
first_row = test[0].astype(np.float32)
wavfile.write('test_drum2.wav', 16000, first_row)