In [1]:
import tensorflow as tf
import numpy as np
import os
import wave

In [5]:
class Model(object):
    def __init__(self, architecture, files, model_path='./models/model.ckpt'):
        self.model_path = model_path
        self.architecture = architecture
        self.transfer_fct = tf.nn.relu
        self.files = files
        self.loaded_model = False
        
        self.x = tf.placeholder(tf.float32, [None, architecture["input"][0], architecture["input"][1]])
        
        self._int_params(architecture)
        
        init = tf.global_variables_initializer()
        
        self.tf_saver = tf.train.Saver()
        
    def load_model(self, path=None):
        self.tf_sess = tf.Session()
        self.tf_sess.run(tf.global_variables_initializer())
        self.tf_saver.restore(self.tf_sess, path if path != None else self.model_path)
        self.loaded_model = True
        
    def normalize(self, x, get_mu_std=False):
        mean, var = tf.nn.moments(x, axes=[0])
        if(get_mu_std):
            return (x - mean)/var, mean, var
        else:
            return (x - mean)/var

    def restore(self, x, mu, var):
        return (x*var + mu) - tf.reduce_min(x)
        
    def get_data_generator(self, file_path, frame_with, frame_count, batch_count, rand=False, get_meta=False, get_label=False):
        if(type(file_path ) == str):
            wave_read = [wave.open(file_path, "rb")]
        else:
            wave_read = []
            for fp in file_path:
                wave_read.append (wave.open(fp, "rb"))
        if(get_meta):
            meta = {
                "nframes" : wave_read[0].getnframes(),
                "nchannels" : wave_read[0].getnchannels(),
                "sampwidth" : wave_read[0].getsampwidth(),
                "framerate" : wave_read[0].getframerate()
            }
        for k in range(batch_count):
            out = []
            labels = []
            for i in range(frame_count):
                label = np.random.randint(len(wave_read))
                chosen_file = wave_read[label]
                if rand:
                    chosen_file.setpos(np.random.randint(chosen_file.getnframes() - frame_with))           
                if(get_label == True):
                    labels.append(label)
                out.append(np.fromstring(chosen_file.readframes(frame_with), np.uint16))
            yield (out, labels) if(get_label == True) else out
        for fr in wave_read:
            fr.close()
        yield meta if get_meta else None
        
    def create_loss(self, learning_rate):
        normal_x = self.normalize(self.x)
        z, z_mean, z_log_sigma_sq = self.encode(normal_x)
        reconstruct = self.decode(z)
        reconstr_loss = tf.reduce_mean(tf.pow(normal_x - reconstruct, 2))
        latent_loss = -0.01 * tf.reduce_sum(1 + z_log_sigma_sq 
                                           - tf.square(z_mean) 
                                           - tf.exp(z_log_sigma_sq), 1)
        
        self.y =  tf.placeholder(tf.int32, [None])
        class_loss = 0.1 * tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(logits=tf.matmul(z, self.params['class']['W']) 
                                                               + self.params['class']['b'], labels=self.y))
        
        self.cost = tf.reduce_mean(reconstr_loss + latent_loss + class_loss)

        self.optimizer = \
            tf.train.AdamOptimizer(learning_rate= learning_rate).minimize(self.cost)
        
    def train(self, epoch_count, batch_size):      
        data_generator = self.get_data_generator(self.files, self.architecture['input'], batch_size, epoch_count, True, False, True)
              
        if(self.loaded_model != True):
            self.tf_sess = tf.Session()
            self.tf_sess.run(tf.global_variables_initializer())
        for i in range(epoch_count):
            data = data_generator.__next__()
            if(i % 50 == 0):
                optimizer, cost = self.tf_sess.run([self.optimizer, self.cost], feed_dict={self.x: data[0], self.y: data[1]})
                print("epoch:" +str(i)+", cost:" +str(cost))
            else:
                optimizer = self.tf_sess.run(self.optimizer, feed_dict={self.x: data[0], self.y: data[1]})
        .
        data_generator.__next__()   
        self.tf_saver.save(self.tf_sess, self.model_path)
        
    def encode(self, data):
        z_mean, z_log_sigma_sq = self._forward_pass(self.params['rec'], data)
        eps = tf.random_normal((tf.shape(data)[0], self.architecture['z']), 0, 1, 
                               dtype=tf.float32)
        return tf.add(z_mean, tf.multiply(tf.sqrt(tf.exp(z_log_sigma_sq)), eps)), z_mean, z_log_sigma_sq
    
    def decode(self, data):
        return self._forward_pass(self.params['gen'], data)
            
    def _int_params(self, architecture):
        params = {
            'rec' : {
                'W' : {
                    'layers' : [],
                    'mean' : tf.Variable(xavier_init(architecture['rec'][-1], architecture['z'])),
                    'log_sigma' : tf.Variable(xavier_init(architecture['rec'][-1], architecture['z']))
                },
                'b' : {
                    'layers' : [],
                    'mean' : tf.Variable(tf.zeros([architecture['z']], dtype=tf.float32)),
                    'log_sigma' : tf.Variable(tf.zeros([architecture['z']], dtype=tf.float32))
                }
            },
            'gen' : {
                'W' : {
                    'layers' : [],
                    'mean' : tf.Variable(xavier_init(architecture['gen'][-1], architecture['z'])),
                    'log_sigma' : tf.Variable(xavier_init(architecture['gen'][-1], architecture['z']))
                },
                'b' : {
                    'layers' : [],
                    'mean' : tf.Variable(tf.zeros([architecture['input']], dtype=tf.float32)),
                    'log_sigma' : tf.Variable(tf.zeros([architecture['input']], dtype=tf.float32))
                }
            },
            'class' : {
                'W' : tf.Variable(xavier_init(architecture['z'], len(self.files))),
                'b' : tf.Variable(tf.zeros([len(self.files)], dtype=tf.float32))
            }
        }
        
        for i in range(len(architecture['rec'])):
            if(isinstance(architecture["rec"][i], list)):
                print('capsule')
            else:
                params['rec']['W']['layers'].append(tf.Variable(xavier_init(\
                    architecture["input"] if i==0 else architecture['rec'][i-1], \
                    architecture["rec"][i])))            
                params['rec']['b']['layers'].append(tf.Variable(tf.zeros([architecture["rec"][i]], dtype=tf.float32)))  
        params['rec']['W']['mean'] = tf.Variable(xavier_init(architecture['rec'][i], architecture['z']))
        params['rec']['W']['log_sigma'] = tf.Variable(xavier_init(architecture['rec'][i], architecture['z']))
        
        for i in range(len(architecture['gen'])):
            if(isinstance(architecture["gen"][i], list)):
                  
            else:
                params['gen']['W']['layers'].append(tf.Variable(xavier_init(\
                    architecture["z"] if i==0 else architecture['gen'][i-1], \
                    architecture["gen"][i])))            
                params['gen']['b']['layers'].append(tf.Variable(tf.zeros([architecture["gen"][i]], dtype=tf.float32)))
             
        params['gen']['W']['mean'] = tf.Variable(xavier_init(architecture['gen'][i], architecture['input']))
        params['gen']['W']['log_sigma'] = tf.Variable(xavier_init(architecture['gen'][i], architecture['input']))
        self.params = params
        return params
            
    def _forward_pass(self, params, data):
        current_val = data
        for i in range(len(params['W']['layers'])):
            current_val = self.transfer_fct(tf.add(tf.matmul(current_val, params['W']['layers'][i]), 
                                           params['b']['layers'][i]))
        z_mean = tf.add(tf.matmul(current_val, params['W']['mean']), params['b']['mean'])
        
        if(params['W']['log_sigma']):
            return (z_mean, tf.add(tf.matmul(current_val, params['W']['log_sigma']), params['b']['log_sigma']))
        else:
            return z_mean
                
    def generate(self, z_mu=None):
        """ Generate data by sampling from latent space.
        
        If z_mu is not None, data for this point in latent space is
        generated. Otherwise, z_mu is drawn from prior in latent 
        space.        
        """
        if z_mu is None:
            z_mu = np.random.normal(size=self.network_architecture["z"])
        # Note: This maps to mean of distribution, we could alternatively
        # sample from Gaussian distribution
        return self.sess.run(self.x_reconstr_mean, 
                             feed_dict={self.z: z_mu})
    
    def reconstruct(self, X):
        """ Use VAE to reconstruct given data. """
        return self.sess.run(self.x_reconstr_mean, 
                             feed_dict={self.x: X})

SyntaxError: invalid syntax (<ipython-input-5-82d671f76905>, line 94)

In [6]:
def norm(x, get_mu_std=False):
    ar = np.array(x)
    mu = np.mean(ar)
    std = np.std(ar)
    if(get_mu_std):
        return (ar - mu)/std, mu, std
    else:
        return (ar - mu)/std

def restore(x, mu, std):
    return np.array(x)*std + mu

def get_file_frame(file_path, frame_with, frame_count, batch_count, rand=False, get_meta=False, get_label=False):
    if(type(file_path ) == str):
        wave_read = [wave.open(file_path, "rb")]
    else:
        wave_read = []
        for fp in file_path:
            wave_read.append (wave.open(fp, "rb"))
    if(get_meta):
        meta = {
            "nframes" : wave_read[0].getnframes(),
            "nchannels" : wave_read[0].getnchannels(),
            "sampwidth" : wave_read[0].getsampwidth(),
            "framerate" : wave_read[0].getframerate()
        }
    for k in range(batch_count):
        out = []
        labels = []
        for i in range(frame_count):
            label = np.random.randint(len(wave_read)) - 1
            chosen_file = wave_read[label]
            if rand:
                chosen_file.setpos(np.random.randint(chosen_file.getnframes() - frame_with))           
            if(get_label == True):
                labels.append(label)
            out.append(np.fromstring(chosen_file.readframes(frame_with), np.uint16))
        yield [out, labels] if(get_label == True) else out
    for fr in wave_read:
        fr.close()
    yield meta if get_meta else None
    
def write_wave(array, path, meta, mu=None, std=None):
    unrolled = array.ravel()
    writer = wave.open(path, "wb")
    writer.setnframes(meta["nframes"])
    if(mu != None and std!=None):
        unrolled = restore(unrolled, mu, std)
    bytes_arr = np.rint(unrolled).astype(np.uint16).tobytes()
    writer.setnchannels(meta["nchannels"])
    writer.setsampwidth(meta["sampwidth"])
    writer.setframerate(meta["framerate"])
    writer.writeframes(bytes_arr)
    writer.close()

def xavier_init(fan_in, fan_out, constant=1): 
    """ Xavier initialization of network weights"""
    # https://stackoverflow.com/questions/33640581/how-to-do-xavier-initialization-on-tensorflow
    low = -constant*np.sqrt(6.0/(fan_in + fan_out)) 
    high = constant*np.sqrt(6.0/(fan_in + fan_out))
    return tf.random_uniform((fan_in, fan_out), 
                             minval=low, maxval=high, 
                             dtype=tf.float32)

In [174]:
vnet = VNet([[500,200], 300, 200, 100], "gauss")
vnet.files= ["01.wav", "03.wav", "04.wav", "05.wav", "06.wav", "07.wav", "11.wav" ,"13.wav", "14.wav"]
vnet.load_model()
vnet.noise_std = 0
vnet.init_vae(70)
#rbm.train(get_file_frame, 500, 0.0000015, 700, 0.002, 5)

NameError: name 'VNet' is not defined

In [4]:
network_architecture = {
    "input":(1000, 500),
    "rec" : (700, 500, 300, 150),
    "gen" : (100, 200, 500, 700),
    "z" : 100
} 

In [5]:
model = Model(network_architecture, ["01.wav", "03.wav", "04.wav", "05.wav", "06.wav", "07.wav", "11.wav" ,"13.wav", "14.wav"])
model.create_loss(0.0002)

In [6]:
#model.load_model()
model.train(1, 500)

FileNotFoundError: [Errno 2] No such file or directory: '01.wav'

In [353]:
write_wave(decoded, "ress.wav", meta, mu, std) 

In [18]:
import matplotlib.pyplot as plt
%matplotlib inline

np.random.seed(0)
tf.set_random_seed(0)

In [2]:
#vnet = VNet([500, 300, 200, 100], "gauss")
#vnet.load_model()
gen = get_file_frame("01.wav", 500, 500, 1, False, True)
foo = gen.__next__()  
foo, mu, std = norm(foo, True)
res = vnet.encode(foo) 
meta = gen.__next__()
np.shape(res)

NameError: name 'get_file_frame' is not defined

NameError: name 'xavier_init' is not defined

In [33]:
model.train(5, 500, 0.001)

ValueError: Dimensions must be equal, but are 150 and 700 for 'MatMul_4' (op: 'MatMul') with input shapes: [?,150], [700,1000].

In [423]:
gen = get_file_frame( ["01.wav", "03.wav", "04.wav", "05.wav", "06.wav", "07.wav", "11.wav" ,"13.wav", "14.wav"], 500, 500, 10000, rand=True, get_meta=False, get_label=False);
def train(network_architecture, learning_rate=0.0001,
          batch_size=500, training_epochs=10000, display_step=100):
    vae = VariationalAutoencoder(network_architecture, 
                                 learning_rate=learning_rate, 
                                 batch_size=batch_size)
    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(n_samples / batch_size)
        # Loop over all batches
        
        batch_xs= norm(gen.__next__());
        # Fit training using batch data
        cost = vae.partial_fit(batch_xs)
        # Compute average loss
        avg_cost += cost / batch_size

        # Display logs per epoch step
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), 
                  "cost=", "{:.9f}".format(cost))
    return vae

In [6]:
#vae = train(network_architecture)

In [90]:
gen = get_file_frame("01.wav", 500, 500, 1, False, True)
foo = gen.__next__()  
foo, mu, std = norm(foo, True)
res = vae.reconstruct(foo)
meta = gen.__next__()
write_wave(res, "ress.wav", meta, mu, std) 

NameError: name 'vae' is not defined

In [103]:
gen = get_file_frame("01.wav", 50000, 5, 1, False, True)
waveform = np.array(gen.__next__()).astype('float32')
#signals = tf.placeholder(tf.float32, [None, None])

In [104]:
#import functools

frame_length = 400
frame_step = 160
stft = tf.contrib.signal.stft(waveform, frame_length, frame_step)
t_magn = tf.abs(stft)
argument = tf.angle(stft)
inverse_stft = tf.contrib.signal.inverse_stft(
    tf.cast(tf.complex(tf.cos(argument)*t_magn, tf.sin(argument)*t_magn), dtype = tf.complex64), frame_length, frame_step,
    window_fn=tf.contrib.signal.inverse_stft_window_fn(frame_step))

In [105]:
tf_sess = tf.Session()
#tf_sess.as_default()
spect = stft.eval(session = tf_sess)
newWav = inverse_stft.eval(session = tf_sess)

In [106]:
meta = gen.__next__()

write_wave(newWav, "ress.wav", meta) 

In [60]:
np.array([1,2,3]).astype('complex')

array([ 1.+0.j,  2.+0.j,  3.+0.j])

In [67]:
2+1j

(2+1j)

In [71]:
s = 3+4j

In [76]:
a = np.abs(s)

In [77]:
an = np.angle(s)

In [78]:
an

0.92729521800161219

In [85]:
im = np.sin(an)*a

In [86]:
r = np.cos(an)*a

In [88]:
comp = r+im*1j

In [89]:
comp

(3.0000000000000004+3.9999999999999996j)