In [1]:
import tensorflow as tf
import numpy as np
import os
import wave

In [None]:
class DBM():
    """Stack of RBM layers trained greedily with one-step contrastive divergence.

    Built on the TensorFlow 1.x graph API (placeholders, sessions, ``tf.train.Saver``).
    Layer ``k`` and ``k + 1`` are connected by ``W[k]``; each layer has a bias ``b``.
    Weights are tied: the downward pass uses ``transpose(W[k])``.

    Attributes set by callers:
        files: list of paths handed to the ``train_set`` callable (see
            ``prepare_train_set``); defaults to an empty list.
    """

    def __init__(self, layers, unit_type="bin", noise_std = 0.001, model_path='./models/dbm.ckpt'):
        """Create the variables for every layer and the saver for the checkpoint.

        Args:
            layers: list with the number of units in each layer, visible layer first.
            unit_type: ``"bin"`` for stochastic binary units; any other value selects
                linear units with additive Gaussian noise.
            noise_std: standard deviation of the Gaussian noise (non-binary units).
            model_path: checkpoint path used by ``train``, ``encode``, ``decode``,
                ``finetune`` and (by default) ``load_model``.
        """
        self.unit_type = unit_type
        self.layers_nums = layers
        self.layers = []
        self.noise_std = noise_std
        # Remembered so every layer's training starts from the configured noise level.
        self._initial_noise_std = noise_std
        self.input = tf.placeholder("bool" if self.unit_type == "bin" else 'float', [None, None], name="input")
        # Noise level is fed at run time so the graph does not have to be rebuilt
        # whenever ``self.noise_std`` changes; unfed runs use the constructor value.
        self._noise_std_input = tf.placeholder_with_default(
            tf.constant(noise_std, dtype=tf.float32), shape=[], name="noise_std")
        self.model_path = model_path
        self.loaded_model = False
        self.tf_sess = None
        self.train_set = None
        self.files = []
        self.rW = None
        self.rb = None
        self._encode_op = None
        self._decode_op = None

        for i, unit_count in enumerate(layers):
            self.layers.append({
                "state" : None,
                "b" : tf.Variable(tf.zeros([unit_count]), name="b_"+str(i))
            })

        self.W = [
            tf.Variable(tf.random_normal((layers[i], layers[i+1]), mean=0.0, stddev=0.01), name="W_"+str(i))
            for i in range(len(layers)-1)
        ]

        # Save/restore only this model's variables under fixed names, so other
        # variables living in the same graph (another DBM, optimizer slots, the
        # regression head) neither pollute nor break the checkpoint.
        saved = {"b_"+str(i): layer["b"] for i, layer in enumerate(self.layers)}
        saved.update({"W_"+str(i): w for i, w in enumerate(self.W)})
        self.tf_saver = tf.train.Saver(var_list=saved)

    def _input_state(self):
        """Return the input placeholder as a tensor that ``tf.matmul`` accepts."""
        if self.unit_type == "bin":
            # The binary placeholder is declared as bool; matmul needs floats.
            return tf.cast(self.input, tf.float32)
        return self.input

    def __gaussian_noise_layer(self, x):
        """Add zero-mean Gaussian noise with the current noise level to ``x``."""
        noise = tf.random_normal(shape=tf.shape(x), mean=0.0, stddev=self._noise_std_input, dtype=tf.float32)
        return x + noise

    def _sample_binary(self, pre_activation):
        """Draw 0/1 samples with probability ``sigmoid(pre_activation)``."""
        # The uniform draw must be a graph op: the batch shape is only known at run time.
        uniform = tf.random_uniform(tf.shape(pre_activation))
        return tf.nn.relu(tf.sign(tf.sigmoid(pre_activation) - uniform))

    def _save(self, session):
        """Write the checkpoint, creating its directory first if needed."""
        directory = os.path.dirname(self.model_path)
        if directory:
            os.makedirs(directory, exist_ok=True)
        self.tf_saver.save(session, self.model_path)

    def sample_forward(self, start_from=0, steps_num=None):
        """Propagate states upwards and return the state of the last layer reached.

        ``self.layers[start_from]["state"]`` must already hold a tensor.

        Args:
            start_from: index of the layer to start from.
            steps_num: number of layers to climb; ``None`` or a value that would run
                past the top layer means "up to the top layer".
        """
        max_steps = len(self.W) - start_from
        if steps_num is None or steps_num > max_steps:
            steps_num = max_steps
        last = start_from
        for i in range(start_from, start_from + steps_num):
            mul = tf.matmul(self.layers[i]["state"], self.W[i]) + self.layers[i+1]["b"]
            if self.unit_type == "bin":
                self.layers[i+1]["state"] = self._sample_binary(mul)
            else:
                self.layers[i+1]["state"] = tf.nn.relu(self.__gaussian_noise_layer(mul))
            last = i + 1
        return self.layers[last]["state"]

    def sample_back(self, start_from=None, steps_count=None):
        """Propagate states downwards and return the state of the last layer reached.

        ``self.layers[start_from]["state"]`` must already hold a tensor.

        Args:
            start_from: index of the layer to start from; ``None`` means the top layer.
            steps_count: number of layers to descend; ``None`` or a value that would
                run below layer 0 means "down to layer 0".
        """
        if start_from is None:
            start_from = len(self.layers) - 1
        if steps_count is None or steps_count > start_from:
            steps_count = start_from
        i = start_from
        while i > start_from - steps_count:
            mul = tf.matmul(self.layers[i]["state"], tf.transpose(self.W[i-1])) + self.layers[i-1]["b"]
            if self.unit_type == "bin":
                self.layers[i-1]["state"] = self._sample_binary(mul)
            else:
                self.layers[i-1]["state"] = self.__gaussian_noise_layer(mul)
            i -= 1
        return self.layers[i]["state"]

    def __getNextBatch(self, batch_size):
        """Yield everything produced by ``self.train_set(batch_size)`` when it is callable."""
        if self.train_set and callable(self.train_set):
            yield from self.train_set(batch_size)

    def prepare_train_set(self, batch_size, epochs_count):
        """Turn ``self.train_set`` into an iterator of batches.

        A callable is invoked as
        ``train_set(self.files, layers_nums[0], batch_size, epochs_count, True)``;
        any other iterable is wrapped with ``iter``.

        Raises:
            ValueError: if no training source has been set.
        """
        if callable(self.train_set):
            return self.train_set(self.files, int(self.layers_nums[0]), batch_size, epochs_count, True)
        if self.train_set is None:
            raise ValueError("train_set is not set; pass a batch generator function or an iterable")
        return iter(self.train_set)

    def _build_cd_step(self, i, learning_rate):
        """Build the one-step contrastive-divergence update ops for layers ``i`` / ``i + 1``.

        Returns:
            ``(w_update, vb_update, hb_update, v_loss)`` where ``v_loss`` is the
            per-unit mean difference between the data and its reconstruction.
        """
        self.layers[0]["state"] = self._input_state()
        h0_state = self.sample_forward(0, i+1)
        # Captured before sample_back overwrites layer i with the reconstruction.
        v0_state = self.layers[i]["state"]
        positive = tf.matmul(tf.transpose(v0_state), h0_state)
        v1_state = self.sample_back(i+1, 1)
        h1_state = self.sample_forward(i, 1)
        negative = tf.matmul(tf.transpose(v1_state), h1_state)
        w_update = self.W[i].assign_add(learning_rate*(positive - negative))
        v_loss = tf.reduce_mean(v0_state - v1_state, 0)
        vb_update = self.layers[i]["b"].assign_add(learning_rate*(v_loss))
        hb_update = self.layers[i+1]["b"].assign_add(learning_rate*(tf.reduce_mean(h0_state - h1_state, 0)))
        return w_update, vb_update, hb_update, v_loss

    def train(self, train_set, batch_size, learning_rate, epochs_count, decrease_noise = 0, depth=None):
        """Greedily pre-train the stack, one pair of layers at a time, then save it.

        Args:
            train_set: batch source, see ``prepare_train_set``.
            batch_size: forwarded to the batch source.
            learning_rate: step size of the contrastive-divergence updates.
            epochs_count: number of update steps per layer pair.
            decrease_noise: fraction by which the noise level shrinks at every step.
            depth: number of layers to train (``depth - 1`` layer pairs); ``None`` or
                a too-large value trains the whole stack.
        """
        if depth is None or depth > len(self.layers)-1:
            depth = len(self.layers)
        self.train_set = train_set
        # Continue from loaded weights when load_model() was called, otherwise start fresh.
        if not self.loaded_model or self.tf_sess is None:
            self.tf_sess = tf.Session()
            self.tf_sess.run(tf.global_variables_initializer())
        self.train_set = self.prepare_train_set(batch_size, epochs_count*(depth-1))
        print("start training "+str(learning_rate))
        for i in range(depth-1):
            # Ops are built once per layer pair; building them inside the step loop
            # would add a new copy to the graph on every iteration.
            w_update, vb_update, hb_update, v_loss = self._build_cd_step(i, learning_rate)
            self.noise_std = self._initial_noise_std
            for j in range(epochs_count):
                self.noise_std = self.noise_std - decrease_noise*self.noise_std
                _, _, _, loss = self.tf_sess.run([w_update, vb_update, hb_update, v_loss], feed_dict={
                    self.input : norm(next(self.train_set)),
                    self._noise_std_input : self.noise_std
                })
                if j % 100 == 0:
                    print("epoch: "+str(j)+" layer:"+str(i)+" loss:")
                    print(np.mean(np.power(loss, 2)))
        # Pull the trailing item so a generator source can run its clean-up code.
        next(self.train_set, None)
        self._save(self.tf_sess)

    def load_model(self, path=None):
        """Open a session holding the weights stored at ``path`` (default: ``model_path``)."""
        self.tf_sess = tf.Session()
        self.tf_sess.run(tf.global_variables_initializer())
        self.tf_saver.restore(self.tf_sess, path if path is not None else self.model_path)
        self.loaded_model = True

    def _run_restored(self, fetch, data):
        """Evaluate ``fetch`` on ``data`` in a temporary session restored from the checkpoint."""
        # A local session is used so a live training session in self.tf_sess is not replaced.
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            self.tf_saver.restore(sess, self.model_path)
            return sess.run(fetch, feed_dict={
                self.input : data
            })

    def encode(self, data):
        """Return the top-layer state for ``data`` using the checkpointed weights."""
        if self._encode_op is None:
            self.layers[0]["state"] = self._input_state()
            self._encode_op = self.sample_forward()
        return self._run_restored(self._encode_op, data)

    def decode(self, data):
        """Return the visible-layer state for top-layer ``data`` using the checkpointed weights."""
        if self._decode_op is None:
            self.layers[-1]["state"] = self._input_state()
            self._decode_op = self.sample_back()
        return self._run_restored(self._decode_op, data)

    def loss(self):
        """Return a tensor with the mean squared error of an up-then-down pass over the input."""
        visible = self._input_state()
        self.layers[0]["state"] = visible
        self.sample_forward()
        reconstruction = self.sample_back()
        return tf.reduce_mean(tf.square(reconstruction - visible))

    def finetune(self, train_set, batch_size, learning_rate, epochs_count):
        """Minimise the reconstruction loss with Adam, starting from the checkpoint.

        The fine-tuned weights are written back to ``model_path``.

        Args:
            train_set: batch source, see ``prepare_train_set``.
            batch_size: forwarded to the batch source.
            learning_rate: Adam learning rate.
            epochs_count: number of optimisation steps.
        """
        self.train_set = train_set
        trainable = list(self.W) + [layer["b"] for layer in self.layers]
        minimize = tf.train.AdamOptimizer(learning_rate).minimize(self.loss(), var_list=trainable)
        self.train_set = self.prepare_train_set(batch_size, epochs_count)

        with tf.Session() as sess:
            # Initialise after building the optimizer so its slot variables exist.
            sess.run(tf.global_variables_initializer())
            self.tf_saver.restore(sess, self.model_path)
            for _ in range(epochs_count):
                sess.run(minimize, feed_dict={
                    self.input : norm(next(self.train_set))
                })
            next(self.train_set, None)
            self._save(sess)

    def compute_loss(self, data):
        """Return the mean squared error between ``data`` and ``decode(encode(data))``."""
        reconstruction = self.decode(self.encode(data))
        return np.mean(np.power(reconstruction - np.asarray(data), 2))

    def regression(self, train_data, epoch_count, batch_size=500, learning_rate=1e-4):
        """Train a softmax layer on top of the frozen stack to predict the source file.

        The classifier weights are stored in ``self.rW`` / ``self.rb`` and live in
        ``self.tf_sess``; they are not part of the checkpoint.

        Args:
            train_data: callable invoked as
                ``train_data(self.files, layers_nums[0], batch_size, epoch_count,
                True, False, True)`` that yields ``[frames, labels]`` pairs.
                NOTE(review): this assumes the ``get_file_frame`` signature — confirm.
            epoch_count: number of optimisation steps.
            batch_size: forwarded to ``train_data``.
            learning_rate: Adam learning rate.
        """
        class_count = len(self.files)
        known = set(id(v) for v in tf.global_variables())
        self.rW = tf.Variable(tf.zeros([self.layers_nums[-1], class_count]), name="rW")
        self.rb = tf.Variable(tf.zeros([class_count]), name="rb")
        labels = tf.placeholder(tf.int32, [None], name="labels")

        self.layers[0]["state"] = self._input_state()
        # stop_gradient only has an effect on the tensor it returns, so the frozen
        # features are the value that must flow into the classifier.
        features = tf.stop_gradient(self.sample_forward())
        logits = tf.matmul(features, self.rW) + self.rb
        cross_entropy = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits))
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(
            cross_entropy, var_list=[self.rW, self.rb])
        # Classifier variables plus the optimizer's own bookkeeping variables.
        fresh = [v for v in tf.global_variables() if id(v) not in known]

        if self.tf_sess is None:
            self.tf_sess = tf.Session()
            self.tf_sess.run(tf.global_variables_initializer())
        else:
            # Keep the trained/loaded stack weights; initialise only what is new.
            self.tf_sess.run(tf.variables_initializer(fresh))

        batches = train_data(self.files, int(self.layers_nums[0]), batch_size, epoch_count, True, False, True)
        for _ in range(epoch_count):
            frames, frame_labels = next(batches)
            self.tf_sess.run(train_step, feed_dict={
                self.input : norm(frames),
                labels : frame_labels
            })
        next(batches, None)
        

In [None]:
def norm(x, get_mu_std=False):
    """Standardise ``x`` using the mean and standard deviation of all its elements.

    Args:
        x: array-like of numbers.
        get_mu_std: when true, also return the statistics that were used.

    Returns:
        The standardised array, or ``(array, mu, std)`` when ``get_mu_std`` is true.
        ``std`` is returned as measured, so ``restore(array, mu, std)`` reproduces
        the input even for constant data.
    """
    ar = np.array(x)
    mu = np.mean(ar)
    std = np.std(ar)
    # Constant input (e.g. silence) has std == 0; dividing by it would turn the
    # whole batch into NaN. Such input is only centred instead.
    scale = std if std > 0 else 1.0
    normalised = (ar - mu) / scale
    if get_mu_std:
        return normalised, mu, std
    return normalised

def restore(x, mu, std):
    """Undo ``norm``: multiply ``x`` by ``std`` and add ``mu`` back."""
    scaled = np.array(x) * std
    return scaled + mu

def get_file_frame(file_path, frame_with, frame_count, batch_count, rand=False, get_meta=False, get_label=False):
    """Generator yielding batches of audio windows read from one or more WAV files.

    Yields ``batch_count`` batches and then one final item: the metadata of the
    first file when ``get_meta`` is true, otherwise ``None``. The files are closed
    before that final item is yielded, and also if the generator is abandoned early.

    Args:
        file_path: a path string or an iterable of path strings.
        frame_with: number of WAV frames read for each window.
        frame_count: number of windows per batch.
        batch_count: number of batches to yield.
        rand: when true, seek to a random position before each read; otherwise
            each file is read sequentially.
        get_meta: when true, the final item is a dict with ``nframes``,
            ``nchannels``, ``sampwidth`` and ``framerate`` of the first file.
        get_label: when true, each batch is ``[windows, labels]`` where a label is
            the index of the file the window came from.

    Raises:
        ValueError: if ``file_path`` contains no paths.
    """
    paths = [file_path] if isinstance(file_path, str) else list(file_path)
    if not paths:
        raise ValueError("file_path must name at least one WAV file")
    wave_read = []
    meta = None
    try:
        for fp in paths:
            wave_read.append(wave.open(fp, "rb"))
        if get_meta:
            meta = {
                "nframes" : wave_read[0].getnframes(),
                "nchannels" : wave_read[0].getnchannels(),
                "sampwidth" : wave_read[0].getsampwidth(),
                "framerate" : wave_read[0].getframerate()
            }
        for _ in range(batch_count):
            out = []
            labels = []
            for _ in range(frame_count):
                # randint(n) already returns an index in [0, n).
                label = np.random.randint(len(wave_read))
                chosen_file = wave_read[label]
                if rand:
                    chosen_file.setpos(np.random.randint(chosen_file.getnframes() - frame_with))
                if get_label:
                    labels.append(label)
                # NOTE(review): samples are interpreted as uint16, matching write_wave;
                # confirm this is intended for signed 16-bit PCM input.
                # frombuffer returns a read-only view, so copy to keep the arrays writable.
                out.append(np.frombuffer(chosen_file.readframes(frame_with), np.uint16).copy())
            # One item per completed batch.
            yield [out, labels] if get_label else out
    finally:
        for fr in wave_read:
            fr.close()
    yield meta
    
def write_wave(array, path, meta, mu=None, std=None):
    """Write ``array`` to ``path`` as a WAV file described by ``meta``.

    Args:
        array: array-like of sample values; it is flattened before writing.
        path: destination file path.
        meta: dict with ``nframes``, ``nchannels``, ``sampwidth`` and ``framerate``.
        mu: mean to add back via ``restore``; ``None`` means no shift.
        std: scale to multiply back via ``restore``; ``None`` means no scaling.
    """
    unrolled = np.asarray(array).ravel()
    # Only undo the normalisation when statistics were supplied; the defaults
    # would otherwise multiply the samples by None.
    if mu is not None or std is not None:
        unrolled = restore(unrolled, 0.0 if mu is None else mu, 1.0 if std is None else std)
    # NOTE(review): values outside the uint16 range are not clipped here; the
    # result of casting them is left to NumPy — confirm whether clipping is wanted.
    bytes_arr = np.rint(unrolled).astype(np.uint16).tobytes()
    # The context manager closes the file even if a header field is missing or invalid.
    with wave.open(path, "wb") as writer:
        writer.setnframes(meta["nframes"])
        writer.setnchannels(meta["nchannels"])
        writer.setsampwidth(meta["sampwidth"])
        writer.setframerate(meta["framerate"])
        writer.writeframes(bytes_arr)

In [None]:
# Pre-train a 500-500-250 stack on windows drawn from the listed recordings.
rbm = DBM(layers=[500, 500, 250], unit_type="gauss")
rbm.files = [
    "01.wav", "03.wav", "04.wav", "05.wav", "06.wav",
    "07.wav", "11.wav", "13.wav", "14.wav",
]
rbm.train(
    train_set=get_file_frame,
    batch_size=500,
    learning_rate=0.0000015,
    epochs_count=7000,
    decrease_noise=0.002,
)

False
start training 1.5e-06
epoch: 0 layer:0 loss:
0.00185456
epoch: 100 layer:0 loss:
0.00200522
epoch: 200 layer:0 loss:
0.00287141
epoch: 300 layer:0 loss:
0.00215182
epoch: 400 layer:0 loss:
0.00141107
epoch: 500 layer:0 loss:
0.00102902
epoch: 600 layer:0 loss:
0.00105204
epoch: 700 layer:0 loss:
0.000923881
epoch: 800 layer:0 loss:
0.000986889
epoch: 900 layer:0 loss:
0.000868584
epoch: 1000 layer:0 loss:
0.000861197
epoch: 1100 layer:0 loss:
0.000831756
epoch: 1200 layer:0 loss:
0.000783817
epoch: 1300 layer:0 loss:
0.000783477
epoch: 1400 layer:0 loss:
0.000842742
epoch: 1500 layer:0 loss:
0.000749989
epoch: 1600 layer:0 loss:
0.000790337
epoch: 1700 layer:0 loss:
0.000785229
epoch: 1800 layer:0 loss:
0.000754832
epoch: 1900 layer:0 loss:
0.0007428
epoch: 2000 layer:0 loss:
0.000690783
epoch: 2100 layer:0 loss:
0.000714734
epoch: 2200 layer:0 loss:
0.000683934
epoch: 2300 layer:0 loss:
0.000783719
epoch: 2400 layer:0 loss:
0.000701644
epoch: 2500 layer:0 loss:
0.000726078
epoc

In [None]:
# Rebuild the model in an empty graph so its variables carry the same names the
# checkpoint was saved with.
tf.reset_default_graph()
# Layer sizes must match the architecture that wrote ./models/dbm.ckpt.
rbm = DBM([500, 500, 250], "gauss")
# One batch of 500 sequential windows, followed by the file metadata.
gen = get_file_frame("01.wav", 500, 500, 1, False, True)
foo = next(gen)
foo, mu, std = norm(foo, True)
res = rbm.encode(foo)
# The item after the last batch is the metadata dict (get_meta=True).
meta = next(gen)

In [None]:
# Map the top-layer codes back down to the visible layer.
decoded = rbm.decode(data=res)

In [None]:
# Export the reconstruction, undoing the normalisation with the saved statistics.
write_wave(array=decoded, path="ress.wav", meta=meta, mu=mu, std=std)

In [48]:
# Show the array returned by rbm.decode; write_wave applies mu/std when exporting it.
decoded

array([[-0.2485055 , -0.62448007, -0.50707173, ..., -0.51627713,
        -0.38464558, -0.21760647],
       [-0.0380106 , -0.5878644 , -0.42959678, ..., -0.79777837,
        -0.49409249, -1.04269648],
       [ 1.06251526, -0.31583071, -0.21873406, ...,  0.27678546,
         0.13807897,  0.05830932],
       ..., 
       [-0.44847888, -0.70732969, -0.56549644, ..., -1.13774168,
        -1.11916137, -0.80218136],
       [ 0.32676309,  0.00529569,  0.19926089, ..., -0.14910096,
        -1.17060566, -0.31726819],
       [-0.32813689, -0.6678564 ,  0.69116747, ...,  0.25979102,
        -0.63806701, -0.47313672]], dtype=float32)

In [21]:
# Scratch check: strip zeros from both ends of a sequence.
np.trim_zeros([0, 2, 3], trim="fb")

[2, 3]

In [None]:
# Scratch check: boolean mask of the non-zero entries.
np.not_equal(np.array([2, 0, 3]), 0)