## Loading preprosed data

In [40]:
PATH = "../../weights/wgan_{}/one_hot/version_1".format("v1")

In [41]:
import os
os.makedirs(PATH, exist_ok=True)

In [3]:
BATCH_SIZE=128
LAMBDA = 10
NUM_EPOCH = 1000
DATA_TYPE = "mini_sample"

In [4]:
from common.bio.smiles import *
from common.bio.amino_acid import *
from common.bio.blast import *
from common.bio.constants import *
from common.model.architecture import *

import tensorflow as tf
tf.__version__

'1.8.0'

In [5]:
import numpy as np
train_data = np.load("../../data/wgan/{}/train_features.npy".format(DATA_TYPE))

In [6]:
train_data = np.pad(train_data, [(0, 0), (0,8)], mode='constant', constant_values=0)

In [7]:
train_data.shape

(271, 128)

In [8]:
NUM_OF_ACIDS = 21
EMBEDDING_SIZE = 32

In [9]:
SEQUENCE_LENGTH=train_data.shape[1]
SEQUENCE_LENGTH

128

In [10]:
STEPS_PER_EPOCH = int(train_data.shape[0]/BATCH_SIZE)+1
STEPS_PER_EPOCH

3

# Model

## Discriminator

In [21]:
NUM_OF_LAYERS=6

In [22]:
def discriminator_layer(x, level, filters):
    conv = tf.layers.conv2d(
            inputs=x,
            filters=filters,
            kernel_size=[3,NUM_OF_ACIDS],
            strides=(2,1),
            padding="same",
            activation=tf.nn.leaky_relu,
            name = "dconv{}".format(level))
    bn = tf.layers.batch_normalization(conv, name = "dbn{}".format(level))
    print(bn.shape)
    return bn 

In [23]:
def discriminator(x, is_training):

    with tf.variable_scope('discriminator', reuse=tf.AUTO_REUSE) as scope:
        print('discriminator')
        layer = x
        for layer_id in range(NUM_OF_LAYERS):            
            layer = discriminator_layer(layer, layer_id, 2*(2**(layer_id+1)))
        flat = tf.layers.flatten(layer, name="dflat")
        print(flat.shape)
        
        output = tf.layers.dense(inputs=flat,
                                 activation=None,
                                 units=1,
                                 name="doutput")
        print(output.shape)
        output = tf.reshape(output, [-1])
        print(output.shape)
        return output

# Generator

In [24]:
def generator_layer(x, level, filters):
        bn = tf.layers.batch_normalization(x, name = "gbn{}".format(level))
        up = tf.keras.layers.UpSampling2D(size=(2, 1))(bn)
        print(up.shape)
        return tf.layers.conv2d(inputs=up, 
                                 filters=filters,
                                 kernel_size=[3,NUM_OF_ACIDS],
                                 padding="same",
                                 activation=tf.nn.relu,
                                 name = "conv{}".format(level))

In [25]:
import math
NUM_FILTERS=64
def generator(input_batch=None, is_training=True):
    with tf.variable_scope('generator') as scope:
        print('generator')
        if input_batch is None:
            input_batch = tf.cast(tf.random_normal([BATCH_SIZE, 32]), tf.float32)
        dim = math.floor(SEQUENCE_LENGTH/(2**NUM_OF_LAYERS))          
        print(input_batch.shape)
        dense1 = tf.layers.dense(inputs=input_batch,
                                 kernel_initializer=tf.truncated_normal_initializer(stddev=0.02),
                                 bias_initializer=tf.zeros_initializer (),
                                 units=dim*NUM_OF_ACIDS*NUM_FILTERS,
                                 activation=tf.nn.relu,
                                 name="dense1")
        layer = tf.reshape(dense1, shape=[-1, dim, NUM_OF_ACIDS, NUM_FILTERS], name='reshape1')
        for layer_id in range(NUM_OF_LAYERS):
            layer = generator_layer(layer, layer_id, NUM_FILTERS/(2**(layer_id+1)))
        final_conv = tf.layers.conv2d(inputs=layer, 
                         filters=1,
                         kernel_size=[3,NUM_OF_ACIDS],
                         padding="same",
                         activation=tf.nn.sigmoid,
                         name = "final_conv")    
        print(final_conv.shape)
        return final_conv

## Graph

In [28]:
tf.reset_default_graph()
graph = tf.Graph()
with graph.as_default():
    with tf.variable_scope('input'):
        real_sequences = tf.placeholder(tf.int32, [None, SEQUENCE_LENGTH], name='real_sequence')
        is_training = tf.placeholder(tf.bool, name='is_train')

    dataset = tf.data.Dataset.from_tensor_slices(real_sequences)
    dataset = dataset.shuffle(buffer_size=10000, reshuffle_each_iteration=True)
    dataset = dataset.apply(tf.contrib.data.batch_and_drop_remainder(BATCH_SIZE)).repeat(NUM_EPOCH)
    iterator = dataset.make_initializable_iterator()

    batch_sequences = iterator.get_next()
    embedded_sequences = tf.one_hot(batch_sequences, NUM_OF_ACIDS)
    embedded_sequences = tf.reshape(embedded_sequences, shape=[-1, SEQUENCE_LENGTH, NUM_OF_ACIDS, 1], name='embedded_sequences')
    print(embedded_sequences.shape)

(128, 128, 21, 1)


In [29]:
with graph.as_default():
    fake = generator(is_training=is_training)
    logits_real = discriminator(embedded_sequences, is_training)
    logits_fake = discriminator(fake, is_training)
    d_loss = tf.reduce_mean(logits_fake) - tf.reduce_mean(logits_real) # This optimizes the discriminator.
    g_loss = -tf.reduce_mean(logits_fake)  # This optimizes the generator.

    # # wgan-gp gradient panelty 
    with tf.name_scope("Gradient_penalty"):
        eps = tf.random_uniform([BATCH_SIZE,1, 1, 1], minval=0.0,maxval=1.0)
        interpolates = embedded_sequences + eps*(fake - embedded_sequences)

        gradients = tf.gradients(discriminator(interpolates, is_training), [interpolates])[0]
        slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1]))
        gradient_penalty = tf.reduce_mean(tf.square(slopes - 1.))
        d_loss += 10 * gradient_penalty
        tf.summary.scalar("d_loss", d_loss)
        tf.summary.scalar("g_loss", g_loss)

    D_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,'discriminator')
    G_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'generator')

    trainer_d = tf.train.AdamOptimizer(learning_rate=0.0001, beta1=0.5, beta2=0.9).minimize(d_loss, var_list=D_vars)
    trainer_g = tf.train.AdamOptimizer(learning_rate=0.0001, beta1=0.5, beta2=0.9).minimize(g_loss, var_list=G_vars)

    summ = tf.summary.merge_all()
    
    init = tf.global_variables_initializer()
    saver = tf.train.Saver(max_to_keep=3)

generator
(128, 32)
(128, 4, 21, 64)
(128, 8, 21, 32)
(128, 16, 21, 16)
(128, 32, 21, 8)
(128, 64, 21, 4)
(128, 128, 21, 2)
(128, 128, 21, 1)
discriminator
(128, 64, 21, 4)
(128, 32, 21, 8)
(128, 16, 21, 16)
(128, 8, 21, 32)
(128, 4, 21, 64)
(128, 2, 21, 128)
(128, 5376)
(128, 1)
(128,)
discriminator
(128, 64, 21, 4)
(128, 32, 21, 8)
(128, 16, 21, 16)
(128, 8, 21, 32)
(128, 4, 21, 64)
(128, 2, 21, 128)
(128, 5376)
(128, 1)
(128,)
discriminator
(128, 64, 21, 4)
(128, 32, 21, 8)
(128, 16, 21, 16)
(128, 8, 21, 32)
(128, 4, 21, 64)
(128, 2, 21, 128)
(128, 5376)
(128, 1)
(128,)


In [30]:
with graph.as_default():
    print_model_summary()

All parameters: 2865502.0 (out of them 954914 are trainable)

generator/dense1/kernel:0(32, 2688)
generator/dense1/bias:0(2688,)
generator/gbn0/gamma:0(64,)
generator/gbn0/beta:0(64,)
generator/conv0/kernel:0(3, 21, 64, 32)
generator/conv0/bias:0(32,)
generator/gbn1/gamma:0(32,)
generator/gbn1/beta:0(32,)
generator/conv1/kernel:0(3, 21, 32, 16)
generator/conv1/bias:0(16,)
generator/gbn2/gamma:0(16,)
generator/gbn2/beta:0(16,)
generator/conv2/kernel:0(3, 21, 16, 8)
generator/conv2/bias:0(8,)
generator/gbn3/gamma:0(8,)
generator/gbn3/beta:0(8,)
generator/conv3/kernel:0(3, 21, 8, 4)
generator/conv3/bias:0(4,)
generator/gbn4/gamma:0(4,)
generator/gbn4/beta:0(4,)
generator/conv4/kernel:0(3, 21, 4, 2)
generator/conv4/bias:0(2,)
generator/gbn5/gamma:0(2,)
generator/gbn5/beta:0(2,)
generator/conv5/kernel:0(3, 21, 2, 1)
generator/conv5/bias:0(1,)
generator/final_conv/kernel:0(3, 21, 1, 1)
generator/final_conv/bias:0(1,)
discriminator/dconv0/kernel:0(3, 21, 1, 4)
discriminator/dconv0/bias:0(4,)


## Helpers for training model

## Review generated examples

In [72]:
 def save_weights(saver, sess, path):
    save_path = saver.save(sess, path)
    print("Model saved in path: %s" % save_path)

In [73]:
def mean(l):
    if len(l) == 0:
        return 0
    else:
        return sum(l) / float(len(l))

In [74]:
def print_summary(steps, dLosses, gLosses):    
    if steps % int(STEPS_PER_EPOCH) == 0:
        print('steps:{} \td_loss:{:.4f} \tg_loss:{:.4f}'.format(steps, mean(dLosses), mean(gLosses)))
        dLosses, gLosses = [], [] 
    return dLosses, gLosses

In [75]:
def display_sequence():
    sequences = tf.argmax(tf.squeeze(fake), axis=2)
    generated_sequences, logits = sess.run([sequences, logits_fake], feed_dict={is_training: False})
    #indexToLetter = pretrained_emb["words"].to_dict()
    best_sequence = "".join([ ID_TO_AMINO_ACID[acid_index] for acid_index in generated_sequences[np.argmax(logits)]]) 
    worst_sequence = "".join([ ID_TO_AMINO_ACID[acid_index] for acid_index in generated_sequences[np.argmin(logits)]]) 
    print("{} | Discriminator value {}".format(best_sequence, logits[np.argmax(logits)]))
    print("{} | Discriminator value {}".format(worst_sequence, logits[np.argmin(logits)]))

In [76]:
import datetime
def save_model(saver, sess):
    # Epoch ended
    if steps % (STEPS_PER_EPOCH*50) == 0:
        display_sequence()
        print("Epoch {}. Fineshed at {}".format((steps/STEPS_PER_EPOCH), str(datetime.datetime.now()).split('.')[0]))
        save_weights(saver, sess, PATH)

## Running model

In [77]:
sess = tf.Session(graph=graph)

tb_writer = tf.summary.FileWriter("../../logs/wgan/one_hot", graph)
sess.run(init)
steps, gen_iterations = 0, 0

# CAUTION: Training the model

In [84]:
sess.run(iterator.initializer, feed_dict={real_sequences: train_data})
steps, gen_iterations

(4686, 686)

In [85]:
print ("Start training with batch size: {}, epoch num: {}".format(BATCH_SIZE, NUM_EPOCH))
dLosses, gLosses = [], [] 
while True:
    try:
        d_iters = (100 if gen_iterations < 5 or gen_iterations % 500 == 0 else 5)
        for k in range(d_iters): # Discriminator
            _, dLoss = sess.run([trainer_d, d_loss], feed_dict={is_training: True})
            steps = steps + 1
            dLosses.append(dLoss)
            dLosses, gLosses = print_summary(steps, dLosses, gLosses)
            save_model(saver, sess)

        # Generator
        _, gLoss = sess.run([trainer_g, g_loss], feed_dict={is_training: True})
        gLosses.append(gLoss)
        steps = steps + 1
        gen_iterations = gen_iterations + 1
        dLosses, gLosses = print_summary(steps, dLosses, gLosses)
        save_model(saver, sess)
    except tf.errors.OutOfRangeError:
        print ("Training is finished")
        break;            

Start training with batch size: 128, epoch num: 1000
steps:4689 	d_loss:-65.0990 	g_loss:0.0000
steps:4692 	d_loss:-65.4799 	g_loss:42.8679
steps:4695 	d_loss:-66.5921 	g_loss:0.0000
steps:4698 	d_loss:-66.4250 	g_loss:45.4151
steps:4701 	d_loss:-64.3635 	g_loss:0.0000
steps:4704 	d_loss:-65.3564 	g_loss:70.3249
steps:4707 	d_loss:-64.3661 	g_loss:0.0000
steps:4710 	d_loss:-64.6056 	g_loss:32.8408
steps:4713 	d_loss:-64.7403 	g_loss:0.0000
steps:4716 	d_loss:-64.8668 	g_loss:33.9730
steps:4719 	d_loss:-65.1235 	g_loss:0.0000
steps:4722 	d_loss:-64.9000 	g_loss:31.5867
steps:4725 	d_loss:-66.5473 	g_loss:0.0000
steps:4728 	d_loss:-66.0689 	g_loss:48.6623
steps:4731 	d_loss:-65.7678 	g_loss:0.0000
steps:4734 	d_loss:-66.3518 	g_loss:58.3514
steps:4737 	d_loss:-64.9740 	g_loss:0.0000
steps:4740 	d_loss:-65.4653 	g_loss:42.3120
steps:4743 	d_loss:-64.6816 	g_loss:0.0000
steps:4746 	d_loss:-65.2049 	g_loss:52.1220
steps:4749 	d_loss:-65.2902 	g_loss:0.0000
steps:4752 	d_loss:-66.0567 	g_los

steps:5160 	d_loss:-68.3545 	g_loss:66.5754
steps:5163 	d_loss:-65.4541 	g_loss:0.0000
steps:5166 	d_loss:-66.3594 	g_loss:23.5270
steps:5169 	d_loss:-65.3242 	g_loss:0.0000
steps:5172 	d_loss:-66.4175 	g_loss:6.4151
steps:5175 	d_loss:-65.8705 	g_loss:0.0000
steps:5178 	d_loss:-66.1645 	g_loss:4.8291
steps:5181 	d_loss:-66.8499 	g_loss:0.0000
steps:5184 	d_loss:-66.7871 	g_loss:12.2453
steps:5187 	d_loss:-66.5800 	g_loss:0.0000
steps:5190 	d_loss:-66.7124 	g_loss:2.1556
steps:5193 	d_loss:-65.5691 	g_loss:0.0000
steps:5196 	d_loss:-65.2698 	g_loss:19.8464
steps:5199 	d_loss:-66.9215 	g_loss:0.0000
steps:5202 	d_loss:-67.3966 	g_loss:54.6707
steps:5205 	d_loss:-66.2557 	g_loss:0.0000
steps:5208 	d_loss:-65.9324 	g_loss:32.2518
steps:5211 	d_loss:-65.4370 	g_loss:0.0000
steps:5214 	d_loss:-65.5136 	g_loss:18.9300
steps:5217 	d_loss:-65.8055 	g_loss:0.0000
steps:5220 	d_loss:-66.3552 	g_loss:12.0238
steps:5223 	d_loss:-66.0109 	g_loss:0.0000
steps:5226 	d_loss:-65.8617 	g_loss:31.4343
st

steps:5634 	d_loss:-67.1920 	g_loss:19.8812
steps:5637 	d_loss:-67.4151 	g_loss:0.0000
steps:5640 	d_loss:-67.0467 	g_loss:14.9201
steps:5643 	d_loss:-66.5298 	g_loss:0.0000
steps:5646 	d_loss:-66.8057 	g_loss:3.2452
steps:5649 	d_loss:-67.1270 	g_loss:0.0000
steps:5652 	d_loss:-66.9673 	g_loss:11.8823
steps:5655 	d_loss:-66.6853 	g_loss:0.0000
steps:5658 	d_loss:-67.1572 	g_loss:13.7926
steps:5661 	d_loss:-66.5988 	g_loss:0.0000
steps:5664 	d_loss:-66.8062 	g_loss:-2.3452
steps:5667 	d_loss:-66.9526 	g_loss:0.0000
steps:5670 	d_loss:-67.1093 	g_loss:11.2520
steps:5673 	d_loss:-67.3182 	g_loss:0.0000
steps:5676 	d_loss:-67.2807 	g_loss:10.9078
steps:5679 	d_loss:-66.6259 	g_loss:0.0000
steps:5682 	d_loss:-67.5511 	g_loss:24.8382
steps:5685 	d_loss:-66.7327 	g_loss:0.0000
steps:5688 	d_loss:-66.5624 	g_loss:6.9547
steps:5691 	d_loss:-67.2076 	g_loss:0.0000
steps:5694 	d_loss:-66.5684 	g_loss:4.0491
steps:5697 	d_loss:-67.2841 	g_loss:0.0000
steps:5700 	d_loss:-68.2332 	g_loss:26.9365
HY

steps:6108 	d_loss:-67.7604 	g_loss:-18.6654
steps:6111 	d_loss:-67.9825 	g_loss:0.0000
steps:6114 	d_loss:-68.6580 	g_loss:17.4772
steps:6117 	d_loss:-67.9784 	g_loss:0.0000
steps:6120 	d_loss:-68.9941 	g_loss:4.0478
steps:6123 	d_loss:-68.1527 	g_loss:0.0000
steps:6126 	d_loss:-67.6754 	g_loss:-4.9843
steps:6129 	d_loss:-67.9654 	g_loss:0.0000
steps:6132 	d_loss:-68.2219 	g_loss:10.3435
steps:6135 	d_loss:-67.4248 	g_loss:0.0000
steps:6138 	d_loss:-67.9481 	g_loss:10.8840
steps:6141 	d_loss:-67.7860 	g_loss:0.0000
steps:6144 	d_loss:-68.1528 	g_loss:-7.7235
steps:6147 	d_loss:-68.6208 	g_loss:0.0000
steps:6150 	d_loss:-68.3944 	g_loss:2.7995
IY0KIY0000Y00Y0000Y00Y0000Y00YY00YY0YY000YY0YY000YY0YY0000YYYYY000YYYYY000YYYYYY00YYYYYYY0YY000Y0000000000000000000000000000000Y | Discriminator value -0.6709079742431641
HY00I00000Y00Y0000Y0YY0000YYYY000000000000YYYY000000000000000000000000000000000000000000000000000000000000000000000000000000000Y | Discriminator value -4.412442684173584
Epoch 2

steps:6582 	d_loss:-70.9481 	g_loss:0.0000
steps:6585 	d_loss:-70.1299 	g_loss:0.0000
steps:6588 	d_loss:-70.6276 	g_loss:0.0000
steps:6591 	d_loss:-69.9187 	g_loss:0.0000
steps:6594 	d_loss:-70.6533 	g_loss:0.0000
steps:6597 	d_loss:-70.7666 	g_loss:0.0000
steps:6600 	d_loss:-71.0481 	g_loss:0.0000
IKVKK0KKI000Y0000000000000000000000000000000Y000000000000000Y000000000000000000000000000000000000000000000000000000000000000000Y | Discriminator value -34.941856384277344
HY0IIY000YY00YY000Y0YY0000Y0YYY00YYYYYY00YYYYYY000YYYY0000YYYYY0000YYYY000YY0000000Y0000000000000000000000000000000000000000000Y | Discriminator value -45.94825744628906
Epoch 2200.0. Fineshed at 2018-07-27 10:12:54
Model saved in path: ../../weights/wgan_v1/one_hot/version_1
steps:6603 	d_loss:-70.8311 	g_loss:0.0000
steps:6606 	d_loss:-71.6808 	g_loss:0.0000
steps:6609 	d_loss:-70.5608 	g_loss:0.0000
steps:6612 	d_loss:-71.1771 	g_loss:0.0000
steps:6615 	d_loss:-70.7346 	g_loss:0.0000
steps:6618 	d_loss:-70.9776 	g_loss:

Model saved in path: ../../weights/wgan_v1/one_hot/version_1
steps:7053 	d_loss:-68.9825 	g_loss:0.0000
steps:7056 	d_loss:-69.4663 	g_loss:14.9382
steps:7059 	d_loss:-69.7832 	g_loss:0.0000
steps:7062 	d_loss:-69.1313 	g_loss:35.6496
steps:7065 	d_loss:-68.5538 	g_loss:0.0000
Training is finished


In [None]:
embeddings = sess.run(embedding_weights)
print("embeddings : %s" % embeddings[0])

In [35]:
generated= sess.run([fake], feed_dict={is_training: True})

## Validation of discriminator

In [36]:
with graph.as_default():
    val_real = discriminator(embedded_real_sequences, is_training=False)
    val_fake = discriminator(fake, is_training=False)
    val_loss = tf.reduce_mean(val_real-val_fake)
    real_predictions = tf.rint(val_real)
    fake_predictions = tf.rint(val_fake)
    correct_real_predictions = tf.equal(real_predictions, tf.zeros([BATCH_SIZE], dtype=tf.float32))
    correct_fake_predictions = tf.equal(fake_predictions, tf.ones([BATCH_SIZE], dtype=tf.float32))
    casted_real = tf.cast(correct_real_predictions, tf.float32)
    casted_fake = tf.cast(correct_fake_predictions, tf.float32)
    accuracy = (tf.reduce_mean(casted_real) + tf.reduce_mean(casted_fake))/2

discriminator
(128, 64, 32, 4)
(128, 32, 32, 8)
(128, 16, 32, 16)
(128, 8, 32, 32)
(128, 4, 32, 64)
(128, 2, 32, 128)
(128, 8192)
(128, 1)
(128,)
discriminator
(128, 64, 32, 4)
(128, 32, 32, 8)
(128, 16, 32, 16)
(128, 8, 32, 32)
(128, 4, 32, 64)
(128, 2, 32, 128)
(128, 8192)
(128, 1)
(128,)


In [None]:
#Validate discriminator by giving from validate data set and randomly generated
print ('validating discriminator...')
sess.run(iterator.initializer, 
         feed_dict={real_sequences: val_data, random_sequences: get_random_sequence(val_data.shape[0])})
losses = []
accuracies = []
while True:
    try:
        v_loss, v_accuracy = sess.run([val_loss, accuracy], feed_dict={is_training: False})
        losses.append(v_loss)
        accuracies.append(v_accuracy)
    except tf.errors.OutOfRangeError:
        print ('Validation g_loss:{:.4f} ,accuracy :{:.4f}'.format(mean(losses), mean(accuracies)))
        break

In [28]:
def restore_weights(saver, sess, path):
    saver.restore(sess, path)
    print("Model restored.")

In [29]:
restore_weights(saver, sess, PATH)

INFO:tensorflow:Restoring parameters from ../../weights/wgan_sequence/emb/version_3
Model restored.


## Review generated examples

In [115]:
def discriminator_score(session, sequence):
    with graph.as_default():
        test_seq = tf.placeholder(tf.int32, [None, SEQUENCE_LENGTH], name='real_sequence')
        print (test_seq.shape)
        embedded_test_seq = tf.nn.embedding_lookup(embedding_weights, test_seq)
        embedded_test_seq = tf.reshape(embedded_test_seq, shape=[-1, SEQUENCE_LENGTH, EMBEDDING_SIZE, 1])
        print (embedded_test_seq.shape)
        test_score = discriminator(embedded_test_seq, False)
        
    score = session.run(test_score, feed_dict={test_seq:sequence})
    return score    

In [78]:
def repeating_amino_acids_in_row(input_sequence):
    repeating2_count = 0
    repeating3_count = 0
    seq = input_sequence.replace("0", "")
    for index in range(len(seq)):
        if index == 0:
            continue
        if seq[index-1] == seq[index]:
            repeating2_count +=1
        if index == len(seq)-1:
            continue   
        if seq[index-1] == seq[index] and seq[index+1] == seq[index]:
            repeating3_count +=1
    return repeating2_count, repeating3_count

In [116]:
def print_seq_stats(input_sequence, score):
    sequence = "".join([ indexToLetter[acid_index] for acid_index in input_sequence ])
    print(sequence)
    repeating2_count, repeating3_count = repeating_amino_acids_in_row(sequence)
    print("Score: {} | Length: {} | Different characters: {} | Repeating 2s: {} | Repeating 3s: {}".format(
        score, len(sequence.replace("0", "")), (len(set(sequence))-1), repeating2_count, repeating3_count))

In [87]:
sequences = reverse_embedding_lookup(embedding_weights, tf.squeeze(fake))
print (sequences.shape)

(16, 120)


In [88]:
print ('Generating sequences...')
generated_sequences = sess.run([sequences], feed_dict={is_training: False})
generated_sequences[0]

Generating sequences...


array([[11, 16,  6, ...,  0,  0,  0],
       [11, 16, 15, ...,  0,  0,  0],
       [11, 17,  9, ...,  0,  0,  0],
       ...,
       [11, 15,  7, ...,  0,  0,  0],
       [11, 15,  7, ...,  0,  0,  0],
       [11,  1, 14, ...,  0,  0,  0]])

In [119]:
scores = discriminator_score(sess, generated_sequences[0])
for index, s in enumerate(generated_sequences[0]):
    print_seq_stats(s, scores[index])

(?, 120)
(?, 120, 32, 1)
discriminator
(?, 60, 32, 64)
(?, 30, 32, 128)
(?, 15, 32, 256)
(?, 1)
(?,)
MSGDDLDERIETYYVRVRGVGYVYYTRKKVQWWYKLRNLGDGSVEVDAPGEEQQVEKMVDWMRRGPSKANVSQVEERQVKLEYDYFRIEYGG00Y0Y00000000000000000000000
Score: 285.4346008300781 | Length: 94 | Different characters: 18 | Repeating 2s: 11 | Repeating 3s: 0
MSRVCYRRYVYGVVQGVGYRYYTQRQAQRLGVTGWVRNCDDGSVEAVYEGDSERVEEPPYWQRRGPRRWRRGPGSVEETSTRYEARRGYSRYERY0Q00000000000000000000000
Score: 277.64739990234375 | Length: 96 | Different characters: 16 | Repeating 2s: 11 | Repeating 3s: 0
MTKKRRYSWVRGRRRQVGYYQYGYGRRYYRVREYAQWQPWVEYYQQGDYEAVERYYLDKLEKCQGGPPRARVEEVEWWEPRRGGEYYEYR000000000000000000000000000000
Score: 254.7123260498047 | Length: 90 | Different characters: 16 | Repeating 2s: 17 | Repeating 3s: 1
MSKVRRQYYVRGRVQGVGYRAWTTYQAQQLGLTGWVRNLDDGSVEVYYYGEPERVEAEYVTEWLRGPPTRRPRVDVVDWEWWTEEEQPGGFERRF0Q00000000000000000000000
Score: 289.6255187988281 | Length: 96 | Different characters: 17 | Repeating 2s: 15 | Repeating 3s: 2
MKTLKY

In [117]:
scores = discriminator_score(sess, train_data)
for index, example in enumerate(train_data):
    print_seq_stats(example, scores[index])

(?, 120)
(?, 120, 32, 1)
discriminator
(?, 60, 32, 64)
(?, 30, 32, 128)
(?, 15, 32, 256)
(?, 1)
(?,)
MPGPTVVRFTARVVGRVQGVGFRDYVRTRGRRLGLVGTATNMPDGAVVVIAEGGAPACQNLARLLVTGHTPGWTDRVEVVWQRAQGDLADFRRK00000000000000000000000000
Score: 363.3921203613281 | Length: 94 | Different characters: 19 | Repeating 2s: 9 | Repeating 3s: 1
METQKILVSGQVQGVGFRWSATRLAKQLTLTGTVRNLANGQVEIIATGESATLQQFCQQLKHGLSPWINVMTLTTHSIPTHQFADFRIII000000000000000000000000000000
Score: 329.80377197265625 | Length: 90 | Different characters: 19 | Repeating 2s: 6 | Repeating 3s: 1
MKRVHVIVEGRVQGVGFRYFVQHEALKRQLTGWVKNNDDGTVEMEVQGNESALQLFLDTIEAGTMFAKVARMHIEPRDVRSDEKQFRIMYGSGF00000000000000000000000000
Score: 350.7937927246094 | Length: 94 | Different characters: 19 | Repeating 2s: 2 | Repeating 3s: 0
MVTDAQQARLTAWVHGRVQGVGFRWWTRARALELGLAGSATNLPGNRVEVVAEGPRESCERLLEALRSPDTPGDVDHVAEQWSEPKGGLTGFVER0000000000000000000000000
Score: 365.7681579589844 | Length: 95 | Different characters: 18 | Repeating 2s: 5 | Repeating 3s: 0
MTQVCIAAYV

In [49]:
repeating_amino_acids_in_row("MARDTAILRVTGFVQGVGFRYTTKHVAYKYDISGTVKNLDDGSVEIHAIAEEENLNKFIDAIKKGPSPGCRIEHVYIYKGAPVEDRKTFDIVY")

5


In [50]:
repeating_amino_acids_in_row("MKAARARLYYYGVVQGVYYRYYRYYYTYYQRYGYVGWYWVRRDREVVVVVQQQWQQCDELLKRWLRYTQPRARDTDVDWYEYQGDDQYYYVE")

21
