# WGAN-GP

## 1. Data Load

In [None]:
import pandas as pd
import numpy as np
import pickle
import sqlite3

# NOTE(review): the right-hand side of this assignment is missing, so this cell
# does not parse as written. Later cells index `dataset` with a boolean mask and
# read its `seq` and "4R" columns, so a pandas DataFrame is presumably expected
# here — restore the original loading expression.
dataset =
display(dataset)

In [None]:
from tqdm import tqdm
from collections import defaultdict

def onehot_encoder(train_set, classes="AGCT", is_train=True, forward_pad=0, backward_pad=0):
    """One-hot encode an iterable of sequence strings.

    Every record is upper-cased and each symbol is replaced by a vector of
    length ``len(classes)`` holding a 1 at the symbol's index in ``classes``.
    Symbols that are not in ``classes`` become all-zero vectors.

    When ``is_train`` is truthy, each record is followed in the output by a
    second encoding of the same record in which every base is replaced by its
    Watson-Crick complement (A<->T, G<->C). Only the bases are complemented;
    the position order is NOT reversed. For the default ``classes`` this is
    exactly the mapping the previous hard-coded "TCGA" table produced.

    Args:
        train_set: Iterable of strings to encode.
        classes: Alphabet; a symbol's position defines its one-hot index.
        is_train: If truthy, also emit the complemented encoding per record.
        forward_pad: Number of uniform-probability rows prepended per record.
        backward_pad: Number of uniform-probability rows appended per record.

    Returns:
        ``np.array`` built from the list of encoded records. When all records
        have the same length this has shape
        ``(n_records * (2 if is_train else 1),
        forward_pad + length + backward_pad, len(classes))``.
    """
    n_classes = len(classes)

    # Forward table: symbol -> unit vector at the symbol's index in `classes`.
    encoder = {}
    for index, symbol in enumerate(classes):
        vector = np.zeros(n_classes)
        vector[index] = 1
        encoder[symbol] = vector

    # Float zeros for unknown symbols keep the dtype consistent with the
    # known-symbol vectors.
    unknown = np.zeros(n_classes)

    # Complement table derived from `classes` rather than a fixed "TCGA"
    # ordering, so it stays correct for any ordering of the four bases.
    # Symbols without a complement present in `classes` fall through to
    # `unknown`, as non-"TCGA" symbols did before.
    pairs = {"A": "T", "T": "A", "G": "C", "C": "G"}
    complement = {
        symbol: encoder[pairs[symbol]]
        for symbol in classes
        if pairs.get(symbol) in encoder
    }

    # Padding row: equal probability for every class (0.25 each for 4 classes).
    uniform = [1.0 / n_classes] * n_classes if n_classes else []

    def _encode(sequence, table):
        # Pad, look up each symbol, pad again.
        rows = [uniform] * forward_pad
        rows.extend(table.get(symbol, unknown) for symbol in sequence)
        rows.extend([uniform] * backward_pad)
        return rows

    output = []
    for record in tqdm(train_set):
        sequence = record.upper()
        output.append(_encode(sequence, encoder))
        if is_train:
            output.append(_encode(sequence, complement))

    return np.array(output)

In [None]:
# train_X = onehot_encoder(dataset.values, is_train=False)

# Keep only the rows whose "4R" value is positive.
# NOTE(review): the mask is built from `seq_dataset` but applied to `dataset`;
# confirm both frames share the same index (or that `dataset` was intended).
positive_4r = seq_dataset["4R"] > 0
selected = dataset[positive_4r]

train_X = onehot_encoder(selected.seq.values, is_train=False)
importance = selected["4R"].values

# Alternative weightings that were tried and are currently disabled:
# importance = importance / np.sum(importance)
# importance = (importance + 1 - np.min(importance)) / np.std(importance)
# importance = (importance+1 - np.min(importance))/ (np.max(importance) - np.min(importance))
# importance = np.log(importance)
# importance = importance ** .75 / np.sum(importance ** .75)

# Active choice: rescale so the weights average to 1.
importance = importance / np.mean(importance)

# Sanity checks on the weights and the encoded tensor.
print("std:", np.std(importance))
print("mean:", np.mean(importance))
print(train_X.shape, importance.shape, importance, sep="\n")

In [None]:
# Load the validation table and show it in the notebook.
validation_data = pd.read_csv("../data/DRA009383/csv/SPR_DRA009383.csv")
display(validation_data)

# Encode the sequences without the extra complemented copies (is_train=False).
val_X = onehot_encoder(validation_data["Sequence"].values, is_train=False)
# Labels are taken verbatim from the "Positive/Negative" column.
val_Y = np.array(validation_data["Positive/Negative"].values)

print(val_X.shape, val_Y.shape, sep="\n")

## 6. GPU Diet

In [None]:
#######################################################################################
import os

# Expose only CUDA device index 1 to this process. The variable is set before
# TensorFlow is imported so it is guaranteed to be in place before any CUDA
# initialisation can happen.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

import tensorflow as tf
from tensorflow.config.experimental import list_physical_devices, set_memory_growth

gpus = list_physical_devices('GPU')
display(gpus)

# Allocate GPU memory on demand instead of reserving the whole card up front.
# TensorFlow requires the memory-growth setting to be the same on every
# visible GPU, so it is applied to all of them rather than only the first.
for gpu in gpus:
    try:
        set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Raised when the GPUs were already initialised; growth must be
        # configured before that point.
        print(e)

#######################################################################################

## 7. Critic

In [None]:
#critic
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, Conv1D, LeakyReLU, Flatten, Dropout, Lambda, BatchNormalization, Input, MaxPooling1D, Add
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.backend import clear_session
from tensorflow.keras import regularizers

# Drop any Keras state left over from earlier runs before building the models.
clear_session()

# YOUR PARAMETERS
#######################################################################################

# Seed NumPy and TensorFlow with the same value.
RANDOM_SEED = 1500
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

kernel_size = 7
# Sequence length minus kernel_size plus one.
# NOTE(review): create_critic below uses a literal kernel size of 7 and pools
# over input_dim[0]; neither of these two values is referenced there.
pool_size = train_X.shape[1] - kernel_size + 1

#######################################################################################

def convolution_block(x, filters, size, strides):
    """Apply BatchNormalization -> LeakyReLU -> Conv1D to ``x``.

    Args:
        x: Input Keras tensor.
        filters: Number of Conv1D output filters.
        size: Conv1D kernel size.
        strides: Conv1D stride.

    Returns:
        The Conv1D output tensor. The convolution uses 'same' padding and an
        L1 kernel penalty of 1e-5.
    """
    # NOTE(review): the WGAN-GP paper advises against batch normalization in
    # the critic (it suggests layer normalization) — confirm this is intended.
    normalized = BatchNormalization()(x)
    activated = LeakyReLU()(normalized)

    conv_layer = Conv1D(
        filters=filters,
        kernel_size=size,
        strides=strides,
        kernel_regularizer=regularizers.l1(1e-5),
        padding='same',
    )
    return conv_layer(activated)
    
def create_critic(input_dim):
    """Assemble the critic network and print its summary.

    Layout: a Conv1D stem (32 filters, kernel 7, 'same' padding), three
    ``convolution_block`` stages with 64, 64 and 32 filters whose output is
    added back onto the stem, a max-pool whose window equals ``input_dim[0]``,
    then Flatten -> Dense(32) -> LeakyReLU -> Dropout(0.25) -> Dense(1).
    The Conv1D stem and both Dense layers carry an L1 kernel penalty of 1e-5;
    the final Dense layer has no activation.

    Args:
        input_dim: Shape of one input sample (no batch axis). Its first entry
            is used as the pooling window.

    Returns:
        The uncompiled Keras ``Model``.
    """
    # MODEL
    #######################################################################################

    inp = Input(shape=input_dim)

    # Stem convolution; also serves as the skip branch of the residual sum.
    stem = Conv1D(
        filters=32,
        kernel_size=7,
        strides=1,
        kernel_regularizer=regularizers.l1(1e-5),
        padding='same',
    )(inp)

    # Three pre-activation conv stages; the last one returns to 32 filters so
    # its output can be added to the stem.
    hidden = stem
    for n_filters in (64, 64, 32):
        hidden = convolution_block(hidden, filters=n_filters, size=7, strides=1)
    merged = Add()([stem, hidden])

    # Pool across the whole first axis of the input shape.
    pooled = MaxPooling1D(pool_size=input_dim[0])(merged)
    features = Flatten()(pooled)

    dense_hidden = Dense(32, kernel_regularizer=regularizers.l1(1e-5))(features)
    dense_hidden = LeakyReLU()(dense_hidden)
    dense_hidden = Dropout(rate=0.25)(dense_hidden)
    outp = Dense(1, kernel_regularizer=regularizers.l1(1e-5))(dense_hidden)

    model = Model(inp, outp)

    #######################################################################################

    model.summary()

    return model

# Build the critic for one training sample's shape (train_X without its first axis).
critic = create_critic(train_X.shape[1:])

## 8. Generator

In [None]:
#generator
from tensorflow.keras.layers import Conv1DTranspose, Reshape, Activation, UpSampling1D, ZeroPadding1D, Input, BatchNormalization, ReLU
from tensorflow.keras import regularizers

# Size of the latent vector passed to create_generator.
z_dim = 100

# YOUR PARAMETERS
#######################################################################################

# Used as the kernel size of the generator's Conv1DTranspose layer.
design_len = 30

#######################################################################################

def create_generator(z_dim):
    """Build the generator network and print its summary.

    Layout: Dense(64) -> Dense(32) -> Reshape((1, 32)) ->
    Conv1DTranspose(4 filters, kernel_size=design_len, stride 1) -> softmax.
    With the layer's default 'valid' padding the transposed convolution turns
    the single time step into ``design_len`` positions, and the softmax
    normalises the 4 channels at each position.

    Args:
        z_dim: Latent dimensionality, given as an int (or a shape tuple).

    Returns:
        The uncompiled Keras ``Sequential`` model named "generator".
    """
    # Keras documents `shape` as a tuple; a bare int is only tolerated by some
    # versions, so wrap it explicitly.
    latent_shape = (z_dim,) if isinstance(z_dim, int) else tuple(z_dim)

    model = Sequential(name="generator")
    # MODEL
    #######################################################################################
    model.add(Input(shape=latent_shape))
    model.add(Dense(64, kernel_regularizer=regularizers.l1(1e-5)))
    # NOTE: no activation between the two Dense layers (the LeakyReLU below is
    # disabled), so together they act as a single linear map.
#     model.add(LeakyReLU())
    model.add(Dense(32, kernel_regularizer=regularizers.l1(1e-5)))
    model.add(Reshape((1, 32)))
    model.add(Conv1DTranspose(filters=4, kernel_size=design_len, strides=1, kernel_regularizer=regularizers.l1(1e-5)))

    model.add(Activation("softmax"))
    # Disabled alternatives kept for reference: a Gumbel-softmax output, and
    # padding 9 positions on each side with 0.25 via shift / zero-pad / unshift.
#     model.add(Lambda(gumbel_softmax)

#     model.add(Lambda(lambda x: x-.25))
#     model.add(ZeroPadding1D(padding=(9, 9)))
#     model.add(Lambda(lambda x: x+.25))

    #######################################################################################

    model.summary()

    return model

# Build the generator for a latent vector of size z_dim.
generator = create_generator(z_dim)

## 8. Optimizers

In [None]:
from tensorflow.keras.optimizers import Adam

# Separate Adam instances with identical settings for the two networks:
# learning rate 1e-4, beta_1 = 0, beta_2 = 0.9.
critic_opt = Adam(learning_rate=1e-4, beta_1=0, beta_2=0.9)
generator_opt = Adam(learning_rate=1e-4, beta_1=0, beta_2=0.9)

## 12. Write GEN FASTA

## 13. Train Model

In [None]:
import random
from tqdm import tqdm

# Mini-batch size; train() reads this module-level value directly.
batch_size = 64

def train(dataset, importance, epoch, n_critic=5):
    """Run the adversarial training loop.

    Each epoch draws a fresh random permutation of the samples, walks through
    it in mini-batches of the module-level ``batch_size`` and, for every
    mini-batch, performs ``n_critic`` calls to ``train_critic`` followed by one
    call to ``train_generator``. After each epoch the mean generator and
    critic losses are printed and ``show_plot`` / ``write_gen_fasta`` are
    called. Samples beyond the last full mini-batch are skipped for that epoch.

    Args:
        dataset: Array of real samples, indexed along its first axis.
        importance: Per-sample weights aligned with ``dataset`` (same length
            along the first axis), or a scalar passed unchanged to
            ``train_critic`` for every batch.
        epoch: Number of epochs to run.
        n_critic: Critic updates per generator update.

    Raises:
        ValueError: If ``dataset`` holds fewer than ``batch_size`` samples, or
            if ``importance`` is an array whose length differs from
            ``dataset``.
    """
    n_samples = dataset.shape[0]
    n_batches = n_samples // batch_size
    if n_batches == 0:
        raise ValueError(
            "dataset has {} samples, fewer than batch_size={}".format(n_samples, batch_size)
        )

    weights = np.asarray(importance)
    per_sample = weights.ndim > 0
    if per_sample and weights.shape[0] != n_samples:
        raise ValueError(
            "importance has {} entries but dataset has {} samples".format(
                weights.shape[0], n_samples
            )
        )

    # `epoch` is the epoch count; a separate loop variable avoids shadowing it.
    for epoch_idx in range(epoch):
        g_loss = []
        c_loss = []

        # Apply ONE permutation to both the samples and their weights so each
        # weight stays attached to its own sample.
        order = list(range(n_samples))
        random.shuffle(order)
        shuffled_X = dataset[order]
        shuffled_w = weights[order] if per_sample else importance

        for batch in tqdm(range(n_batches)):
            window = slice(batch * batch_size, (batch + 1) * batch_size)
            real_X = shuffled_X[window]
            sample_weights = shuffled_w[window] if per_sample else shuffled_w

            # Several critic steps on this batch, then one generator step.
            loss_c = 0
            for _ in range(n_critic):
                loss_c += train_critic(real_X, sample_weights)
            loss_g = train_generator(batch_size)

            g_loss.append(loss_g)
            c_loss.append(loss_c / n_critic)

        # loss & plot
        print('epoch {}'.format(epoch_idx + 1))
        print(epoch_idx + 1, ":", "g_loss: {}".format(sum(g_loss) / len(g_loss)))
        print(epoch_idx + 1, ":", "c_loss: {}".format(sum(c_loss) / len(c_loss)))

        show_plot()
        # The zero-based epoch index is passed, as before.
        write_gen_fasta(epoch_idx)
        print("=" * 80)

In [None]:
# Train for 100 epochs, weighting samples by the normalised "4R" importance.
train(train_X, importance, 100)
# Disabled variant that passes a constant weight of 1 instead:
# train(train_X, 1, 100)