This notebook takes the hyperparameter-tuned CNNs trained on BacDive+ that were selected for transfer learning, feeds them the sequences from Merck&Co+ T1626, and extracts their CNN representations (mutant, wild type, and the difference between the two) at different layers.

## Set up notebook and environment: ##

### Import libraries and connect to Google Drive: ###

In [0]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import keras #where?
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.image as mpimg
from keras.utils import plot_model
from sklearn import preprocessing
import gc

Using TensorFlow backend.


In [0]:
# Mount the user's Google Drive inside the Colab VM at /content/gdrive.
# The data and weight locations configured further down point into this mount.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


### Specify location of all relevant data (YOU HAVE TO INSERT YOUR FILE LOCATIONS): ###

In [0]:
# Every location is anchored at the absolute path where drive.mount() attaches
# Google Drive ('/content/gdrive'). The previous relative "gdrive/..." paths only
# resolved while the working directory happened to be /content.
DRIVE_ROOT = "/content/gdrive/My Drive/iGEM/Databases/"

print("Check correctnes of locations: ")
# Input folder (encoded sequences) and output folder (CNN representations).
data_folder_location = DRIVE_ROOT + "Merck&Co/Data/T1626/sequence_sampler_T1626/"
data_target_location = DRIVE_ROOT + "Merck&Co/Data/T1626/cnn_rep_sampler_T1626/"
print("Folders: ")
for folder in (data_folder_location, data_target_location):
  print("\t",os.path.isdir(folder))
print("Files: ")
# Encoded wild-type / mutant sequences and the table holding the mutation ids.
X_wt_loc= data_folder_location+"T1626_X_wild.npy"
X_mut_loc = data_folder_location+"T1626_X_mut.npy"
Mutation_Table_loc = data_folder_location+"T1626_sequences_only.csv"
# Weight files of the transfer-learning CNN regressors.
weight_loc = DRIVE_ROOT + "BacDive/Models/CNN_Regressor_Transfer_Learning/model_transfer_learning_default.hdf5"
weight_loc_64 = DRIVE_ROOT + "BacDive/Models/CNN_Regressor_Transfer_Learning/model_transfer_learning_default_64.hdf5"
# One True/False line per file, in the same order as the definitions above.
for path in (X_wt_loc, X_mut_loc, Mutation_Table_loc, weight_loc, weight_loc_64):
  print("\t",os.path.isfile(path))

Check correctnes of locations: 
Folders: 
	 True
	 True
Files: 
	 True
	 True
	 True
	 True
	 True


### Retrieve Data 

In [0]:
# Wild-type sequences as a NumPy array; later passed to model.predict as network input.
X_wt = np.load(X_wt_loc)

In [0]:
# Mutant sequences as a NumPy array; later passed to model.predict as network input.
X_mut = np.load(X_mut_loc)

In [0]:
# Table whose "Mutation" column is later attached to the representations as row identifier.
Mutation_Table = pd.read_csv(Mutation_Table_loc)

### Supplementary functions

In [0]:
def calc_pred_truncated_model(model, model_source, weight_loc, X):
  """Run X through `model` after giving it the leading-layer weights of `model_source`.

  The weights stored at `weight_loc` are loaded into `model_source`. Then, for
  every layer index i of `model`, the weights of `model_source.layers[i]` are
  copied into `model.layers[i]`. `model` is therefore expected to replicate the
  first len(model.layers) layers of `model_source` in the same order.

  Neither model is compiled: loading weights and calling `predict` only use the
  forward pass, so no optimizer or loss is needed.

  Args:
    model: truncated Keras model that receives the weights and produces the output.
    model_source: full Keras model whose layout matches the weight file.
    weight_loc: path handed to `model_source.load_weights`.
    X: input data handed to `model.predict`.

  Returns:
    The result of `model.predict(X, verbose=1)`.

  Raises:
    ValueError: if `model` has more layers than `model_source`, or if two layers
      sharing an index are of different types (the copy would be meaningless).
  """
  target_layers = model.layers
  source_layers = model_source.layers

  # Fail early and clearly instead of running into an IndexError mid-transfer.
  if len(target_layers) > len(source_layers):
    raise ValueError(
      "Truncated model has %d layers but the source model only has %d."
      % (len(target_layers), len(source_layers))
    )

  model_source.load_weights(weight_loc)

  #transfer appropriate weights
  for idx, (target_layer, source_layer) in enumerate(zip(target_layers, source_layers)):
    # The copy is purely positional, so verify that both positions hold the same kind of layer.
    if type(target_layer) is not type(source_layer):
      raise ValueError(
        "Layer %d differs between the models: %s vs. %s."
        % (idx, type(target_layer).__name__, type(source_layer).__name__)
      )
    target_layer.set_weights(source_layer.get_weights())

  print("Calculating predictions:")
  y_pred = model.predict(X, verbose=1)

  return y_pred

## Extract representation


In [0]:
# Number of positions per input sequence (input width of the models below).
SEQUENCE_LEN = 650
# Vocabulary size of the embedding: the 20 canonical amino acids that remain after
# sequences with non-canonical residues were removed, plus "0" encoding "None".
CLASSES = 21

### Default Model Last Layer (1)

**Define Models:**

In [0]:
# Full ("source") regression network. calc_pred_truncated_model loads the weight
# file into this model, so its layout presumably mirrors the trained network.
inputs = tf.keras.layers.Input(shape=(SEQUENCE_LEN,))

# Embed each of the CLASSES token ids as an 8-dimensional vector.
x = tf.keras.layers.Embedding(CLASSES, 8, input_length=SEQUENCE_LEN)(inputs) 

# Block 1: three Conv1D(256) -> BatchNorm -> ReLU stages
# (first conv: kernel size 7, stride 2; the other two: kernel size 3).
x = tf.keras.layers.Conv1D(256, 7, 2)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation("relu")(x)
x = tf.keras.layers.Conv1D(256, 3)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation("relu")(x)
x = tf.keras.layers.Conv1D(256, 3)(x)
x = tf.keras.layers.BatchNormalization()(x)
# Keep the block-1 activation: it is pooled on its own further down.
x_mid = tf.keras.layers.Activation("relu")(x)

# Block 2: max-pool by 3, then three Conv1D(512, kernel size 3) -> BatchNorm -> ReLU stages.
x = tf.keras.layers.MaxPooling1D(3)(x_mid)
x = tf.keras.layers.Conv1D(512, 3)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation("relu")(x)
x = tf.keras.layers.Conv1D(512, 3)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation("relu")(x)
x = tf.keras.layers.Conv1D(512, 3)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation("relu")(x)

# Collapse the sequence axis of both branches with a global maximum.
x_mid = tf.keras.layers.GlobalMaxPooling1D()(x_mid)
x = tf.keras.layers.GlobalMaxPooling1D()(x)

# Join the pooled block-1 and block-2 features, then two Dense(512) -> BatchNorm -> ReLU stages.
x = tf.keras.layers.concatenate([x_mid, x], axis=-1)
x = tf.keras.layers.Dense(512)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation("relu")(x)
x = tf.keras.layers.Dense(512)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation("relu")(x)
# Regression head: a single linear output unit.
x = tf.keras.layers.Dense(1, activation="linear")(x)

model_source = tf.keras.Model(inputs=inputs, outputs=x)
model_source.summary()

Instructions for updating:
Colocations handled automatically by placer.
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 650)          0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, 650, 8)       168         input_1[0][0]                    
__________________________________________________________________________________________________
conv1d (Conv1D)                 (None, 322, 256)     14592       embedding[0][0]                  
__________________________________________________________________________________________________
batch_normalization_v1 (BatchNo (None, 322, 256)     1024        conv1d[0][0]                     
_____________________________________

In [0]:
# Truncated network: the same layer sequence as the source model, but without the
# final Dense(1) head, so it outputs the activation of the second Dense(512) stage.
# The layer order must stay aligned with the source model because weights are
# copied by layer position.
inputs = tf.keras.layers.Input(shape=(SEQUENCE_LEN,))

# Embed each of the CLASSES token ids as an 8-dimensional vector.
x = tf.keras.layers.Embedding(CLASSES, 8, input_length=SEQUENCE_LEN)(inputs) 

# Block 1: three Conv1D(256) -> BatchNorm -> ReLU stages
# (first conv: kernel size 7, stride 2; the other two: kernel size 3).
x = tf.keras.layers.Conv1D(256, 7, 2)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation("relu")(x)
x = tf.keras.layers.Conv1D(256, 3)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation("relu")(x)
x = tf.keras.layers.Conv1D(256, 3)(x)
x = tf.keras.layers.BatchNormalization()(x)
# Keep the block-1 activation: it is pooled on its own further down.
x_mid = tf.keras.layers.Activation("relu")(x)

# Block 2: max-pool by 3, then three Conv1D(512, kernel size 3) -> BatchNorm -> ReLU stages.
x = tf.keras.layers.MaxPooling1D(3)(x_mid)
x = tf.keras.layers.Conv1D(512, 3)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation("relu")(x)
x = tf.keras.layers.Conv1D(512, 3)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation("relu")(x)
x = tf.keras.layers.Conv1D(512, 3)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation("relu")(x)

# Collapse the sequence axis of both branches with a global maximum.
x_mid = tf.keras.layers.GlobalMaxPooling1D()(x_mid)
x = tf.keras.layers.GlobalMaxPooling1D()(x)

# Join the pooled features, then two Dense(512) -> BatchNorm -> ReLU stages.
x = tf.keras.layers.concatenate([x_mid, x], axis=-1)
x = tf.keras.layers.Dense(512)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation("relu")(x)
x = tf.keras.layers.Dense(512)(x)
x = tf.keras.layers.BatchNormalization()(x)
# Model output: 512 ReLU-activated features per sequence.
x = tf.keras.layers.Activation("relu")(x)

model = tf.keras.Model(inputs=inputs, outputs=x)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 650)          0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 650, 8)       168         input_2[0][0]                    
__________________________________________________________________________________________________
conv1d_6 (Conv1D)               (None, 322, 256)     14592       embedding_1[0][0]                
__________________________________________________________________________________________________
batch_normalization_v1_8 (Batch (None, 322, 256)     1024        conv1d_6[0][0]                   
__________________________________________________________________________________________________
activation

**Calculate representations:**

In [0]:
# Representation of the wild-type sequences from the truncated `model` defined above.
# The second line displays the first 10 features of the first sequence as a spot check.
X_wt_rep = calc_pred_truncated_model(model, model_source, weight_loc, X_wt)
X_wt_rep[0][:10]

Calculating predictions:


array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)

In [0]:
# Representation of the mutant sequences from the truncated `model` defined above.
# The second line displays the first 10 features of the first sequence as a spot check.
X_mut_rep = calc_pred_truncated_model(model, model_source, weight_loc, X_mut)
X_mut_rep[0][:10]

Calculating predictions:


array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)

In [0]:
# Feature-wise change caused by each mutation: mutant minus wild type.
X_diff_rep = X_mut_rep - X_wt_rep
# Show the last 10 features of the first sequence.
X_diff_rep[0, -10:]

array([ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        , -0.04548088],
      dtype=float32)

**Create dataframes:**

In [0]:
# Wrap the wild-type features in a table; feature i becomes column "wt_rep_<i>".
X_wt_rep = pd.DataFrame(X_wt_rep).rename(columns=lambda col: "wt_rep_"+str(col))
X_wt_rep.head()

Unnamed: 0,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,wt_rep_9,...,wt_rep_502,wt_rep_503,wt_rep_504,wt_rep_505,wt_rep_506,wt_rep_507,wt_rep_508,wt_rep_509,wt_rep_510,wt_rep_511
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.348201
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.348201
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.348201
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.348201
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.348201


In [0]:
# Wrap the mutant features in a table; feature i becomes column "mut_rep_<i>".
X_mut_rep = pd.DataFrame(X_mut_rep).rename(columns=lambda col: "mut_rep_"+str(col))
X_mut_rep.head()

Unnamed: 0,mut_rep_0,mut_rep_1,mut_rep_2,mut_rep_3,mut_rep_4,mut_rep_5,mut_rep_6,mut_rep_7,mut_rep_8,mut_rep_9,...,mut_rep_502,mut_rep_503,mut_rep_504,mut_rep_505,mut_rep_506,mut_rep_507,mut_rep_508,mut_rep_509,mut_rep_510,mut_rep_511
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.30272
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.361002
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.304854
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.302138
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.344548


In [0]:
# Wrap the difference features in a table; feature i becomes column "diff_rep_<i>".
X_diff_rep = pd.DataFrame(X_diff_rep).rename(columns=lambda col: "diff_rep_"+str(col))
X_diff_rep.head()

Unnamed: 0,diff_rep_0,diff_rep_1,diff_rep_2,diff_rep_3,diff_rep_4,diff_rep_5,diff_rep_6,diff_rep_7,diff_rep_8,diff_rep_9,...,diff_rep_502,diff_rep_503,diff_rep_504,diff_rep_505,diff_rep_506,diff_rep_507,diff_rep_508,diff_rep_509,diff_rep_510,diff_rep_511
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.045481
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012801
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.043347
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.046063
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.003653


In [0]:
# Put the wild-type, mutant and difference features side by side, one row per sequence.
X_all = pd.concat(objs=(X_wt_rep, X_mut_rep, X_diff_rep), axis="columns")
X_all.head()

Unnamed: 0,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,wt_rep_9,...,diff_rep_502,diff_rep_503,diff_rep_504,diff_rep_505,diff_rep_506,diff_rep_507,diff_rep_508,diff_rep_509,diff_rep_510,diff_rep_511
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.045481
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012801
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.043347
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.046063
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.003653


In [0]:
# Prepend the mutation identifier to every row. pd.concat(axis=1) aligns on the
# index and silently pads with NaN when the frames differ in length, so verify
# that the table and the representations cover the same number of sequences.
assert len(Mutation_Table) == len(X_all), "Mutation_Table and X_all differ in row count"
X_all = pd.concat([Mutation_Table[["Mutation"]], X_all], axis=1)
X_all.head()

Unnamed: 0,Mutation,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,...,diff_rep_502,diff_rep_503,diff_rep_504,diff_rep_505,diff_rep_506,diff_rep_507,diff_rep_508,diff_rep_509,diff_rep_510,diff_rep_511
0,1AKY@A@I213F,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.045481
1,1AKY@A@N169D,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012801
2,1AKY@A@Q48E,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.043347
3,1AKY@A@T110H,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.046063
4,1AKY@A@T77H,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.003653


In [0]:
# Prepend the mutation identifier to every row. pd.concat(axis=1) aligns on the
# index and silently pads with NaN when the frames differ in length, so verify
# that the table and the representations cover the same number of sequences.
assert len(Mutation_Table) == len(X_diff_rep), "Mutation_Table and X_diff_rep differ in row count"
X_diff_rep = pd.concat([Mutation_Table[["Mutation"]], X_diff_rep], axis=1)
X_diff_rep.head()

Unnamed: 0,Mutation,diff_rep_0,diff_rep_1,diff_rep_2,diff_rep_3,diff_rep_4,diff_rep_5,diff_rep_6,diff_rep_7,diff_rep_8,...,diff_rep_502,diff_rep_503,diff_rep_504,diff_rep_505,diff_rep_506,diff_rep_507,diff_rep_508,diff_rep_509,diff_rep_510,diff_rep_511
0,1AKY@A@I213F,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.045481
1,1AKY@A@N169D,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012801
2,1AKY@A@Q48E,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.043347
3,1AKY@A@T110H,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.046063
4,1AKY@A@T77H,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.003653


**Save dataframes:**

In [0]:
# Write the combined table (Mutation id + wt/mut/diff features) as CSV, without the row index.
X_all.to_csv(data_target_location+"sequence_cnn_rep_all_default_01.csv", index=False)

In [0]:
# Write the difference-only table (Mutation id + diff features) as CSV, without the row index.
X_diff_rep.to_csv(data_target_location+"sequence_cnn_rep_diff_default_01.csv", index=False)

### Default Model Second to Last Layer (2)

**Define Models:**

In [0]:
# Full ("source") regression network. calc_pred_truncated_model loads the weight
# file into this model, so its layout presumably mirrors the trained network.
inputs = tf.keras.layers.Input(shape=(SEQUENCE_LEN,))

# Embed each of the CLASSES token ids as an 8-dimensional vector.
x = tf.keras.layers.Embedding(CLASSES, 8, input_length=SEQUENCE_LEN)(inputs) 

# Block 1: three Conv1D(256) -> BatchNorm -> ReLU stages
# (first conv: kernel size 7, stride 2; the other two: kernel size 3).
x = tf.keras.layers.Conv1D(256, 7, 2)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation("relu")(x)
x = tf.keras.layers.Conv1D(256, 3)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation("relu")(x)
x = tf.keras.layers.Conv1D(256, 3)(x)
x = tf.keras.layers.BatchNormalization()(x)
# Keep the block-1 activation: it is pooled on its own further down.
x_mid = tf.keras.layers.Activation("relu")(x)

# Block 2: max-pool by 3, then three Conv1D(512, kernel size 3) -> BatchNorm -> ReLU stages.
x = tf.keras.layers.MaxPooling1D(3)(x_mid)
x = tf.keras.layers.Conv1D(512, 3)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation("relu")(x)
x = tf.keras.layers.Conv1D(512, 3)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation("relu")(x)
x = tf.keras.layers.Conv1D(512, 3)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation("relu")(x)

# Collapse the sequence axis of both branches with a global maximum.
x_mid = tf.keras.layers.GlobalMaxPooling1D()(x_mid)
x = tf.keras.layers.GlobalMaxPooling1D()(x)

# Join the pooled block-1 and block-2 features, then two Dense(512) -> BatchNorm -> ReLU stages.
x = tf.keras.layers.concatenate([x_mid, x], axis=-1)
x = tf.keras.layers.Dense(512)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation("relu")(x)
x = tf.keras.layers.Dense(512)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation("relu")(x)
# Regression head: a single linear output unit.
x = tf.keras.layers.Dense(1, activation="linear")(x)

model_source = tf.keras.Model(inputs=inputs, outputs=x)
model_source.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            (None, 650)          0                                            
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, 650, 8)       168         input_3[0][0]                    
__________________________________________________________________________________________________
conv1d_12 (Conv1D)              (None, 322, 256)     14592       embedding_2[0][0]                
__________________________________________________________________________________________________
batch_normalization_v1_16 (Batc (None, 322, 256)     1024        conv1d_12[0][0]                  
__________________________________________________________________________________________________
activation

In [0]:
# Truncated network: the same layer sequence as the source model, cut after the
# first Dense(512) -> BatchNorm -> ReLU stage, whose activation is the output.
# The layer order must stay aligned with the source model because weights are
# copied by layer position.
inputs = tf.keras.layers.Input(shape=(SEQUENCE_LEN,))

# Embed each of the CLASSES token ids as an 8-dimensional vector.
x = tf.keras.layers.Embedding(CLASSES, 8, input_length=SEQUENCE_LEN)(inputs) 

# Block 1: three Conv1D(256) -> BatchNorm -> ReLU stages
# (first conv: kernel size 7, stride 2; the other two: kernel size 3).
x = tf.keras.layers.Conv1D(256, 7, 2)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation("relu")(x)
x = tf.keras.layers.Conv1D(256, 3)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation("relu")(x)
x = tf.keras.layers.Conv1D(256, 3)(x)
x = tf.keras.layers.BatchNormalization()(x)
# Keep the block-1 activation: it is pooled on its own further down.
x_mid = tf.keras.layers.Activation("relu")(x)

# Block 2: max-pool by 3, then three Conv1D(512, kernel size 3) -> BatchNorm -> ReLU stages.
x = tf.keras.layers.MaxPooling1D(3)(x_mid)
x = tf.keras.layers.Conv1D(512, 3)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation("relu")(x)
x = tf.keras.layers.Conv1D(512, 3)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation("relu")(x)
x = tf.keras.layers.Conv1D(512, 3)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation("relu")(x)

# Collapse the sequence axis of both branches with a global maximum.
x_mid = tf.keras.layers.GlobalMaxPooling1D()(x_mid)
x = tf.keras.layers.GlobalMaxPooling1D()(x)

# Join the pooled features, then a single Dense(512) -> BatchNorm -> ReLU stage.
x = tf.keras.layers.concatenate([x_mid, x], axis=-1)
x = tf.keras.layers.Dense(512)(x)
x = tf.keras.layers.BatchNormalization()(x)
# Model output: 512 ReLU-activated features per sequence.
x = tf.keras.layers.Activation("relu")(x)

model = tf.keras.Model(inputs=inputs, outputs=x)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            (None, 650)          0                                            
__________________________________________________________________________________________________
embedding_3 (Embedding)         (None, 650, 8)       168         input_4[0][0]                    
__________________________________________________________________________________________________
conv1d_18 (Conv1D)              (None, 322, 256)     14592       embedding_3[0][0]                
__________________________________________________________________________________________________
batch_normalization_v1_24 (Batc (None, 322, 256)     1024        conv1d_18[0][0]                  
__________________________________________________________________________________________________
activation

**Calculate representations:**

In [0]:
# Representation of the wild-type sequences from the truncated `model` defined above.
# The second line displays the first 10 features of the first sequence as a spot check.
X_wt_rep = calc_pred_truncated_model(model, model_source, weight_loc, X_wt)
X_wt_rep[0][:10]

Calculating predictions:


array([0.8179297 , 0.        , 0.6262641 , 0.22631238, 0.78447473,
       1.4895139 , 0.        , 0.03177214, 0.        , 0.5299945 ],
      dtype=float32)

In [0]:
# Representation of the mutant sequences from the truncated `model` defined above.
# The second line displays the first 10 features of the first sequence as a spot check.
X_mut_rep = calc_pred_truncated_model(model, model_source, weight_loc, X_mut)
X_mut_rep[0][:10]

Calculating predictions:


array([0.8322854 , 0.        , 0.5803604 , 0.22289486, 0.9261756 ,
       1.5741313 , 0.        , 0.28255105, 0.        , 0.52822924],
      dtype=float32)

In [0]:
# Feature-wise change caused by each mutation: mutant minus wild type.
X_diff_rep = X_mut_rep - X_wt_rep
# Show the first 10 features of the first sequence.
X_diff_rep[0, :10]

array([ 0.01435572,  0.        , -0.04590368, -0.00341752,  0.14170086,
        0.08461738,  0.        ,  0.2507789 ,  0.        , -0.00176525],
      dtype=float32)

**Create dataframes:**

In [0]:
# Wrap the wild-type features in a table; feature i becomes column "wt_rep_<i>".
X_wt_rep = pd.DataFrame(X_wt_rep).rename(columns=lambda col: "wt_rep_"+str(col))
X_wt_rep.head()

Unnamed: 0,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,wt_rep_9,...,wt_rep_502,wt_rep_503,wt_rep_504,wt_rep_505,wt_rep_506,wt_rep_507,wt_rep_508,wt_rep_509,wt_rep_510,wt_rep_511
0,0.81793,0.0,0.626264,0.226312,0.784475,1.489514,0.0,0.031772,0.0,0.529994,...,0.0,0.0,0.0,0.672851,0.0,2.373915,7.3e-05,1.148515,0.0,0.739182
1,0.81793,0.0,0.626264,0.226312,0.784475,1.489514,0.0,0.031772,0.0,0.529994,...,0.0,0.0,0.0,0.672851,0.0,2.373915,7.3e-05,1.148515,0.0,0.739182
2,0.81793,0.0,0.626264,0.226312,0.784475,1.489514,0.0,0.031772,0.0,0.529994,...,0.0,0.0,0.0,0.672851,0.0,2.373915,7.3e-05,1.148515,0.0,0.739182
3,0.81793,0.0,0.626264,0.226312,0.784475,1.489514,0.0,0.031772,0.0,0.529994,...,0.0,0.0,0.0,0.672851,0.0,2.373915,7.3e-05,1.148515,0.0,0.739182
4,0.81793,0.0,0.626264,0.226312,0.784475,1.489514,0.0,0.031772,0.0,0.529994,...,0.0,0.0,0.0,0.672851,0.0,2.373915,7.3e-05,1.148515,0.0,0.739182


In [0]:
# Wrap the mutant features in a table; feature i becomes column "mut_rep_<i>".
X_mut_rep = pd.DataFrame(X_mut_rep).rename(columns=lambda col: "mut_rep_"+str(col))
X_mut_rep.head()

Unnamed: 0,mut_rep_0,mut_rep_1,mut_rep_2,mut_rep_3,mut_rep_4,mut_rep_5,mut_rep_6,mut_rep_7,mut_rep_8,mut_rep_9,...,mut_rep_502,mut_rep_503,mut_rep_504,mut_rep_505,mut_rep_506,mut_rep_507,mut_rep_508,mut_rep_509,mut_rep_510,mut_rep_511
0,0.832285,0.0,0.58036,0.222895,0.926176,1.574131,0.0,0.282551,0.0,0.528229,...,0.0,0.0,0.0,0.759377,0.0,2.361954,0.0,1.168339,0.012785,0.669754
1,0.768922,0.0,0.555668,0.19406,0.835667,1.481572,0.0,0.0,0.0,0.431541,...,0.0,0.0,0.0,0.704316,0.0,2.393311,0.0,1.14889,0.016798,0.797758
2,0.494086,0.0,0.595339,0.0,0.806259,1.36455,0.0,0.056347,0.0,0.507511,...,0.0,0.0,0.0,0.594373,0.0,2.203408,0.0,1.080276,0.040969,0.70992
3,0.773166,0.0,0.724892,0.108154,0.74898,1.478188,0.0,0.068977,0.0,0.411394,...,0.0,0.0,0.0,0.571045,0.0,2.341264,0.095745,1.122434,0.0,0.73947
4,0.86804,0.0,0.55645,0.178904,0.769714,1.484469,0.0,0.0,0.0,0.503011,...,0.0,0.0,0.0,0.659532,0.0,2.278608,0.045472,1.124859,0.0,0.706146


In [0]:
# Wrap the difference features in a table; feature i becomes column "diff_rep_<i>".
X_diff_rep = pd.DataFrame(X_diff_rep).rename(columns=lambda col: "diff_rep_"+str(col))
X_diff_rep.head()

Unnamed: 0,diff_rep_0,diff_rep_1,diff_rep_2,diff_rep_3,diff_rep_4,diff_rep_5,diff_rep_6,diff_rep_7,diff_rep_8,diff_rep_9,...,diff_rep_502,diff_rep_503,diff_rep_504,diff_rep_505,diff_rep_506,diff_rep_507,diff_rep_508,diff_rep_509,diff_rep_510,diff_rep_511
0,0.014356,0.0,-0.045904,-0.003418,0.141701,0.084617,0.0,0.250779,0.0,-0.001765,...,0.0,0.0,0.0,0.086527,0.0,-0.011961,-7.3e-05,0.019824,0.012785,-0.069428
1,-0.049008,0.0,-0.070596,-0.032252,0.051192,-0.007942,0.0,-0.031772,0.0,-0.098454,...,0.0,0.0,0.0,0.031465,0.0,0.019396,-7.3e-05,0.000375,0.016798,0.058576
2,-0.323844,0.0,-0.030926,-0.226312,0.021784,-0.124964,0.0,0.024575,0.0,-0.022483,...,0.0,0.0,0.0,-0.078477,0.0,-0.170506,-7.3e-05,-0.068239,0.040969,-0.029262
3,-0.044764,0.0,0.098628,-0.118158,-0.035495,-0.011326,0.0,0.037205,0.0,-0.1186,...,0.0,0.0,0.0,-0.101806,0.0,-0.032651,0.095672,-0.026081,0.0,0.000287
4,0.05011,0.0,-0.069814,-0.047409,-0.01476,-0.005044,0.0,-0.031772,0.0,-0.026983,...,0.0,0.0,0.0,-0.013319,0.0,-0.095306,0.045399,-0.023656,0.0,-0.033036


In [0]:
# Put the wild-type, mutant and difference features side by side, one row per sequence.
X_all = pd.concat(objs=(X_wt_rep, X_mut_rep, X_diff_rep), axis="columns")
X_all.head()

Unnamed: 0,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,wt_rep_9,...,diff_rep_502,diff_rep_503,diff_rep_504,diff_rep_505,diff_rep_506,diff_rep_507,diff_rep_508,diff_rep_509,diff_rep_510,diff_rep_511
0,0.81793,0.0,0.626264,0.226312,0.784475,1.489514,0.0,0.031772,0.0,0.529994,...,0.0,0.0,0.0,0.086527,0.0,-0.011961,-7.3e-05,0.019824,0.012785,-0.069428
1,0.81793,0.0,0.626264,0.226312,0.784475,1.489514,0.0,0.031772,0.0,0.529994,...,0.0,0.0,0.0,0.031465,0.0,0.019396,-7.3e-05,0.000375,0.016798,0.058576
2,0.81793,0.0,0.626264,0.226312,0.784475,1.489514,0.0,0.031772,0.0,0.529994,...,0.0,0.0,0.0,-0.078477,0.0,-0.170506,-7.3e-05,-0.068239,0.040969,-0.029262
3,0.81793,0.0,0.626264,0.226312,0.784475,1.489514,0.0,0.031772,0.0,0.529994,...,0.0,0.0,0.0,-0.101806,0.0,-0.032651,0.095672,-0.026081,0.0,0.000287
4,0.81793,0.0,0.626264,0.226312,0.784475,1.489514,0.0,0.031772,0.0,0.529994,...,0.0,0.0,0.0,-0.013319,0.0,-0.095306,0.045399,-0.023656,0.0,-0.033036


In [0]:
# Prepend the mutation identifier to every row. pd.concat(axis=1) aligns on the
# index and silently pads with NaN when the frames differ in length, so verify
# that the table and the representations cover the same number of sequences.
assert len(Mutation_Table) == len(X_all), "Mutation_Table and X_all differ in row count"
X_all = pd.concat([Mutation_Table[["Mutation"]], X_all], axis=1)
X_all.head()

Unnamed: 0,Mutation,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,...,diff_rep_502,diff_rep_503,diff_rep_504,diff_rep_505,diff_rep_506,diff_rep_507,diff_rep_508,diff_rep_509,diff_rep_510,diff_rep_511
0,1AKY@A@I213F,0.81793,0.0,0.626264,0.226312,0.784475,1.489514,0.0,0.031772,0.0,...,0.0,0.0,0.0,0.086527,0.0,-0.011961,-7.3e-05,0.019824,0.012785,-0.069428
1,1AKY@A@N169D,0.81793,0.0,0.626264,0.226312,0.784475,1.489514,0.0,0.031772,0.0,...,0.0,0.0,0.0,0.031465,0.0,0.019396,-7.3e-05,0.000375,0.016798,0.058576
2,1AKY@A@Q48E,0.81793,0.0,0.626264,0.226312,0.784475,1.489514,0.0,0.031772,0.0,...,0.0,0.0,0.0,-0.078477,0.0,-0.170506,-7.3e-05,-0.068239,0.040969,-0.029262
3,1AKY@A@T110H,0.81793,0.0,0.626264,0.226312,0.784475,1.489514,0.0,0.031772,0.0,...,0.0,0.0,0.0,-0.101806,0.0,-0.032651,0.095672,-0.026081,0.0,0.000287
4,1AKY@A@T77H,0.81793,0.0,0.626264,0.226312,0.784475,1.489514,0.0,0.031772,0.0,...,0.0,0.0,0.0,-0.013319,0.0,-0.095306,0.045399,-0.023656,0.0,-0.033036


In [0]:
# Prepend the mutation identifier to every row. pd.concat(axis=1) aligns on the
# index and silently pads with NaN when the frames differ in length, so verify
# that the table and the representations cover the same number of sequences.
assert len(Mutation_Table) == len(X_diff_rep), "Mutation_Table and X_diff_rep differ in row count"
X_diff_rep = pd.concat([Mutation_Table[["Mutation"]], X_diff_rep], axis=1)
X_diff_rep.head()

Unnamed: 0,Mutation,diff_rep_0,diff_rep_1,diff_rep_2,diff_rep_3,diff_rep_4,diff_rep_5,diff_rep_6,diff_rep_7,diff_rep_8,...,diff_rep_502,diff_rep_503,diff_rep_504,diff_rep_505,diff_rep_506,diff_rep_507,diff_rep_508,diff_rep_509,diff_rep_510,diff_rep_511
0,1AKY@A@I213F,0.014356,0.0,-0.045904,-0.003418,0.141701,0.084617,0.0,0.250779,0.0,...,0.0,0.0,0.0,0.086527,0.0,-0.011961,-7.3e-05,0.019824,0.012785,-0.069428
1,1AKY@A@N169D,-0.049008,0.0,-0.070596,-0.032252,0.051192,-0.007942,0.0,-0.031772,0.0,...,0.0,0.0,0.0,0.031465,0.0,0.019396,-7.3e-05,0.000375,0.016798,0.058576
2,1AKY@A@Q48E,-0.323844,0.0,-0.030926,-0.226312,0.021784,-0.124964,0.0,0.024575,0.0,...,0.0,0.0,0.0,-0.078477,0.0,-0.170506,-7.3e-05,-0.068239,0.040969,-0.029262
3,1AKY@A@T110H,-0.044764,0.0,0.098628,-0.118158,-0.035495,-0.011326,0.0,0.037205,0.0,...,0.0,0.0,0.0,-0.101806,0.0,-0.032651,0.095672,-0.026081,0.0,0.000287
4,1AKY@A@T77H,0.05011,0.0,-0.069814,-0.047409,-0.01476,-0.005044,0.0,-0.031772,0.0,...,0.0,0.0,0.0,-0.013319,0.0,-0.095306,0.045399,-0.023656,0.0,-0.033036


**Save dataframes:**

In [0]:
# Write the combined table (Mutation id + wt/mut/diff features) as CSV, without the row index.
X_all.to_csv(data_target_location+"sequence_cnn_rep_all_default_02.csv", index=False)

In [0]:
# Write the difference-only table (Mutation id + diff features) as CSV, without the row index.
X_diff_rep.to_csv(data_target_location+"sequence_cnn_rep_diff_default_02.csv", index=False)

### Default Model Third to Last Layer (3)

**Define Models:**

In [0]:
# Full ("source") regression network. calc_pred_truncated_model loads the weight
# file into this model, so its layout presumably mirrors the trained network.
inputs = tf.keras.layers.Input(shape=(SEQUENCE_LEN,))

# Embed each of the CLASSES token ids as an 8-dimensional vector.
x = tf.keras.layers.Embedding(CLASSES, 8, input_length=SEQUENCE_LEN)(inputs) 

# Block 1: three Conv1D(256) -> BatchNorm -> ReLU stages
# (first conv: kernel size 7, stride 2; the other two: kernel size 3).
x = tf.keras.layers.Conv1D(256, 7, 2)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation("relu")(x)
x = tf.keras.layers.Conv1D(256, 3)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation("relu")(x)
x = tf.keras.layers.Conv1D(256, 3)(x)
x = tf.keras.layers.BatchNormalization()(x)
# Keep the block-1 activation: it is pooled on its own further down.
x_mid = tf.keras.layers.Activation("relu")(x)

# Block 2: max-pool by 3, then three Conv1D(512, kernel size 3) -> BatchNorm -> ReLU stages.
x = tf.keras.layers.MaxPooling1D(3)(x_mid)
x = tf.keras.layers.Conv1D(512, 3)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation("relu")(x)
x = tf.keras.layers.Conv1D(512, 3)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation("relu")(x)
x = tf.keras.layers.Conv1D(512, 3)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation("relu")(x)

# Collapse the sequence axis of both branches with a global maximum.
x_mid = tf.keras.layers.GlobalMaxPooling1D()(x_mid)
x = tf.keras.layers.GlobalMaxPooling1D()(x)

# Join the pooled block-1 and block-2 features, then two Dense(512) -> BatchNorm -> ReLU stages.
x = tf.keras.layers.concatenate([x_mid, x], axis=-1)
x = tf.keras.layers.Dense(512)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation("relu")(x)
x = tf.keras.layers.Dense(512)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation("relu")(x)
# Regression head: a single linear output unit.
x = tf.keras.layers.Dense(1, activation="linear")(x)

model_source = tf.keras.Model(inputs=inputs, outputs=x)
model_source.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_5 (InputLayer)            (None, 650)          0                                            
__________________________________________________________________________________________________
embedding_4 (Embedding)         (None, 650, 8)       168         input_5[0][0]                    
__________________________________________________________________________________________________
conv1d_24 (Conv1D)              (None, 322, 256)     14592       embedding_4[0][0]                
__________________________________________________________________________________________________
batch_normalization_v1_31 (Batc (None, 322, 256)     1024        conv1d_24[0][0]                  
__________________________________________________________________________________________________
activation

In [0]:
# Truncated model: same embedding and convolutional stacks as the full
# regressor, but it ends at the concatenation of the two globally max-pooled
# branches (256 + 512 features) instead of continuing into the dense head.
inputs = tf.keras.layers.Input(shape=(SEQUENCE_LEN,))
x = tf.keras.layers.Embedding(CLASSES, 8, input_length=SEQUENCE_LEN)(inputs)

# Stack 1: three Conv1D -> BatchNorm -> ReLU stages with 256 filters; only the
# opening convolution uses kernel size 7 with stride 2.
for conv_kernel, conv_stride in ((7, 2), (3, 1), (3, 1)):
    x = tf.keras.layers.Conv1D(256, conv_kernel, strides=conv_stride)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation("relu")(x)
x_mid = x  # branch point: this activation is pooled on its own further down

# Stack 2: max-pool by 3, then three 512-filter stages with kernel size 3.
x = tf.keras.layers.MaxPooling1D(3)(x_mid)
for stage in range(3):
    x = tf.keras.layers.Conv1D(512, 3)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation("relu")(x)

# Collapse the sequence axis of both branches and join them feature-wise.
x_mid = tf.keras.layers.GlobalMaxPooling1D()(x_mid)
x = tf.keras.layers.GlobalMaxPooling1D()(x)
x = tf.keras.layers.concatenate([x_mid, x], axis=-1)

model = tf.keras.Model(inputs=inputs, outputs=x)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            (None, 650)          0                                            
__________________________________________________________________________________________________
embedding_5 (Embedding)         (None, 650, 8)       168         input_6[0][0]                    
__________________________________________________________________________________________________
conv1d_30 (Conv1D)              (None, 322, 256)     14592       embedding_5[0][0]                
__________________________________________________________________________________________________
batch_normalization_v1_39 (Batc (None, 322, 256)     1024        conv1d_30[0][0]                  
__________________________________________________________________________________________________
activation

**Calculate representations:**

In [0]:
# Compute the truncated-model output for X_wt using the default weight file.
# NOTE(review): presumably the helper loads `weight_loc` into `model_source` and
# transfers the shared layers to `model` before predicting — confirm against
# the definition of calc_pred_truncated_model.
X_wt_rep = calc_pred_truncated_model(model, model_source, weight_loc, X_wt)
# Preview: first 10 features of the first sample.
X_wt_rep[0][:10]

Calculating predictions:


array([3.554784 , 3.280347 , 3.6189182, 2.98941  , 3.0017288, 5.001732 ,
       2.9027479, 2.1182196, 3.1502807, 3.4515576], dtype=float32)

In [0]:
# Compute the truncated-model output for X_mut with the same models and the
# same default weight file as used for X_wt, so both are directly comparable.
# NOTE(review): presumably the helper reloads the weights on every call —
# confirm against the definition of calc_pred_truncated_model.
X_mut_rep = calc_pred_truncated_model(model, model_source, weight_loc, X_mut)
# Preview: first 10 features of the first sample.
X_mut_rep[0][:10]

Calculating predictions:


array([3.554784 , 3.280347 , 3.1969278, 2.98941  , 3.0017288, 5.001732 ,
       2.9027479, 2.1182196, 3.1502807, 3.4515576], dtype=float32)

In [0]:
# Element-wise difference, mutant minus wild type: a positive entry means the
# feature is larger in the mutant representation than in the wild-type one.
X_diff_rep = np.subtract(X_mut_rep, X_wt_rep)
# Preview: last 10 features of the first sample.
X_diff_rep[0][-10:]

array([ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.04222178,  0.        , -0.2061367 ,  0.        ],
      dtype=float32)

**Create dataframes:**

In [0]:
# Wrap the wild-type representations in a DataFrame whose columns are named
# wt_rep_0 .. wt_rep_{n-1}.
# np.asarray discards any labels that are already present, so re-running this
# cell after X_wt_rep has become a DataFrame rebuilds the same frame instead of
# stacking the prefix a second time ("wt_rep_wt_rep_0", ...).
X_wt_rep = pd.DataFrame(np.asarray(X_wt_rep)).add_prefix("wt_rep_")
X_wt_rep.head()

Unnamed: 0,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,wt_rep_9,...,wt_rep_758,wt_rep_759,wt_rep_760,wt_rep_761,wt_rep_762,wt_rep_763,wt_rep_764,wt_rep_765,wt_rep_766,wt_rep_767
0,3.554784,3.280347,3.618918,2.98941,3.001729,5.001732,2.902748,2.11822,3.150281,3.451558,...,3.359161,1.316831,0.0,1.055476,1.873763,2.145716,1.976812,3.575752,3.789693,1.999177
1,3.554784,3.280347,3.618918,2.98941,3.001729,5.001732,2.902748,2.11822,3.150281,3.451558,...,3.359161,1.316831,0.0,1.055476,1.873763,2.145716,1.976812,3.575752,3.789693,1.999177
2,3.554784,3.280347,3.618918,2.98941,3.001729,5.001732,2.902748,2.11822,3.150281,3.451558,...,3.359161,1.316831,0.0,1.055476,1.873763,2.145716,1.976812,3.575752,3.789693,1.999177
3,3.554784,3.280347,3.618918,2.98941,3.001729,5.001732,2.902748,2.11822,3.150281,3.451558,...,3.359161,1.316831,0.0,1.055476,1.873763,2.145716,1.976812,3.575752,3.789693,1.999177
4,3.554784,3.280347,3.618918,2.98941,3.001729,5.001732,2.902748,2.11822,3.150281,3.451558,...,3.359161,1.316831,0.0,1.055476,1.873763,2.145716,1.976812,3.575752,3.789693,1.999177


In [0]:
# Wrap the mutant representations in a DataFrame whose columns are named
# mut_rep_0 .. mut_rep_{n-1}.
# np.asarray discards any labels that are already present, so re-running this
# cell after X_mut_rep has become a DataFrame rebuilds the same frame instead
# of stacking the prefix a second time ("mut_rep_mut_rep_0", ...).
X_mut_rep = pd.DataFrame(np.asarray(X_mut_rep)).add_prefix("mut_rep_")
X_mut_rep.head()

Unnamed: 0,mut_rep_0,mut_rep_1,mut_rep_2,mut_rep_3,mut_rep_4,mut_rep_5,mut_rep_6,mut_rep_7,mut_rep_8,mut_rep_9,...,mut_rep_758,mut_rep_759,mut_rep_760,mut_rep_761,mut_rep_762,mut_rep_763,mut_rep_764,mut_rep_765,mut_rep_766,mut_rep_767
0,3.554784,3.280347,3.196928,2.98941,3.001729,5.001732,2.902748,2.11822,3.150281,3.451558,...,3.359161,1.316831,0.0,1.055476,1.873763,2.145716,2.019033,3.575752,3.583557,1.999177
1,3.554784,3.280347,3.618918,2.98941,3.001729,5.001732,2.902748,2.11822,3.150281,3.451558,...,3.357924,1.316831,0.0,1.055476,1.873763,2.145716,1.951526,3.575752,3.789693,1.999177
2,3.554784,3.280347,3.618918,2.98941,3.001729,5.001732,2.902748,1.81807,3.150281,3.451558,...,3.359161,1.316831,0.0,1.055476,1.873763,2.185869,2.091187,3.575752,3.789693,1.999177
3,3.554784,3.280347,3.618918,2.98941,3.001729,5.001732,2.902748,2.11822,2.561329,3.451558,...,3.359161,1.316831,0.068523,0.95235,1.883678,2.145716,1.976812,3.575752,3.789693,2.039857
4,3.554784,3.280347,3.618918,2.98941,3.001729,5.001732,2.902748,2.11822,3.150281,3.451558,...,3.359161,1.32789,0.0,1.055476,1.793253,2.145716,1.976812,3.81483,3.789693,2.302211


In [0]:
# Wrap the mutant-minus-wild-type differences in a DataFrame whose columns are
# named diff_rep_0 .. diff_rep_{n-1}.
# A later cell prepends a "Mutation" column to X_diff_rep. That column is
# dropped here (a no-op on the first run) and the labels are reset through
# np.asarray, so re-running this cell rebuilds the same numeric frame instead
# of prefixing the existing column names again.
diff_values = pd.DataFrame(X_diff_rep).drop(columns="Mutation", errors="ignore")
X_diff_rep = pd.DataFrame(np.asarray(diff_values)).add_prefix("diff_rep_")
X_diff_rep.head()

Unnamed: 0,diff_rep_0,diff_rep_1,diff_rep_2,diff_rep_3,diff_rep_4,diff_rep_5,diff_rep_6,diff_rep_7,diff_rep_8,diff_rep_9,...,diff_rep_758,diff_rep_759,diff_rep_760,diff_rep_761,diff_rep_762,diff_rep_763,diff_rep_764,diff_rep_765,diff_rep_766,diff_rep_767
0,0.0,0.0,-0.42199,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.042222,0.0,-0.206137,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.001237,0.0,0.0,0.0,0.0,0.0,-0.025286,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.300149,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.040153,0.114376,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.588952,0.0,...,0.0,0.0,0.068523,-0.103127,0.009915,0.0,0.0,0.0,0.0,0.04068
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.011059,0.0,0.0,-0.080509,0.0,0.0,0.239078,0.0,0.303034


In [0]:
# Put the wild-type, mutant and difference feature blocks side by side, in
# that order, so each row carries all three views of one sample.
X_all = pd.concat((X_wt_rep, X_mut_rep, X_diff_rep), axis="columns")
X_all.head()

Unnamed: 0,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,wt_rep_9,...,diff_rep_758,diff_rep_759,diff_rep_760,diff_rep_761,diff_rep_762,diff_rep_763,diff_rep_764,diff_rep_765,diff_rep_766,diff_rep_767
0,3.554784,3.280347,3.618918,2.98941,3.001729,5.001732,2.902748,2.11822,3.150281,3.451558,...,0.0,0.0,0.0,0.0,0.0,0.0,0.042222,0.0,-0.206137,0.0
1,3.554784,3.280347,3.618918,2.98941,3.001729,5.001732,2.902748,2.11822,3.150281,3.451558,...,-0.001237,0.0,0.0,0.0,0.0,0.0,-0.025286,0.0,0.0,0.0
2,3.554784,3.280347,3.618918,2.98941,3.001729,5.001732,2.902748,2.11822,3.150281,3.451558,...,0.0,0.0,0.0,0.0,0.0,0.040153,0.114376,0.0,0.0,0.0
3,3.554784,3.280347,3.618918,2.98941,3.001729,5.001732,2.902748,2.11822,3.150281,3.451558,...,0.0,0.0,0.068523,-0.103127,0.009915,0.0,0.0,0.0,0.0,0.04068
4,3.554784,3.280347,3.618918,2.98941,3.001729,5.001732,2.902748,2.11822,3.150281,3.451558,...,0.0,0.011059,0.0,0.0,-0.080509,0.0,0.0,0.239078,0.0,0.303034


In [0]:
# Prepend the mutation identifier so each feature row can be traced back to its
# entry in Mutation_Table. A "Mutation" column left over from an earlier run of
# this cell is dropped first (a no-op on the first run), so re-executing the
# cell does not add a duplicate column.
# NOTE(review): concat aligns on the row index; this assumes Mutation_Table has
# the default 0..n-1 index in the same order as the representations — confirm.
X_all = pd.concat(
    [Mutation_Table[["Mutation"]], X_all.drop(columns="Mutation", errors="ignore")],
    axis=1,
)
X_all.head()

Unnamed: 0,Mutation,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,...,diff_rep_758,diff_rep_759,diff_rep_760,diff_rep_761,diff_rep_762,diff_rep_763,diff_rep_764,diff_rep_765,diff_rep_766,diff_rep_767
0,1AKY@A@I213F,3.554784,3.280347,3.618918,2.98941,3.001729,5.001732,2.902748,2.11822,3.150281,...,0.0,0.0,0.0,0.0,0.0,0.0,0.042222,0.0,-0.206137,0.0
1,1AKY@A@N169D,3.554784,3.280347,3.618918,2.98941,3.001729,5.001732,2.902748,2.11822,3.150281,...,-0.001237,0.0,0.0,0.0,0.0,0.0,-0.025286,0.0,0.0,0.0
2,1AKY@A@Q48E,3.554784,3.280347,3.618918,2.98941,3.001729,5.001732,2.902748,2.11822,3.150281,...,0.0,0.0,0.0,0.0,0.0,0.040153,0.114376,0.0,0.0,0.0
3,1AKY@A@T110H,3.554784,3.280347,3.618918,2.98941,3.001729,5.001732,2.902748,2.11822,3.150281,...,0.0,0.0,0.068523,-0.103127,0.009915,0.0,0.0,0.0,0.0,0.04068
4,1AKY@A@T77H,3.554784,3.280347,3.618918,2.98941,3.001729,5.001732,2.902748,2.11822,3.150281,...,0.0,0.011059,0.0,0.0,-0.080509,0.0,0.0,0.239078,0.0,0.303034


In [0]:
# Prepend the mutation identifier to the difference-only table. A "Mutation"
# column left over from an earlier run of this cell is dropped first (a no-op
# on the first run), so re-executing the cell does not add a duplicate column.
# NOTE(review): concat aligns on the row index; this assumes Mutation_Table has
# the default 0..n-1 index in the same order as the representations — confirm.
X_diff_rep = pd.concat(
    [Mutation_Table[["Mutation"]], X_diff_rep.drop(columns="Mutation", errors="ignore")],
    axis=1,
)
X_diff_rep.head()

Unnamed: 0,Mutation,diff_rep_0,diff_rep_1,diff_rep_2,diff_rep_3,diff_rep_4,diff_rep_5,diff_rep_6,diff_rep_7,diff_rep_8,...,diff_rep_758,diff_rep_759,diff_rep_760,diff_rep_761,diff_rep_762,diff_rep_763,diff_rep_764,diff_rep_765,diff_rep_766,diff_rep_767
0,1AKY@A@I213F,0.0,0.0,-0.42199,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.042222,0.0,-0.206137,0.0
1,1AKY@A@N169D,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.001237,0.0,0.0,0.0,0.0,0.0,-0.025286,0.0,0.0,0.0
2,1AKY@A@Q48E,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.300149,0.0,...,0.0,0.0,0.0,0.0,0.0,0.040153,0.114376,0.0,0.0,0.0
3,1AKY@A@T110H,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.588952,...,0.0,0.0,0.068523,-0.103127,0.009915,0.0,0.0,0.0,0.0,0.04068
4,1AKY@A@T77H,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.011059,0.0,0.0,-0.080509,0.0,0.0,0.239078,0.0,0.303034


**Save dataframes:**

In [0]:
# Write the combined wild-type / mutant / difference table to the target folder.
# os.path.join keeps the file name correct whether or not the configured
# folder string ends with a path separator.
X_all.to_csv(
    os.path.join(data_target_location, "sequence_cnn_rep_all_default_03.csv"),
    index=False,
)

In [0]:
# Write the difference-only table to the target folder.
# os.path.join keeps the file name correct whether or not the configured
# folder string ends with a path separator.
X_diff_rep.to_csv(
    os.path.join(data_target_location, "sequence_cnn_rep_diff_default_03.csv"),
    index=False,
)

### 64 Model Last Layer (1)

**Define Models:**

In [0]:
# Original (full) model, "64" variant: embedding, two convolutional stacks, a
# dense head that narrows to 64 units, and a single linear output unit.
inputs = tf.keras.layers.Input(shape=(SEQUENCE_LEN,))
x = tf.keras.layers.Embedding(CLASSES, 8, input_length=SEQUENCE_LEN)(inputs)

# Stack 1: three Conv1D -> BatchNorm -> ReLU stages with 256 filters; only the
# opening convolution uses kernel size 7 with stride 2.
for conv_kernel, conv_stride in ((7, 2), (3, 1), (3, 1)):
    x = tf.keras.layers.Conv1D(256, conv_kernel, strides=conv_stride)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation("relu")(x)
x_mid = x  # branch point: this activation is pooled on its own further down

# Stack 2: max-pool by 3, then three 512-filter stages with kernel size 3.
x = tf.keras.layers.MaxPooling1D(3)(x_mid)
for stage in range(3):
    x = tf.keras.layers.Conv1D(512, 3)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation("relu")(x)

# Collapse the sequence axis of both branches and join them feature-wise.
x_mid = tf.keras.layers.GlobalMaxPooling1D()(x_mid)
x = tf.keras.layers.GlobalMaxPooling1D()(x)
x = tf.keras.layers.concatenate([x_mid, x], axis=-1)

# Dense head: Dense -> BatchNorm -> ReLU at widths 512, 512 and 64, followed by
# the scalar linear output.
for dense_units in (512, 512, 64):
    x = tf.keras.layers.Dense(dense_units)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation("relu")(x)
x = tf.keras.layers.Dense(1, activation="linear")(x)

model_source = tf.keras.Model(inputs=inputs, outputs=x)
model_source.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_11 (InputLayer)           (None, 650)          0                                            
__________________________________________________________________________________________________
embedding_10 (Embedding)        (None, 650, 8)       168         input_11[0][0]                   
__________________________________________________________________________________________________
conv1d_60 (Conv1D)              (None, 322, 256)     14592       embedding_10[0][0]               
__________________________________________________________________________________________________
batch_normalization_v1_81 (Batc (None, 322, 256)     1024        conv1d_60[0][0]                  
__________________________________________________________________________________________________
activation

In [0]:
# Truncated model, "64" variant: identical to the full model up to and
# including the ReLU after the 64-unit dense layer; only the final scalar
# output layer is left off, so the model emits the 64-dimensional activation.
inputs = tf.keras.layers.Input(shape=(SEQUENCE_LEN,))
x = tf.keras.layers.Embedding(CLASSES, 8, input_length=SEQUENCE_LEN)(inputs)

# Stack 1: three Conv1D -> BatchNorm -> ReLU stages with 256 filters; only the
# opening convolution uses kernel size 7 with stride 2.
for conv_kernel, conv_stride in ((7, 2), (3, 1), (3, 1)):
    x = tf.keras.layers.Conv1D(256, conv_kernel, strides=conv_stride)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation("relu")(x)
x_mid = x  # branch point: this activation is pooled on its own further down

# Stack 2: max-pool by 3, then three 512-filter stages with kernel size 3.
x = tf.keras.layers.MaxPooling1D(3)(x_mid)
for stage in range(3):
    x = tf.keras.layers.Conv1D(512, 3)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation("relu")(x)

# Collapse the sequence axis of both branches and join them feature-wise.
x_mid = tf.keras.layers.GlobalMaxPooling1D()(x_mid)
x = tf.keras.layers.GlobalMaxPooling1D()(x)
x = tf.keras.layers.concatenate([x_mid, x], axis=-1)

# Dense head without the output unit: Dense -> BatchNorm -> ReLU at widths
# 512, 512 and 64.
for dense_units in (512, 512, 64):
    x = tf.keras.layers.Dense(dense_units)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation("relu")(x)

model = tf.keras.Model(inputs=inputs, outputs=x)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_12 (InputLayer)           (None, 650)          0                                            
__________________________________________________________________________________________________
embedding_11 (Embedding)        (None, 650, 8)       168         input_12[0][0]                   
__________________________________________________________________________________________________
conv1d_66 (Conv1D)              (None, 322, 256)     14592       embedding_11[0][0]               
__________________________________________________________________________________________________
batch_normalization_v1_90 (Batc (None, 322, 256)     1024        conv1d_66[0][0]                  
__________________________________________________________________________________________________
activation

**Calculate representations:**

In [0]:
# Compute the truncated-model output for X_wt using the "64" weight file.
# NOTE(review): presumably the helper loads `weight_loc_64` into `model_source`
# and transfers the shared layers to `model` before predicting — confirm
# against the definition of calc_pred_truncated_model.
X_wt_rep = calc_pred_truncated_model(model, model_source, weight_loc_64, X_wt)
# Preview: first 10 features of the first sample.
X_wt_rep[0][:10]

Calculating predictions:


array([0.        , 0.36767685, 0.28666687, 0.16953684, 0.        ,
       0.36832157, 0.        , 0.        , 0.        , 0.        ],
      dtype=float32)

In [0]:
# Compute the truncated-model output for X_mut with the same models and the
# same "64" weight file as used for X_wt, so both are directly comparable.
# NOTE(review): presumably the helper reloads the weights on every call —
# confirm against the definition of calc_pred_truncated_model.
X_mut_rep = calc_pred_truncated_model(model, model_source, weight_loc_64, X_mut)
# Preview: first 10 features of the first sample.
X_mut_rep[0][:10]

Calculating predictions:


array([0.        , 0.39708775, 0.2870831 , 0.16146907, 0.        ,
       0.36126408, 0.        , 0.        , 0.        , 0.        ],
      dtype=float32)

In [0]:
# Element-wise difference, mutant minus wild type: a positive entry means the
# feature is larger in the mutant representation than in the wild-type one.
X_diff_rep = np.subtract(X_mut_rep, X_wt_rep)
# Preview: last 10 features of the first sample.
X_diff_rep[0][-10:]

array([ 0.        ,  0.01239872,  0.        ,  0.00441167,  0.        ,
        0.00545901,  0.        ,  0.        , -0.01483226,  0.        ],
      dtype=float32)

**Create dataframes:**

In [0]:
# Wrap the wild-type representations in a DataFrame whose columns are named
# wt_rep_0 .. wt_rep_{n-1}.
# np.asarray discards any labels that are already present, so re-running this
# cell after X_wt_rep has become a DataFrame rebuilds the same frame instead of
# stacking the prefix a second time ("wt_rep_wt_rep_0", ...).
X_wt_rep = pd.DataFrame(np.asarray(X_wt_rep)).add_prefix("wt_rep_")
X_wt_rep.head()

Unnamed: 0,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,wt_rep_9,...,wt_rep_54,wt_rep_55,wt_rep_56,wt_rep_57,wt_rep_58,wt_rep_59,wt_rep_60,wt_rep_61,wt_rep_62,wt_rep_63
0,0.0,0.367677,0.286667,0.169537,0.0,0.368322,0.0,0.0,0.0,0.0,...,0.0,0.264639,0.0,0.411715,0.0,0.833764,0.0,0.0,0.332689,0.0
1,0.0,0.367677,0.286667,0.169537,0.0,0.368322,0.0,0.0,0.0,0.0,...,0.0,0.264639,0.0,0.411715,0.0,0.833764,0.0,0.0,0.332689,0.0
2,0.0,0.367677,0.286667,0.169537,0.0,0.368322,0.0,0.0,0.0,0.0,...,0.0,0.264639,0.0,0.411715,0.0,0.833764,0.0,0.0,0.332689,0.0
3,0.0,0.367677,0.286667,0.169537,0.0,0.368322,0.0,0.0,0.0,0.0,...,0.0,0.264639,0.0,0.411715,0.0,0.833764,0.0,0.0,0.332689,0.0
4,0.0,0.367677,0.286667,0.169537,0.0,0.368322,0.0,0.0,0.0,0.0,...,0.0,0.264639,0.0,0.411715,0.0,0.833764,0.0,0.0,0.332689,0.0


In [0]:
# Wrap the mutant representations in a DataFrame whose columns are named
# mut_rep_0 .. mut_rep_{n-1}.
# np.asarray discards any labels that are already present, so re-running this
# cell after X_mut_rep has become a DataFrame rebuilds the same frame instead
# of stacking the prefix a second time ("mut_rep_mut_rep_0", ...).
X_mut_rep = pd.DataFrame(np.asarray(X_mut_rep)).add_prefix("mut_rep_")
X_mut_rep.head()

Unnamed: 0,mut_rep_0,mut_rep_1,mut_rep_2,mut_rep_3,mut_rep_4,mut_rep_5,mut_rep_6,mut_rep_7,mut_rep_8,mut_rep_9,...,mut_rep_54,mut_rep_55,mut_rep_56,mut_rep_57,mut_rep_58,mut_rep_59,mut_rep_60,mut_rep_61,mut_rep_62,mut_rep_63
0,0.0,0.397088,0.287083,0.161469,0.0,0.361264,0.0,0.0,0.0,0.0,...,0.0,0.277038,0.0,0.416127,0.0,0.839223,0.0,0.0,0.317856,0.0
1,0.0,0.294428,0.268043,0.168872,0.0,0.347109,0.0,0.0,0.0,0.0,...,0.0,0.238613,0.0,0.38426,0.0,0.812755,0.0,0.0,0.33955,0.0
2,0.0,0.321337,0.289027,0.129028,0.0,0.472738,0.0,0.0,0.0,0.0,...,0.0,0.253139,0.0,0.415389,0.0,0.937984,0.0,0.0,0.341798,0.0
3,0.0,0.346672,0.273652,0.200603,0.0,0.337886,0.0,0.0,0.0,0.0,...,0.0,0.253443,0.0,0.392829,0.0,0.780872,0.0,0.0,0.308488,0.0
4,0.0,0.354769,0.278331,0.134663,0.0,0.394717,0.0,0.0,0.0,0.0,...,0.0,0.293581,0.0,0.389738,0.0,0.866551,0.0,0.0,0.293931,0.0


In [0]:
# Wrap the mutant-minus-wild-type differences in a DataFrame whose columns are
# named diff_rep_0 .. diff_rep_{n-1}.
# A later cell prepends a "Mutation" column to X_diff_rep. That column is
# dropped here (a no-op on the first run) and the labels are reset through
# np.asarray, so re-running this cell rebuilds the same numeric frame instead
# of prefixing the existing column names again.
diff_values = pd.DataFrame(X_diff_rep).drop(columns="Mutation", errors="ignore")
X_diff_rep = pd.DataFrame(np.asarray(diff_values)).add_prefix("diff_rep_")
X_diff_rep.head()

Unnamed: 0,diff_rep_0,diff_rep_1,diff_rep_2,diff_rep_3,diff_rep_4,diff_rep_5,diff_rep_6,diff_rep_7,diff_rep_8,diff_rep_9,...,diff_rep_54,diff_rep_55,diff_rep_56,diff_rep_57,diff_rep_58,diff_rep_59,diff_rep_60,diff_rep_61,diff_rep_62,diff_rep_63
0,0.0,0.029411,0.000416,-0.008068,0.0,-0.007057,0.0,0.0,0.0,0.0,...,0.0,0.012399,0.0,0.004412,0.0,0.005459,0.0,0.0,-0.014832,0.0
1,0.0,-0.073249,-0.018624,-0.000665,0.0,-0.021213,0.0,0.0,0.0,0.0,...,0.0,-0.026027,0.0,-0.027456,0.0,-0.021009,0.0,0.0,0.006861,0.0
2,0.0,-0.04634,0.00236,-0.040509,0.0,0.104416,0.0,0.0,0.0,0.0,...,0.0,-0.0115,0.0,0.003673,0.0,0.10422,0.0,0.0,0.009109,0.0
3,0.0,-0.021005,-0.013015,0.031066,0.0,-0.030435,0.0,0.0,0.0,0.0,...,0.0,-0.011197,0.0,-0.018886,0.0,-0.052893,0.0,0.0,-0.0242,0.0
4,0.0,-0.012908,-0.008336,-0.034874,0.0,0.026395,0.0,0.0,0.0,0.0,...,0.0,0.028941,0.0,-0.021977,0.0,0.032787,0.0,0.0,-0.038757,0.0


In [0]:
# Put the wild-type, mutant and difference feature blocks side by side, in
# that order, so each row carries all three views of one sample.
X_all = pd.concat((X_wt_rep, X_mut_rep, X_diff_rep), axis="columns")
X_all.head()

Unnamed: 0,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,wt_rep_9,...,diff_rep_54,diff_rep_55,diff_rep_56,diff_rep_57,diff_rep_58,diff_rep_59,diff_rep_60,diff_rep_61,diff_rep_62,diff_rep_63
0,0.0,0.367677,0.286667,0.169537,0.0,0.368322,0.0,0.0,0.0,0.0,...,0.0,0.012399,0.0,0.004412,0.0,0.005459,0.0,0.0,-0.014832,0.0
1,0.0,0.367677,0.286667,0.169537,0.0,0.368322,0.0,0.0,0.0,0.0,...,0.0,-0.026027,0.0,-0.027456,0.0,-0.021009,0.0,0.0,0.006861,0.0
2,0.0,0.367677,0.286667,0.169537,0.0,0.368322,0.0,0.0,0.0,0.0,...,0.0,-0.0115,0.0,0.003673,0.0,0.10422,0.0,0.0,0.009109,0.0
3,0.0,0.367677,0.286667,0.169537,0.0,0.368322,0.0,0.0,0.0,0.0,...,0.0,-0.011197,0.0,-0.018886,0.0,-0.052893,0.0,0.0,-0.0242,0.0
4,0.0,0.367677,0.286667,0.169537,0.0,0.368322,0.0,0.0,0.0,0.0,...,0.0,0.028941,0.0,-0.021977,0.0,0.032787,0.0,0.0,-0.038757,0.0


In [0]:
# Prepend the mutation identifier so each feature row can be traced back to its
# entry in Mutation_Table. A "Mutation" column left over from an earlier run of
# this cell is dropped first (a no-op on the first run), so re-executing the
# cell does not add a duplicate column.
# NOTE(review): concat aligns on the row index; this assumes Mutation_Table has
# the default 0..n-1 index in the same order as the representations — confirm.
X_all = pd.concat(
    [Mutation_Table[["Mutation"]], X_all.drop(columns="Mutation", errors="ignore")],
    axis=1,
)
X_all.head()

Unnamed: 0,Mutation,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,...,diff_rep_54,diff_rep_55,diff_rep_56,diff_rep_57,diff_rep_58,diff_rep_59,diff_rep_60,diff_rep_61,diff_rep_62,diff_rep_63
0,1AKY@A@I213F,0.0,0.367677,0.286667,0.169537,0.0,0.368322,0.0,0.0,0.0,...,0.0,0.012399,0.0,0.004412,0.0,0.005459,0.0,0.0,-0.014832,0.0
1,1AKY@A@N169D,0.0,0.367677,0.286667,0.169537,0.0,0.368322,0.0,0.0,0.0,...,0.0,-0.026027,0.0,-0.027456,0.0,-0.021009,0.0,0.0,0.006861,0.0
2,1AKY@A@Q48E,0.0,0.367677,0.286667,0.169537,0.0,0.368322,0.0,0.0,0.0,...,0.0,-0.0115,0.0,0.003673,0.0,0.10422,0.0,0.0,0.009109,0.0
3,1AKY@A@T110H,0.0,0.367677,0.286667,0.169537,0.0,0.368322,0.0,0.0,0.0,...,0.0,-0.011197,0.0,-0.018886,0.0,-0.052893,0.0,0.0,-0.0242,0.0
4,1AKY@A@T77H,0.0,0.367677,0.286667,0.169537,0.0,0.368322,0.0,0.0,0.0,...,0.0,0.028941,0.0,-0.021977,0.0,0.032787,0.0,0.0,-0.038757,0.0


In [0]:
# Prepend the mutation identifier to the difference-only table. A "Mutation"
# column left over from an earlier run of this cell is dropped first (a no-op
# on the first run), so re-executing the cell does not add a duplicate column.
# NOTE(review): concat aligns on the row index; this assumes Mutation_Table has
# the default 0..n-1 index in the same order as the representations — confirm.
X_diff_rep = pd.concat(
    [Mutation_Table[["Mutation"]], X_diff_rep.drop(columns="Mutation", errors="ignore")],
    axis=1,
)
X_diff_rep.head()

Unnamed: 0,Mutation,diff_rep_0,diff_rep_1,diff_rep_2,diff_rep_3,diff_rep_4,diff_rep_5,diff_rep_6,diff_rep_7,diff_rep_8,...,diff_rep_54,diff_rep_55,diff_rep_56,diff_rep_57,diff_rep_58,diff_rep_59,diff_rep_60,diff_rep_61,diff_rep_62,diff_rep_63
0,1AKY@A@I213F,0.0,0.029411,0.000416,-0.008068,0.0,-0.007057,0.0,0.0,0.0,...,0.0,0.012399,0.0,0.004412,0.0,0.005459,0.0,0.0,-0.014832,0.0
1,1AKY@A@N169D,0.0,-0.073249,-0.018624,-0.000665,0.0,-0.021213,0.0,0.0,0.0,...,0.0,-0.026027,0.0,-0.027456,0.0,-0.021009,0.0,0.0,0.006861,0.0
2,1AKY@A@Q48E,0.0,-0.04634,0.00236,-0.040509,0.0,0.104416,0.0,0.0,0.0,...,0.0,-0.0115,0.0,0.003673,0.0,0.10422,0.0,0.0,0.009109,0.0
3,1AKY@A@T110H,0.0,-0.021005,-0.013015,0.031066,0.0,-0.030435,0.0,0.0,0.0,...,0.0,-0.011197,0.0,-0.018886,0.0,-0.052893,0.0,0.0,-0.0242,0.0
4,1AKY@A@T77H,0.0,-0.012908,-0.008336,-0.034874,0.0,0.026395,0.0,0.0,0.0,...,0.0,0.028941,0.0,-0.021977,0.0,0.032787,0.0,0.0,-0.038757,0.0


**Save dataframes:**

In [0]:
# Write the combined wild-type / mutant / difference table to the target folder.
# os.path.join keeps the file name correct whether or not the configured
# folder string ends with a path separator.
X_all.to_csv(
    os.path.join(data_target_location, "sequence_cnn_rep_all_64_01.csv"),
    index=False,
)

In [0]:
# Write the difference-only table to the target folder.
# os.path.join keeps the file name correct whether or not the configured
# folder string ends with a path separator.
X_diff_rep.to_csv(
    os.path.join(data_target_location, "sequence_cnn_rep_diff_64_01.csv"),
    index=False,
)