This notebook takes the hypertuned CNNs from BacDive+ that were selected for transfer learning, feeds them the sequences from Merck&Co+ T96, and extracts the CNN representations of those sequences (mutant, wild type, and their difference) at different layers.

## Set up notebook and environment: ##

### Import libraries and connect to Google Drive: ###

In [0]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.image as mpimg
from keras.utils import plot_model
from sklearn import preprocessing
import gc

Using TensorFlow backend.


In [0]:
# Mount Google Drive inside the Colab runtime at /content/gdrive.
# NOTE(review): the location cell below uses relative "gdrive/My Drive/..." paths,
# which presumably resolve against this mount only when the working directory is
# /content — confirm before running outside a default Colab session.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


### Specify location of all relevant data: (YOU HAVE TO INSERT YOUR FILE LOCATIONS) ####

In [0]:
# --- Locations (edit these to match your own Drive layout) ---
data_folder_location = "gdrive/My Drive/iGEM/Databases/Merck&Co/Data/T96/sequence_sampler_T96/"
data_target_location = "gdrive/My Drive/iGEM/Databases/Merck&Co/Data/T96/cnn_rep_sampler_T96/"

# Input files live directly inside the data folder.
X_wt_loc = "{}T96_X_wild.npy".format(data_folder_location)
X_mut_loc = "{}T96_X_mut.npy".format(data_folder_location)
Mutation_Table_loc = "{}T96_sequences_only.csv".format(data_folder_location)

# Weight files of the two transfer-learning models.
weight_loc = "gdrive/My Drive/iGEM/Databases/BacDive/Models/CNN_Regressor_Transfer_Learning/model_transfer_learning_default.hdf5"
weight_loc_64 = "gdrive/My Drive/iGEM/Databases/BacDive/Models/CNN_Regressor_Transfer_Learning/model_transfer_learning_default_64.hdf5"

# --- Existence report: one True/False line per location, in definition order ---
print("Check correctnes of locations: ")
print("Folders: ")
for folder in (data_folder_location, data_target_location):
  print("\t", os.path.isdir(folder))
print("Files: ")
for file_path in (X_wt_loc, X_mut_loc, Mutation_Table_loc, weight_loc, weight_loc_64):
  print("\t", os.path.isfile(file_path))

Check correctnes of locations: 
Folders: 
	 True
	 True
Files: 
	 True
	 True
	 True
	 True
	 True


### Retrieve Data 

In [0]:
# Read the array stored at X_wt_loc (T96_X_wild.npy).
X_wt = np.load(file=X_wt_loc)

In [0]:
# Read the array stored at X_mut_loc (T96_X_mut.npy).
X_mut = np.load(file=X_mut_loc)

In [0]:
# Read the table stored at Mutation_Table_loc (T96_sequences_only.csv).
Mutation_Table = pd.read_csv(filepath_or_buffer=Mutation_Table_loc)

### Supplementary functions

In [0]:
def calc_pred_truncated_model(model, model_source, weight_loc, X):
  """Predict with a truncated model that borrows its weights from a full model.

  The weights stored at ``weight_loc`` are loaded into ``model_source``; the
  weights of its first ``len(model.layers)`` layers are then copied, position
  by position, into ``model`` before ``model.predict`` is run on ``X``.

  Args:
    model: Truncated model; its layers must line up one-to-one with the
      leading layers of ``model_source``.
    model_source: Full model whose architecture matches the weight file.
    weight_loc: Path of the weight file passed to ``model_source.load_weights``.
    X: Input passed unchanged to ``model.predict``.

  Returns:
    Whatever ``model.predict(X, verbose=1)`` returns.

  Raises:
    ValueError: If ``model`` has more layers than ``model_source``, in which
      case a positional weight transfer is impossible.
  """
  if len(model.layers) > len(model_source.layers):
    raise ValueError(
      "Truncated model has {} layers but the source model only has {}.".format(
        len(model.layers), len(model_source.layers)
      )
    )

  #Compile the models
  # String identifiers are used instead of tf.train.AdamOptimizer, which only
  # exists in TensorFlow 1.x; both models still leave this function compiled
  # with Adam and an MSE loss/metric, as before.
  for net in (model, model_source):
    net.compile(
      optimizer="adam",
      loss="mean_squared_error",
      metrics=["mse"]
    )

  model_source.load_weights(weight_loc)

  #transfer appropriate weights
  # zip stops at the end of the (shorter) truncated model; the length check
  # above guarantees no truncated layer is left without a source layer.
  for target_layer, source_layer in zip(model.layers, model_source.layers):
    target_layer.set_weights(source_layer.get_weights())

  print("Calculating predictions:")
  y_pred = model.predict(X, verbose=1)

  return y_pred

## Extract representation


In [0]:
# Length of every input sequence fed to the networks.
SEQUENCE_LEN = 650
# Size of the token vocabulary: sequences with non-canonical amino acids were
# removed, leaving 20 residues plus the code "0", which encodes "None".
CLASSES = 21

### Default Model Last Layer (1)

**Define Models:**

In [0]:
#original model
# Full regressor. The repeated Conv1D -> BatchNormalization -> ReLU and
# Dense -> BatchNormalization -> ReLU triples are generated by loops; layers are
# created in the same order and with the same arguments as when written out.
keras_layers = tf.keras.layers

inputs = keras_layers.Input(shape=(SEQUENCE_LEN,))
x = keras_layers.Embedding(CLASSES, 8, input_length=SEQUENCE_LEN)(inputs)

# First convolutional stack: (filters, kernel_size, strides) per triple.
for n_filters, kernel_width, stride in [(256, 7, 2), (256, 3, 1), (256, 3, 1)]:
  x = keras_layers.Conv1D(n_filters, kernel_width, stride)(x)
  x = keras_layers.BatchNormalization()(x)
  x = keras_layers.Activation("relu")(x)
x_mid = x  # branch point: this activation also feeds the skip path below

# Second convolutional stack on the pooled features.
x = keras_layers.MaxPooling1D(3)(x_mid)
for _ in range(3):
  x = keras_layers.Conv1D(512, 3)(x)
  x = keras_layers.BatchNormalization()(x)
  x = keras_layers.Activation("relu")(x)

# Collapse both branches over the sequence axis and join them.
x_mid = keras_layers.GlobalMaxPooling1D()(x_mid)
x = keras_layers.GlobalMaxPooling1D()(x)
x = keras_layers.concatenate([x_mid, x], axis=-1)

# Two dense triples followed by the single linear output unit.
for _ in range(2):
  x = keras_layers.Dense(512)(x)
  x = keras_layers.BatchNormalization()(x)
  x = keras_layers.Activation("relu")(x)
x = keras_layers.Dense(1, activation="linear")(x)

model_source = tf.keras.Model(inputs=inputs, outputs=x)
model_source.summary()

Instructions for updating:
Colocations handled automatically by placer.
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 650)          0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, 650, 8)       168         input_1[0][0]                    
__________________________________________________________________________________________________
conv1d (Conv1D)                 (None, 322, 256)     14592       embedding[0][0]                  
__________________________________________________________________________________________________
batch_normalization_v1 (BatchNo (None, 322, 256)     1024        conv1d[0][0]                     
_____________________________________

In [0]:
#truncated model
# Same layer sequence as the full regressor, but cut after the second
# Dense -> BatchNormalization -> ReLU triple (no final linear output unit).
keras_layers = tf.keras.layers

inputs = keras_layers.Input(shape=(SEQUENCE_LEN,))
x = keras_layers.Embedding(CLASSES, 8, input_length=SEQUENCE_LEN)(inputs)

# First convolutional stack: (filters, kernel_size, strides) per triple.
for n_filters, kernel_width, stride in [(256, 7, 2), (256, 3, 1), (256, 3, 1)]:
  x = keras_layers.Conv1D(n_filters, kernel_width, stride)(x)
  x = keras_layers.BatchNormalization()(x)
  x = keras_layers.Activation("relu")(x)
x_mid = x  # branch point: this activation also feeds the skip path below

# Second convolutional stack on the pooled features.
x = keras_layers.MaxPooling1D(3)(x_mid)
for _ in range(3):
  x = keras_layers.Conv1D(512, 3)(x)
  x = keras_layers.BatchNormalization()(x)
  x = keras_layers.Activation("relu")(x)

# Collapse both branches over the sequence axis and join them.
x_mid = keras_layers.GlobalMaxPooling1D()(x_mid)
x = keras_layers.GlobalMaxPooling1D()(x)
x = keras_layers.concatenate([x_mid, x], axis=-1)

# Both dense triples are kept; the model output is the last ReLU activation.
for _ in range(2):
  x = keras_layers.Dense(512)(x)
  x = keras_layers.BatchNormalization()(x)
  x = keras_layers.Activation("relu")(x)

model = tf.keras.Model(inputs=inputs, outputs=x)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 650)          0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 650, 8)       168         input_2[0][0]                    
__________________________________________________________________________________________________
conv1d_6 (Conv1D)               (None, 322, 256)     14592       embedding_1[0][0]                
__________________________________________________________________________________________________
batch_normalization_v1_8 (Batch (None, 322, 256)     1024        conv1d_6[0][0]                   
__________________________________________________________________________________________________
activation

**Calculate representations:**

In [0]:
# Run X_wt through the truncated model; show the first ten features of row 0.
X_wt_rep = calc_pred_truncated_model(
    model=model, model_source=model_source, weight_loc=weight_loc, X=X_wt
)
X_wt_rep[0, :10]

Calculating predictions:


array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)

In [0]:
# Run X_mut through the truncated model; show the first ten features of row 0.
X_mut_rep = calc_pred_truncated_model(
    model=model, model_source=model_source, weight_loc=weight_loc, X=X_mut
)
X_mut_rep[0, :10]

Calculating predictions:


array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)

In [0]:
# Element-wise mutant-minus-wild-type difference; show the last ten features of row 0.
X_diff_rep = X_mut_rep - X_wt_rep
X_diff_rep[0, -10:]

array([ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.04606143,  0.        ,  0.        , -0.02790682],
      dtype=float32)

**Create dataframes:**

In [0]:
# Rebuild the frame from the raw values and name columns by position, so that
# re-running this cell does not stack a second "wt_rep_" prefix onto columns
# that were already renamed.
X_wt_rep = pd.DataFrame(np.asarray(X_wt_rep))
X_wt_rep.columns = ["wt_rep_" + str(i) for i in range(X_wt_rep.shape[1])]
X_wt_rep.head()

Unnamed: 0,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,wt_rep_9,...,wt_rep_502,wt_rep_503,wt_rep_504,wt_rep_505,wt_rep_506,wt_rep_507,wt_rep_508,wt_rep_509,wt_rep_510,wt_rep_511
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.197871,0.0,0.0,0.079762
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.197871,0.0,0.0,0.079762
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.197871,0.0,0.0,0.079762
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.197871,0.0,0.0,0.079762
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.197871,0.0,0.0,0.079762


In [0]:
# Rebuild the frame from the raw values and name columns by position, so that
# re-running this cell does not stack a second "mut_rep_" prefix onto columns
# that were already renamed.
X_mut_rep = pd.DataFrame(np.asarray(X_mut_rep))
X_mut_rep.columns = ["mut_rep_" + str(i) for i in range(X_mut_rep.shape[1])]
X_mut_rep.head()

Unnamed: 0,mut_rep_0,mut_rep_1,mut_rep_2,mut_rep_3,mut_rep_4,mut_rep_5,mut_rep_6,mut_rep_7,mut_rep_8,mut_rep_9,...,mut_rep_502,mut_rep_503,mut_rep_504,mut_rep_505,mut_rep_506,mut_rep_507,mut_rep_508,mut_rep_509,mut_rep_510,mut_rep_511
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.243932,0.0,0.0,0.051855
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.219868,0.0,0.0,0.061005
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.282438,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.237392,0.0,0.0,0.04372
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.197481,0.0,0.0,0.064765


In [0]:
# Rebuild the frame from the raw values and name columns by position, so that
# an immediate re-run of this cell does not produce doubly prefixed names such
# as "diff_rep_diff_rep_0".
X_diff_rep = pd.DataFrame(np.asarray(X_diff_rep))
X_diff_rep.columns = ["diff_rep_" + str(i) for i in range(X_diff_rep.shape[1])]
X_diff_rep.head()

Unnamed: 0,diff_rep_0,diff_rep_1,diff_rep_2,diff_rep_3,diff_rep_4,diff_rep_5,diff_rep_6,diff_rep_7,diff_rep_8,diff_rep_9,...,diff_rep_502,diff_rep_503,diff_rep_504,diff_rep_505,diff_rep_506,diff_rep_507,diff_rep_508,diff_rep_509,diff_rep_510,diff_rep_511
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.046061,0.0,0.0,-0.027907
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.021997,0.0,0.0,-0.018757
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.084567,0.0,0.0,-0.079762
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.039522,0.0,0.0,-0.036042
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,-0.00039,0.0,0.0,-0.014997


In [0]:
# Place the wild-type, mutant and difference frames side by side (column-wise).
representation_frames = [X_wt_rep, X_mut_rep, X_diff_rep]
X_all = pd.concat(representation_frames, axis="columns")
X_all.head()

Unnamed: 0,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,wt_rep_9,...,diff_rep_502,diff_rep_503,diff_rep_504,diff_rep_505,diff_rep_506,diff_rep_507,diff_rep_508,diff_rep_509,diff_rep_510,diff_rep_511
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.046061,0.0,0.0,-0.027907
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.021997,0.0,0.0,-0.018757
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.084567,0.0,0.0,-0.079762
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.039522,0.0,0.0,-0.036042
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,-0.00039,0.0,0.0,-0.014997


In [0]:
# concat(axis=1) aligns rows on the index, so a row-count mismatch would be
# padded with NaN instead of failing; check the pairing explicitly.
assert len(Mutation_Table) == len(X_all), "Mutation_Table and X_all differ in row count"
# Drop a previously attached label column first so re-running this cell does
# not add a second "Mutation" column.
X_all = pd.concat(
    [Mutation_Table[["Mutation"]], X_all.drop(columns=["Mutation"], errors="ignore")],
    axis=1,
)
X_all.head()

Unnamed: 0,Mutation,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,...,diff_rep_502,diff_rep_503,diff_rep_504,diff_rep_505,diff_rep_506,diff_rep_507,diff_rep_508,diff_rep_509,diff_rep_510,diff_rep_511
0,GKR3BB@A@A158W,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.046061,0.0,0.0,-0.027907
1,GKR3BB@A@D107K,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.021997,0.0,0.0,-0.018757
2,GKR3BB@A@D107L,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.084567,0.0,0.0,-0.079762
3,GKR3BB@A@D107R,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.039522,0.0,0.0,-0.036042
4,GKR3BB@A@D134S,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,-0.00039,0.0,0.0,-0.014997


In [0]:
# concat(axis=1) aligns rows on the index, so a row-count mismatch would be
# padded with NaN instead of failing; check the pairing explicitly.
assert len(Mutation_Table) == len(X_diff_rep), "Mutation_Table and X_diff_rep differ in row count"
# Drop a previously attached label column first so re-running this cell does
# not add a second "Mutation" column.
X_diff_rep = pd.concat(
    [Mutation_Table[["Mutation"]], X_diff_rep.drop(columns=["Mutation"], errors="ignore")],
    axis=1,
)
X_diff_rep.head()

Unnamed: 0,Mutation,diff_rep_0,diff_rep_1,diff_rep_2,diff_rep_3,diff_rep_4,diff_rep_5,diff_rep_6,diff_rep_7,diff_rep_8,...,diff_rep_502,diff_rep_503,diff_rep_504,diff_rep_505,diff_rep_506,diff_rep_507,diff_rep_508,diff_rep_509,diff_rep_510,diff_rep_511
0,GKR3BB@A@A158W,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.046061,0.0,0.0,-0.027907
1,GKR3BB@A@D107K,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.021997,0.0,0.0,-0.018757
2,GKR3BB@A@D107L,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.084567,0.0,0.0,-0.079762
3,GKR3BB@A@D107R,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.039522,0.0,0.0,-0.036042
4,GKR3BB@A@D134S,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,-0.00039,0.0,0.0,-0.014997


**Save dataframes:**

In [0]:
# os.path.join keeps the file name correct even if data_target_location is
# edited to drop its trailing slash (plain concatenation would fuse the names).
X_all.to_csv(
    os.path.join(data_target_location, "sequence_cnn_rep_all_default_01.csv"),
    index=False,
)

In [0]:
# os.path.join keeps the file name correct even if data_target_location is
# edited to drop its trailing slash (plain concatenation would fuse the names).
X_diff_rep.to_csv(
    os.path.join(data_target_location, "sequence_cnn_rep_diff_default_01.csv"),
    index=False,
)

### Default Model Second to Last Layer (2)

**Define Models:**

In [0]:
#original model
# Full regressor, rebuilt for this section. The repeated
# Conv1D -> BatchNormalization -> ReLU and Dense -> BatchNormalization -> ReLU
# triples are generated by loops; layer order and arguments are unchanged.
keras_layers = tf.keras.layers

inputs = keras_layers.Input(shape=(SEQUENCE_LEN,))
x = keras_layers.Embedding(CLASSES, 8, input_length=SEQUENCE_LEN)(inputs)

# First convolutional stack: (filters, kernel_size, strides) per triple.
for n_filters, kernel_width, stride in [(256, 7, 2), (256, 3, 1), (256, 3, 1)]:
  x = keras_layers.Conv1D(n_filters, kernel_width, stride)(x)
  x = keras_layers.BatchNormalization()(x)
  x = keras_layers.Activation("relu")(x)
x_mid = x  # branch point: this activation also feeds the skip path below

# Second convolutional stack on the pooled features.
x = keras_layers.MaxPooling1D(3)(x_mid)
for _ in range(3):
  x = keras_layers.Conv1D(512, 3)(x)
  x = keras_layers.BatchNormalization()(x)
  x = keras_layers.Activation("relu")(x)

# Collapse both branches over the sequence axis and join them.
x_mid = keras_layers.GlobalMaxPooling1D()(x_mid)
x = keras_layers.GlobalMaxPooling1D()(x)
x = keras_layers.concatenate([x_mid, x], axis=-1)

# Two dense triples followed by the single linear output unit.
for _ in range(2):
  x = keras_layers.Dense(512)(x)
  x = keras_layers.BatchNormalization()(x)
  x = keras_layers.Activation("relu")(x)
x = keras_layers.Dense(1, activation="linear")(x)

model_source = tf.keras.Model(inputs=inputs, outputs=x)
model_source.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            (None, 650)          0                                            
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, 650, 8)       168         input_3[0][0]                    
__________________________________________________________________________________________________
conv1d_12 (Conv1D)              (None, 322, 256)     14592       embedding_2[0][0]                
__________________________________________________________________________________________________
batch_normalization_v1_16 (Batc (None, 322, 256)     1024        conv1d_12[0][0]                  
__________________________________________________________________________________________________
activation

In [0]:
#truncated model
# Same layer sequence as the full regressor, but cut after the first
# Dense -> BatchNormalization -> ReLU triple.
keras_layers = tf.keras.layers

inputs = keras_layers.Input(shape=(SEQUENCE_LEN,))
x = keras_layers.Embedding(CLASSES, 8, input_length=SEQUENCE_LEN)(inputs)

# First convolutional stack: (filters, kernel_size, strides) per triple.
for n_filters, kernel_width, stride in [(256, 7, 2), (256, 3, 1), (256, 3, 1)]:
  x = keras_layers.Conv1D(n_filters, kernel_width, stride)(x)
  x = keras_layers.BatchNormalization()(x)
  x = keras_layers.Activation("relu")(x)
x_mid = x  # branch point: this activation also feeds the skip path below

# Second convolutional stack on the pooled features.
x = keras_layers.MaxPooling1D(3)(x_mid)
for _ in range(3):
  x = keras_layers.Conv1D(512, 3)(x)
  x = keras_layers.BatchNormalization()(x)
  x = keras_layers.Activation("relu")(x)

# Collapse both branches over the sequence axis and join them.
x_mid = keras_layers.GlobalMaxPooling1D()(x_mid)
x = keras_layers.GlobalMaxPooling1D()(x)
x = keras_layers.concatenate([x_mid, x], axis=-1)

# Only the first dense triple is kept; its ReLU activation is the model output.
x = keras_layers.Dense(512)(x)
x = keras_layers.BatchNormalization()(x)
x = keras_layers.Activation("relu")(x)

model = tf.keras.Model(inputs=inputs, outputs=x)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            (None, 650)          0                                            
__________________________________________________________________________________________________
embedding_3 (Embedding)         (None, 650, 8)       168         input_4[0][0]                    
__________________________________________________________________________________________________
conv1d_18 (Conv1D)              (None, 322, 256)     14592       embedding_3[0][0]                
__________________________________________________________________________________________________
batch_normalization_v1_24 (Batc (None, 322, 256)     1024        conv1d_18[0][0]                  
__________________________________________________________________________________________________
activation

**Calculate representations:**

In [0]:
# Run X_wt through the truncated model; show the first ten features of row 0.
X_wt_rep = calc_pred_truncated_model(
    model=model, model_source=model_source, weight_loc=weight_loc, X=X_wt
)
X_wt_rep[0, :10]

Calculating predictions:


array([0.        , 0.        , 0.        , 0.        , 0.30972362,
       0.6839075 , 0.        , 0.12057745, 0.        , 0.        ],
      dtype=float32)

In [0]:
# Run X_mut through the truncated model; show the first ten features of row 0.
X_mut_rep = calc_pred_truncated_model(
    model=model, model_source=model_source, weight_loc=weight_loc, X=X_mut
)
X_mut_rep[0, :10]

Calculating predictions:


array([0.        , 0.        , 0.        , 0.        , 0.22753215,
       0.79492664, 0.        , 0.10647225, 0.        , 0.        ],
      dtype=float32)

In [0]:
# Element-wise mutant-minus-wild-type difference; show the first ten features of row 0.
X_diff_rep = X_mut_rep - X_wt_rep
X_diff_rep[0, :10]

array([ 0.        ,  0.        ,  0.        ,  0.        , -0.08219147,
        0.11101913,  0.        , -0.0141052 ,  0.        ,  0.        ],
      dtype=float32)

**Create dataframes:**

In [0]:
# Rebuild the frame from the raw values and name columns by position, so that
# re-running this cell does not stack a second "wt_rep_" prefix onto columns
# that were already renamed.
X_wt_rep = pd.DataFrame(np.asarray(X_wt_rep))
X_wt_rep.columns = ["wt_rep_" + str(i) for i in range(X_wt_rep.shape[1])]
X_wt_rep.head()

Unnamed: 0,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,wt_rep_9,...,wt_rep_502,wt_rep_503,wt_rep_504,wt_rep_505,wt_rep_506,wt_rep_507,wt_rep_508,wt_rep_509,wt_rep_510,wt_rep_511
0,0.0,0.0,0.0,0.0,0.309724,0.683908,0.0,0.120577,0.0,0.0,...,0.0,0.0,0.0,0.483965,0.0,0.419097,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.309724,0.683908,0.0,0.120577,0.0,0.0,...,0.0,0.0,0.0,0.483965,0.0,0.419097,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.309724,0.683908,0.0,0.120577,0.0,0.0,...,0.0,0.0,0.0,0.483965,0.0,0.419097,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.309724,0.683908,0.0,0.120577,0.0,0.0,...,0.0,0.0,0.0,0.483965,0.0,0.419097,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.309724,0.683908,0.0,0.120577,0.0,0.0,...,0.0,0.0,0.0,0.483965,0.0,0.419097,0.0,0.0,0.0,0.0


In [0]:
# Rebuild the frame from the raw values and name columns by position, so that
# re-running this cell does not stack a second "mut_rep_" prefix onto columns
# that were already renamed.
X_mut_rep = pd.DataFrame(np.asarray(X_mut_rep))
X_mut_rep.columns = ["mut_rep_" + str(i) for i in range(X_mut_rep.shape[1])]
X_mut_rep.head()

Unnamed: 0,mut_rep_0,mut_rep_1,mut_rep_2,mut_rep_3,mut_rep_4,mut_rep_5,mut_rep_6,mut_rep_7,mut_rep_8,mut_rep_9,...,mut_rep_502,mut_rep_503,mut_rep_504,mut_rep_505,mut_rep_506,mut_rep_507,mut_rep_508,mut_rep_509,mut_rep_510,mut_rep_511
0,0.0,0.0,0.0,0.0,0.227532,0.794927,0.0,0.106472,0.0,0.0,...,0.0,0.0,0.0,0.396427,0.0,0.265413,0.0,0.0,0.0,0.087232
1,0.0,0.0,0.0,0.0,0.13692,0.780519,0.0,0.029353,0.0,0.0,...,0.0,0.0,0.0,0.554947,0.0,0.391552,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.317582,0.88762,0.0,0.011203,0.0,0.0,...,0.0,0.0,0.0,0.347676,0.0,0.262929,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.085185,0.691183,0.0,0.041582,0.0,0.0,...,0.0,0.0,0.0,0.501599,0.088222,0.309841,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.288409,0.648867,0.0,0.081969,0.0,0.0,...,0.0,0.0,0.0,0.560424,0.0,0.416985,0.0,0.0,0.0,0.0


In [0]:
# Rebuild the frame from the raw values and name columns by position, so that
# an immediate re-run of this cell does not produce doubly prefixed names such
# as "diff_rep_diff_rep_0".
X_diff_rep = pd.DataFrame(np.asarray(X_diff_rep))
X_diff_rep.columns = ["diff_rep_" + str(i) for i in range(X_diff_rep.shape[1])]
X_diff_rep.head()

Unnamed: 0,diff_rep_0,diff_rep_1,diff_rep_2,diff_rep_3,diff_rep_4,diff_rep_5,diff_rep_6,diff_rep_7,diff_rep_8,diff_rep_9,...,diff_rep_502,diff_rep_503,diff_rep_504,diff_rep_505,diff_rep_506,diff_rep_507,diff_rep_508,diff_rep_509,diff_rep_510,diff_rep_511
0,0.0,0.0,0.0,0.0,-0.082191,0.111019,0.0,-0.014105,0.0,0.0,...,0.0,0.0,0.0,-0.087538,0.0,-0.153684,0.0,0.0,0.0,0.087232
1,0.0,0.0,0.0,0.0,-0.172803,0.096611,0.0,-0.091224,0.0,0.0,...,0.0,0.0,0.0,0.070982,0.0,-0.027545,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.007858,0.203713,0.0,-0.109374,0.0,0.0,...,0.0,0.0,0.0,-0.136289,0.0,-0.156168,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,-0.224539,0.007275,0.0,-0.078995,0.0,0.0,...,0.0,0.0,0.0,0.017634,0.088222,-0.109256,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,-0.021314,-0.035041,0.0,-0.038608,0.0,0.0,...,0.0,0.0,0.0,0.076459,0.0,-0.002113,0.0,0.0,0.0,0.0


In [0]:
# Place the wild-type, mutant and difference frames side by side (column-wise).
representation_frames = [X_wt_rep, X_mut_rep, X_diff_rep]
X_all = pd.concat(representation_frames, axis="columns")
X_all.head()

Unnamed: 0,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,wt_rep_9,...,diff_rep_502,diff_rep_503,diff_rep_504,diff_rep_505,diff_rep_506,diff_rep_507,diff_rep_508,diff_rep_509,diff_rep_510,diff_rep_511
0,0.0,0.0,0.0,0.0,0.309724,0.683908,0.0,0.120577,0.0,0.0,...,0.0,0.0,0.0,-0.087538,0.0,-0.153684,0.0,0.0,0.0,0.087232
1,0.0,0.0,0.0,0.0,0.309724,0.683908,0.0,0.120577,0.0,0.0,...,0.0,0.0,0.0,0.070982,0.0,-0.027545,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.309724,0.683908,0.0,0.120577,0.0,0.0,...,0.0,0.0,0.0,-0.136289,0.0,-0.156168,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.309724,0.683908,0.0,0.120577,0.0,0.0,...,0.0,0.0,0.0,0.017634,0.088222,-0.109256,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.309724,0.683908,0.0,0.120577,0.0,0.0,...,0.0,0.0,0.0,0.076459,0.0,-0.002113,0.0,0.0,0.0,0.0


In [0]:
# concat(axis=1) aligns rows on the index, so a row-count mismatch would be
# padded with NaN instead of failing; check the pairing explicitly.
assert len(Mutation_Table) == len(X_all), "Mutation_Table and X_all differ in row count"
# Drop a previously attached label column first so re-running this cell does
# not add a second "Mutation" column.
X_all = pd.concat(
    [Mutation_Table[["Mutation"]], X_all.drop(columns=["Mutation"], errors="ignore")],
    axis=1,
)
X_all.head()

Unnamed: 0,Mutation,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,...,diff_rep_502,diff_rep_503,diff_rep_504,diff_rep_505,diff_rep_506,diff_rep_507,diff_rep_508,diff_rep_509,diff_rep_510,diff_rep_511
0,GKR3BB@A@A158W,0.0,0.0,0.0,0.0,0.309724,0.683908,0.0,0.120577,0.0,...,0.0,0.0,0.0,-0.087538,0.0,-0.153684,0.0,0.0,0.0,0.087232
1,GKR3BB@A@D107K,0.0,0.0,0.0,0.0,0.309724,0.683908,0.0,0.120577,0.0,...,0.0,0.0,0.0,0.070982,0.0,-0.027545,0.0,0.0,0.0,0.0
2,GKR3BB@A@D107L,0.0,0.0,0.0,0.0,0.309724,0.683908,0.0,0.120577,0.0,...,0.0,0.0,0.0,-0.136289,0.0,-0.156168,0.0,0.0,0.0,0.0
3,GKR3BB@A@D107R,0.0,0.0,0.0,0.0,0.309724,0.683908,0.0,0.120577,0.0,...,0.0,0.0,0.0,0.017634,0.088222,-0.109256,0.0,0.0,0.0,0.0
4,GKR3BB@A@D134S,0.0,0.0,0.0,0.0,0.309724,0.683908,0.0,0.120577,0.0,...,0.0,0.0,0.0,0.076459,0.0,-0.002113,0.0,0.0,0.0,0.0


In [0]:
# concat(axis=1) aligns rows on the index, so a row-count mismatch would be
# padded with NaN instead of failing; check the pairing explicitly.
assert len(Mutation_Table) == len(X_diff_rep), "Mutation_Table and X_diff_rep differ in row count"
# Drop a previously attached label column first so re-running this cell does
# not add a second "Mutation" column.
X_diff_rep = pd.concat(
    [Mutation_Table[["Mutation"]], X_diff_rep.drop(columns=["Mutation"], errors="ignore")],
    axis=1,
)
X_diff_rep.head()

Unnamed: 0,Mutation,diff_rep_0,diff_rep_1,diff_rep_2,diff_rep_3,diff_rep_4,diff_rep_5,diff_rep_6,diff_rep_7,diff_rep_8,...,diff_rep_502,diff_rep_503,diff_rep_504,diff_rep_505,diff_rep_506,diff_rep_507,diff_rep_508,diff_rep_509,diff_rep_510,diff_rep_511
0,GKR3BB@A@A158W,0.0,0.0,0.0,0.0,-0.082191,0.111019,0.0,-0.014105,0.0,...,0.0,0.0,0.0,-0.087538,0.0,-0.153684,0.0,0.0,0.0,0.087232
1,GKR3BB@A@D107K,0.0,0.0,0.0,0.0,-0.172803,0.096611,0.0,-0.091224,0.0,...,0.0,0.0,0.0,0.070982,0.0,-0.027545,0.0,0.0,0.0,0.0
2,GKR3BB@A@D107L,0.0,0.0,0.0,0.0,0.007858,0.203713,0.0,-0.109374,0.0,...,0.0,0.0,0.0,-0.136289,0.0,-0.156168,0.0,0.0,0.0,0.0
3,GKR3BB@A@D107R,0.0,0.0,0.0,0.0,-0.224539,0.007275,0.0,-0.078995,0.0,...,0.0,0.0,0.0,0.017634,0.088222,-0.109256,0.0,0.0,0.0,0.0
4,GKR3BB@A@D134S,0.0,0.0,0.0,0.0,-0.021314,-0.035041,0.0,-0.038608,0.0,...,0.0,0.0,0.0,0.076459,0.0,-0.002113,0.0,0.0,0.0,0.0


**Save dataframes:**

In [0]:
# os.path.join keeps the file name correct even if data_target_location is
# edited to drop its trailing slash (plain concatenation would fuse the names).
X_all.to_csv(
    os.path.join(data_target_location, "sequence_cnn_rep_all_default_02.csv"),
    index=False,
)

In [0]:
# os.path.join keeps the file name correct even if data_target_location is
# edited to drop its trailing slash (plain concatenation would fuse the names).
X_diff_rep.to_csv(
    os.path.join(data_target_location, "sequence_cnn_rep_diff_default_02.csv"),
    index=False,
)

### Default Model Third to Last Layer (3)

**Define Models:**

In [0]:
#original model
# Full regressor, rebuilt for this section. The repeated
# Conv1D -> BatchNormalization -> ReLU and Dense -> BatchNormalization -> ReLU
# triples are generated by loops; layer order and arguments are unchanged.
keras_layers = tf.keras.layers

inputs = keras_layers.Input(shape=(SEQUENCE_LEN,))
x = keras_layers.Embedding(CLASSES, 8, input_length=SEQUENCE_LEN)(inputs)

# First convolutional stack: (filters, kernel_size, strides) per triple.
for n_filters, kernel_width, stride in [(256, 7, 2), (256, 3, 1), (256, 3, 1)]:
  x = keras_layers.Conv1D(n_filters, kernel_width, stride)(x)
  x = keras_layers.BatchNormalization()(x)
  x = keras_layers.Activation("relu")(x)
x_mid = x  # branch point: this activation also feeds the skip path below

# Second convolutional stack on the pooled features.
x = keras_layers.MaxPooling1D(3)(x_mid)
for _ in range(3):
  x = keras_layers.Conv1D(512, 3)(x)
  x = keras_layers.BatchNormalization()(x)
  x = keras_layers.Activation("relu")(x)

# Collapse both branches over the sequence axis and join them.
x_mid = keras_layers.GlobalMaxPooling1D()(x_mid)
x = keras_layers.GlobalMaxPooling1D()(x)
x = keras_layers.concatenate([x_mid, x], axis=-1)

# Two dense triples followed by the single linear output unit.
for _ in range(2):
  x = keras_layers.Dense(512)(x)
  x = keras_layers.BatchNormalization()(x)
  x = keras_layers.Activation("relu")(x)
x = keras_layers.Dense(1, activation="linear")(x)

model_source = tf.keras.Model(inputs=inputs, outputs=x)
model_source.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_5 (InputLayer)            (None, 650)          0                                            
__________________________________________________________________________________________________
embedding_4 (Embedding)         (None, 650, 8)       168         input_5[0][0]                    
__________________________________________________________________________________________________
conv1d_24 (Conv1D)              (None, 322, 256)     14592       embedding_4[0][0]                
__________________________________________________________________________________________________
batch_normalization_v1_31 (Batc (None, 322, 256)     1024        conv1d_24[0][0]                  
__________________________________________________________________________________________________
activation

In [0]:
#truncated model
# Same layer sequence as the full regressor, but cut right after the
# concatenation of the two globally max-pooled branches (no dense layers).
keras_layers = tf.keras.layers

inputs = keras_layers.Input(shape=(SEQUENCE_LEN,))
x = keras_layers.Embedding(CLASSES, 8, input_length=SEQUENCE_LEN)(inputs)

# First convolutional stack: (filters, kernel_size, strides) per triple.
for n_filters, kernel_width, stride in [(256, 7, 2), (256, 3, 1), (256, 3, 1)]:
  x = keras_layers.Conv1D(n_filters, kernel_width, stride)(x)
  x = keras_layers.BatchNormalization()(x)
  x = keras_layers.Activation("relu")(x)
x_mid = x  # branch point: this activation also feeds the skip path below

# Second convolutional stack on the pooled features.
x = keras_layers.MaxPooling1D(3)(x_mid)
for _ in range(3):
  x = keras_layers.Conv1D(512, 3)(x)
  x = keras_layers.BatchNormalization()(x)
  x = keras_layers.Activation("relu")(x)

# Collapse both branches over the sequence axis; their concatenation is the output.
x_mid = keras_layers.GlobalMaxPooling1D()(x_mid)
x = keras_layers.GlobalMaxPooling1D()(x)
x = keras_layers.concatenate([x_mid, x], axis=-1)

model = tf.keras.Model(inputs=inputs, outputs=x)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            (None, 650)          0                                            
__________________________________________________________________________________________________
embedding_5 (Embedding)         (None, 650, 8)       168         input_6[0][0]                    
__________________________________________________________________________________________________
conv1d_30 (Conv1D)              (None, 322, 256)     14592       embedding_5[0][0]                
__________________________________________________________________________________________________
batch_normalization_v1_39 (Batc (None, 322, 256)     1024        conv1d_30[0][0]                  
__________________________________________________________________________________________________
activation

**Calculate representations:**

In [0]:
# Run X_wt through the truncated model; show the first ten features of row 0.
X_wt_rep = calc_pred_truncated_model(
    model=model, model_source=model_source, weight_loc=weight_loc, X=X_wt
)
X_wt_rep[0, :10]

Calculating predictions:


array([3.1033378, 2.5268912, 3.3325129, 3.814498 , 1.9288152, 2.9383054,
       2.3518345, 3.067183 , 2.4579315, 2.3095336], dtype=float32)

In [0]:
# Run X_mut through the truncated model; show the first ten features of row 0.
X_mut_rep = calc_pred_truncated_model(
    model=model, model_source=model_source, weight_loc=weight_loc, X=X_mut
)
X_mut_rep[0, :10]

Calculating predictions:


array([3.1033378, 2.5268912, 3.3325129, 3.814498 , 1.9288152, 2.9383054,
       2.3518345, 3.067183 , 2.4579315, 2.3095336], dtype=float32)

In [0]:
# Element-wise mutant-minus-wild-type difference; show the last ten features of row 0.
X_diff_rep = X_mut_rep - X_wt_rep
X_diff_rep[0, -10:]

array([ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.06087965, -0.00750494,  0.08981848, -0.31360698],
      dtype=float32)

**Create dataframes:**

In [0]:
# Rebuild the frame from the raw values and name columns by position, so that
# re-running this cell does not stack a second "wt_rep_" prefix onto columns
# that were already renamed.
X_wt_rep = pd.DataFrame(np.asarray(X_wt_rep))
X_wt_rep.columns = ["wt_rep_" + str(i) for i in range(X_wt_rep.shape[1])]
X_wt_rep.head()

Unnamed: 0,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,wt_rep_9,...,wt_rep_758,wt_rep_759,wt_rep_760,wt_rep_761,wt_rep_762,wt_rep_763,wt_rep_764,wt_rep_765,wt_rep_766,wt_rep_767
0,3.103338,2.526891,3.332513,3.814498,1.928815,2.938305,2.351835,3.067183,2.457932,2.309534,...,1.121733,2.024356,0.0,0.514163,2.497667,0.523262,0.657111,3.08848,3.864666,2.207023
1,3.103338,2.526891,3.332513,3.814498,1.928815,2.938305,2.351835,3.067183,2.457932,2.309534,...,1.121733,2.024356,0.0,0.514163,2.497667,0.523262,0.657111,3.08848,3.864666,2.207023
2,3.103338,2.526891,3.332513,3.814498,1.928815,2.938305,2.351835,3.067183,2.457932,2.309534,...,1.121733,2.024356,0.0,0.514163,2.497667,0.523262,0.657111,3.08848,3.864666,2.207023
3,3.103338,2.526891,3.332513,3.814498,1.928815,2.938305,2.351835,3.067183,2.457932,2.309534,...,1.121733,2.024356,0.0,0.514163,2.497667,0.523262,0.657111,3.08848,3.864666,2.207023
4,3.103338,2.526891,3.332513,3.814498,1.928815,2.938305,2.351835,3.067183,2.457932,2.309534,...,1.121733,2.024356,0.0,0.514163,2.497667,0.523262,0.657111,3.08848,3.864666,2.207023


In [0]:
# Wrap the mutant representations in a DataFrame with columns "mut_rep_<i>".
# Building the frame from the raw values and naming columns by position keeps the cell
# idempotent: re-running it on the already converted frame no longer produces
# "mut_rep_mut_rep_<i>" column names.
X_mut_rep = pd.DataFrame(np.asarray(X_mut_rep))
X_mut_rep.columns = ["mut_rep_" + str(i) for i in range(X_mut_rep.shape[1])]
X_mut_rep.head()

Unnamed: 0,mut_rep_0,mut_rep_1,mut_rep_2,mut_rep_3,mut_rep_4,mut_rep_5,mut_rep_6,mut_rep_7,mut_rep_8,mut_rep_9,...,mut_rep_758,mut_rep_759,mut_rep_760,mut_rep_761,mut_rep_762,mut_rep_763,mut_rep_764,mut_rep_765,mut_rep_766,mut_rep_767
0,3.103338,2.526891,3.332513,3.814498,1.928815,2.938305,2.351835,3.067183,2.457932,2.309534,...,1.121733,2.024356,0.0,0.514163,2.497667,0.523262,0.717991,3.080976,3.954484,1.893416
1,3.103338,2.526891,3.332513,3.814498,1.928815,2.938305,2.351835,3.067183,2.457932,2.309534,...,1.133292,2.024356,0.0,0.473732,2.497667,0.378163,0.657111,3.08848,3.864666,2.207023
2,3.103338,2.526891,3.332513,3.814498,1.928815,2.938305,2.351835,3.067183,2.457932,2.309534,...,1.066541,2.024356,0.0,0.473732,2.497667,0.378163,0.657111,3.08848,3.864666,2.207023
3,3.103338,2.526891,3.332513,3.814498,1.928815,2.938305,2.351835,3.067183,2.457932,2.309534,...,1.160029,2.024356,0.0,0.473732,2.497667,0.378163,0.657111,3.08848,3.864666,2.207023
4,3.103338,2.526891,3.332513,3.814498,1.928815,3.014178,2.351835,3.067183,2.457932,2.309534,...,1.124743,2.024356,0.0,0.519387,2.497667,0.523262,0.675846,3.08848,3.864666,2.207023


In [0]:
# Wrap the difference representations in a DataFrame with columns "diff_rep_<i>".
# A "Mutation" label column left over from an earlier run is dropped first, and the frame
# is rebuilt from the raw values with positional column names, so re-running the cell
# neither double-prefixes the columns nor renames the label column.
X_diff_rep = pd.DataFrame(X_diff_rep).drop(columns=["Mutation"], errors="ignore")
X_diff_rep = pd.DataFrame(np.asarray(X_diff_rep))
X_diff_rep.columns = ["diff_rep_" + str(i) for i in range(X_diff_rep.shape[1])]
X_diff_rep.head()

Unnamed: 0,diff_rep_0,diff_rep_1,diff_rep_2,diff_rep_3,diff_rep_4,diff_rep_5,diff_rep_6,diff_rep_7,diff_rep_8,diff_rep_9,...,diff_rep_758,diff_rep_759,diff_rep_760,diff_rep_761,diff_rep_762,diff_rep_763,diff_rep_764,diff_rep_765,diff_rep_766,diff_rep_767
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.06088,-0.007505,0.089818,-0.313607
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.011559,0.0,0.0,-0.040432,0.0,-0.145099,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.055192,0.0,0.0,-0.040432,0.0,-0.145099,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.038296,0.0,0.0,-0.040432,0.0,-0.145099,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.075872,0.0,0.0,0.0,0.0,...,0.00301,0.0,0.0,0.005224,0.0,0.0,0.018735,0.0,0.0,0.0


In [0]:
# Place wild-type, mutant and difference features side by side (column-wise).
X_all = pd.concat((X_wt_rep, X_mut_rep, X_diff_rep), axis="columns")
X_all.head()

Unnamed: 0,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,wt_rep_9,...,diff_rep_758,diff_rep_759,diff_rep_760,diff_rep_761,diff_rep_762,diff_rep_763,diff_rep_764,diff_rep_765,diff_rep_766,diff_rep_767
0,3.103338,2.526891,3.332513,3.814498,1.928815,2.938305,2.351835,3.067183,2.457932,2.309534,...,0.0,0.0,0.0,0.0,0.0,0.0,0.06088,-0.007505,0.089818,-0.313607
1,3.103338,2.526891,3.332513,3.814498,1.928815,2.938305,2.351835,3.067183,2.457932,2.309534,...,0.011559,0.0,0.0,-0.040432,0.0,-0.145099,0.0,0.0,0.0,0.0
2,3.103338,2.526891,3.332513,3.814498,1.928815,2.938305,2.351835,3.067183,2.457932,2.309534,...,-0.055192,0.0,0.0,-0.040432,0.0,-0.145099,0.0,0.0,0.0,0.0
3,3.103338,2.526891,3.332513,3.814498,1.928815,2.938305,2.351835,3.067183,2.457932,2.309534,...,0.038296,0.0,0.0,-0.040432,0.0,-0.145099,0.0,0.0,0.0,0.0
4,3.103338,2.526891,3.332513,3.814498,1.928815,2.938305,2.351835,3.067183,2.457932,2.309534,...,0.00301,0.0,0.0,0.005224,0.0,0.0,0.018735,0.0,0.0,0.0


In [0]:
# Prepend the "Mutation" identifier column from Mutation_Table to the combined features.
# Any "Mutation" column already present (from an earlier run of this cell) is dropped
# first, so re-executing the cell does not add a duplicate label column.
X_all = pd.concat(
    [Mutation_Table[["Mutation"]], X_all.drop(columns=["Mutation"], errors="ignore")],
    axis=1,
)
X_all.head()

Unnamed: 0,Mutation,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,...,diff_rep_758,diff_rep_759,diff_rep_760,diff_rep_761,diff_rep_762,diff_rep_763,diff_rep_764,diff_rep_765,diff_rep_766,diff_rep_767
0,GKR3BB@A@A158W,3.103338,2.526891,3.332513,3.814498,1.928815,2.938305,2.351835,3.067183,2.457932,...,0.0,0.0,0.0,0.0,0.0,0.0,0.06088,-0.007505,0.089818,-0.313607
1,GKR3BB@A@D107K,3.103338,2.526891,3.332513,3.814498,1.928815,2.938305,2.351835,3.067183,2.457932,...,0.011559,0.0,0.0,-0.040432,0.0,-0.145099,0.0,0.0,0.0,0.0
2,GKR3BB@A@D107L,3.103338,2.526891,3.332513,3.814498,1.928815,2.938305,2.351835,3.067183,2.457932,...,-0.055192,0.0,0.0,-0.040432,0.0,-0.145099,0.0,0.0,0.0,0.0
3,GKR3BB@A@D107R,3.103338,2.526891,3.332513,3.814498,1.928815,2.938305,2.351835,3.067183,2.457932,...,0.038296,0.0,0.0,-0.040432,0.0,-0.145099,0.0,0.0,0.0,0.0
4,GKR3BB@A@D134S,3.103338,2.526891,3.332513,3.814498,1.928815,2.938305,2.351835,3.067183,2.457932,...,0.00301,0.0,0.0,0.005224,0.0,0.0,0.018735,0.0,0.0,0.0


In [0]:
# Prepend the "Mutation" identifier column from Mutation_Table to the difference features.
# Any "Mutation" column already present (from an earlier run of this cell) is dropped
# first, so re-executing the cell does not add a duplicate label column.
X_diff_rep = pd.concat(
    [Mutation_Table[["Mutation"]], X_diff_rep.drop(columns=["Mutation"], errors="ignore")],
    axis=1,
)
X_diff_rep.head()

Unnamed: 0,Mutation,diff_rep_0,diff_rep_1,diff_rep_2,diff_rep_3,diff_rep_4,diff_rep_5,diff_rep_6,diff_rep_7,diff_rep_8,...,diff_rep_758,diff_rep_759,diff_rep_760,diff_rep_761,diff_rep_762,diff_rep_763,diff_rep_764,diff_rep_765,diff_rep_766,diff_rep_767
0,GKR3BB@A@A158W,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.06088,-0.007505,0.089818,-0.313607
1,GKR3BB@A@D107K,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.011559,0.0,0.0,-0.040432,0.0,-0.145099,0.0,0.0,0.0,0.0
2,GKR3BB@A@D107L,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.055192,0.0,0.0,-0.040432,0.0,-0.145099,0.0,0.0,0.0,0.0
3,GKR3BB@A@D107R,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.038296,0.0,0.0,-0.040432,0.0,-0.145099,0.0,0.0,0.0,0.0
4,GKR3BB@A@D134S,0.0,0.0,0.0,0.0,0.0,0.075872,0.0,0.0,0.0,...,0.00301,0.0,0.0,0.005224,0.0,0.0,0.018735,0.0,0.0,0.0


**Save dataframes:**

In [0]:
# Write the combined (wt + mut + diff) features to the target folder, without the index.
X_all.to_csv(
    path_or_buf=data_target_location + "sequence_cnn_rep_all_default_03.csv",
    index=False,
)

In [0]:
# Write the difference features to the target folder, without the index.
X_diff_rep.to_csv(
    path_or_buf=data_target_location + "sequence_cnn_rep_diff_default_03.csv",
    index=False,
)

### 64 Model Last Layer (1)

**Define Models:**

In [0]:
# Original (source) model: embedding -> two stacks of Conv1D/BatchNorm/ReLU blocks ->
# global max pooling of both stacks -> dense head (512, 512, 64) -> single linear output.
# Layers are created in the same order as before, so the resulting graph is unchanged.
inputs = tf.keras.layers.Input(shape=(SEQUENCE_LEN,))

x = tf.keras.layers.Embedding(input_dim=CLASSES, output_dim=8, input_length=SEQUENCE_LEN)(inputs)

# First stack: 256 filters; only the first convolution uses kernel size 7 with stride 2.
for conv_kernel, conv_stride in ((7, 2), (3, 1), (3, 1)):
    x = tf.keras.layers.Conv1D(filters=256, kernel_size=conv_kernel, strides=conv_stride)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation(activation="relu")(x)
x_mid = x

# Second stack: max-pool the first stack's output, then three 512-filter blocks.
x = tf.keras.layers.MaxPooling1D(pool_size=3)(x_mid)
for conv_kernel in (3, 3, 3):
    x = tf.keras.layers.Conv1D(filters=512, kernel_size=conv_kernel)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation(activation="relu")(x)

# Collapse the sequence axis of both stacks and join the two feature vectors.
x_mid = tf.keras.layers.GlobalMaxPooling1D()(x_mid)
x = tf.keras.layers.GlobalMaxPooling1D()(x)
x = tf.keras.layers.Concatenate(axis=-1)([x_mid, x])

# Dense head followed by the linear regression output.
for dense_units in (512, 512, 64):
    x = tf.keras.layers.Dense(units=dense_units)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation(activation="relu")(x)
x = tf.keras.layers.Dense(units=1, activation="linear")(x)

model_source = tf.keras.Model(inputs=inputs, outputs=x)
model_source.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_7 (InputLayer)            (None, 650)          0                                            
__________________________________________________________________________________________________
embedding_6 (Embedding)         (None, 650, 8)       168         input_7[0][0]                    
__________________________________________________________________________________________________
conv1d_36 (Conv1D)              (None, 322, 256)     14592       embedding_6[0][0]                
__________________________________________________________________________________________________
batch_normalization_v1_45 (Batc (None, 322, 256)     1024        conv1d_36[0][0]                  
__________________________________________________________________________________________________
activation

In [0]:
# Truncated model: same layer sequence as the original model, but it stops after the
# ReLU of the 64-unit dense block (no final linear output layer), so its output is the
# 64-dimensional activation.
# Layers are created in the same order as before, so the resulting graph is unchanged.
inputs = tf.keras.layers.Input(shape=(SEQUENCE_LEN,))

x = tf.keras.layers.Embedding(input_dim=CLASSES, output_dim=8, input_length=SEQUENCE_LEN)(inputs)

# First stack: 256 filters; only the first convolution uses kernel size 7 with stride 2.
for conv_kernel, conv_stride in ((7, 2), (3, 1), (3, 1)):
    x = tf.keras.layers.Conv1D(filters=256, kernel_size=conv_kernel, strides=conv_stride)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation(activation="relu")(x)
x_mid = x

# Second stack: max-pool the first stack's output, then three 512-filter blocks.
x = tf.keras.layers.MaxPooling1D(pool_size=3)(x_mid)
for conv_kernel in (3, 3, 3):
    x = tf.keras.layers.Conv1D(filters=512, kernel_size=conv_kernel)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation(activation="relu")(x)

# Collapse the sequence axis of both stacks and join the two feature vectors.
x_mid = tf.keras.layers.GlobalMaxPooling1D()(x_mid)
x = tf.keras.layers.GlobalMaxPooling1D()(x)
x = tf.keras.layers.Concatenate(axis=-1)([x_mid, x])

# Dense head; the model output is taken after the last ReLU.
for dense_units in (512, 512, 64):
    x = tf.keras.layers.Dense(units=dense_units)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation(activation="relu")(x)

model = tf.keras.Model(inputs=inputs, outputs=x)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_8 (InputLayer)            (None, 650)          0                                            
__________________________________________________________________________________________________
embedding_7 (Embedding)         (None, 650, 8)       168         input_8[0][0]                    
__________________________________________________________________________________________________
conv1d_42 (Conv1D)              (None, 322, 256)     14592       embedding_7[0][0]                
__________________________________________________________________________________________________
batch_normalization_v1_54 (Batc (None, 322, 256)     1024        conv1d_42[0][0]                  
__________________________________________________________________________________________________
activation

**Calculate representations:**

In [0]:
# Compute the representations of the wild-type inputs (X_wt) with the truncated 64 model,
# using the weight file at weight_loc_64.
# NOTE(review): calc_pred_truncated_model is defined outside this cell; presumably it loads
# the weights through model_source and predicts with `model` — confirm against its definition.
X_wt_rep = calc_pred_truncated_model(model, model_source, weight_loc_64, X_wt)
# Preview: first 10 values of the first entry.
X_wt_rep[0][:10]

Calculating predictions:


array([0.26906392, 0.        , 0.10230337, 0.1276019 , 0.        ,
       0.27099025, 0.        , 0.00934488, 0.        , 0.        ],
      dtype=float32)

In [0]:
# Compute the representations of the mutant inputs (X_mut) with the truncated 64 model,
# using the weight file at weight_loc_64.
# NOTE(review): calc_pred_truncated_model is defined outside this cell; presumably it loads
# the weights through model_source and predicts with `model` — confirm against its definition.
X_mut_rep = calc_pred_truncated_model(model, model_source, weight_loc_64, X_mut)
# Preview: first 10 values of the first entry.
X_mut_rep[0][:10]

Calculating predictions:


array([0.19138724, 0.        , 0.10832093, 0.08528879, 0.        ,
       0.35608885, 0.        , 0.07154334, 0.        , 0.        ],
      dtype=float32)

In [0]:
# Difference representation: mutant minus wild-type, element by element.
# np.asarray() forces a purely positional subtraction. Without it, re-running this cell
# after X_mut_rep / X_wt_rep have been turned into DataFrames (columns "mut_rep_*" vs
# "wt_rep_*") lets pandas either align on the non-matching labels (all-NaN result) or
# carry the "mut_rep_*" labels into the difference, depending on the pandas version.
X_diff_rep = np.subtract(np.asarray(X_mut_rep), np.asarray(X_wt_rep))
# Preview: last 10 values of the first entry.
X_diff_rep[0][-10:]

array([ 0.        , -0.02008142, -0.02466768, -0.00646071,  0.        ,
        0.10297406,  0.1400678 ,  0.        , -0.00916907,  0.07424417],
      dtype=float32)

**Create dataframes:**

In [0]:
# Wrap the wild-type representations in a DataFrame with columns "wt_rep_<i>".
# Building the frame from the raw values and naming columns by position keeps the cell
# idempotent: re-running it on the already converted frame no longer produces
# "wt_rep_wt_rep_<i>" column names.
X_wt_rep = pd.DataFrame(np.asarray(X_wt_rep))
X_wt_rep.columns = ["wt_rep_" + str(i) for i in range(X_wt_rep.shape[1])]
X_wt_rep.head()

Unnamed: 0,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,wt_rep_9,...,wt_rep_54,wt_rep_55,wt_rep_56,wt_rep_57,wt_rep_58,wt_rep_59,wt_rep_60,wt_rep_61,wt_rep_62,wt_rep_63
0,0.269064,0.0,0.102303,0.127602,0.0,0.27099,0.0,0.009345,0.0,0.0,...,0.0,0.132307,0.024668,0.133013,0.0,0.565307,0.209598,0.0,0.206115,0.265239
1,0.269064,0.0,0.102303,0.127602,0.0,0.27099,0.0,0.009345,0.0,0.0,...,0.0,0.132307,0.024668,0.133013,0.0,0.565307,0.209598,0.0,0.206115,0.265239
2,0.269064,0.0,0.102303,0.127602,0.0,0.27099,0.0,0.009345,0.0,0.0,...,0.0,0.132307,0.024668,0.133013,0.0,0.565307,0.209598,0.0,0.206115,0.265239
3,0.269064,0.0,0.102303,0.127602,0.0,0.27099,0.0,0.009345,0.0,0.0,...,0.0,0.132307,0.024668,0.133013,0.0,0.565307,0.209598,0.0,0.206115,0.265239
4,0.269064,0.0,0.102303,0.127602,0.0,0.27099,0.0,0.009345,0.0,0.0,...,0.0,0.132307,0.024668,0.133013,0.0,0.565307,0.209598,0.0,0.206115,0.265239


In [0]:
# Wrap the mutant representations in a DataFrame with columns "mut_rep_<i>".
# Building the frame from the raw values and naming columns by position keeps the cell
# idempotent: re-running it on the already converted frame no longer produces
# "mut_rep_mut_rep_<i>" column names.
X_mut_rep = pd.DataFrame(np.asarray(X_mut_rep))
X_mut_rep.columns = ["mut_rep_" + str(i) for i in range(X_mut_rep.shape[1])]
X_mut_rep.head()

Unnamed: 0,mut_rep_0,mut_rep_1,mut_rep_2,mut_rep_3,mut_rep_4,mut_rep_5,mut_rep_6,mut_rep_7,mut_rep_8,mut_rep_9,...,mut_rep_54,mut_rep_55,mut_rep_56,mut_rep_57,mut_rep_58,mut_rep_59,mut_rep_60,mut_rep_61,mut_rep_62,mut_rep_63
0,0.191387,0.0,0.108321,0.085289,0.0,0.356089,0.0,0.071543,0.0,0.0,...,0.0,0.112225,0.0,0.126552,0.0,0.668281,0.349666,0.0,0.196946,0.339483
1,0.259323,0.0,0.098922,0.052844,0.006781,0.370381,0.0,0.082768,0.0,0.0,...,0.0,0.167361,0.0,0.117424,0.0,0.676727,0.351142,0.0,0.168288,0.338289
2,0.265779,0.0,0.104646,0.073109,0.0,0.31909,0.0,0.0274,0.0,0.0,...,0.0,0.174379,0.0,0.126469,0.0,0.621302,0.235194,0.0,0.19556,0.277855
3,0.252638,0.0,0.099183,0.062159,0.0074,0.360916,0.0,0.080707,0.0,0.0,...,0.0,0.153043,0.0,0.119102,0.0,0.663157,0.356571,0.0,0.193891,0.340835
4,0.270261,0.0,0.103543,0.123668,0.0,0.300326,0.0,0.030325,0.0,0.0,...,0.002959,0.12914,0.0,0.129875,0.0,0.605906,0.224742,0.0,0.193487,0.267496


In [0]:
# Wrap the difference representations in a DataFrame with columns "diff_rep_<i>".
# A "Mutation" label column left over from an earlier run is dropped first, and the frame
# is rebuilt from the raw values with positional column names, so re-running the cell
# neither double-prefixes the columns nor renames the label column.
X_diff_rep = pd.DataFrame(X_diff_rep).drop(columns=["Mutation"], errors="ignore")
X_diff_rep = pd.DataFrame(np.asarray(X_diff_rep))
X_diff_rep.columns = ["diff_rep_" + str(i) for i in range(X_diff_rep.shape[1])]
X_diff_rep.head()

Unnamed: 0,diff_rep_0,diff_rep_1,diff_rep_2,diff_rep_3,diff_rep_4,diff_rep_5,diff_rep_6,diff_rep_7,diff_rep_8,diff_rep_9,...,diff_rep_54,diff_rep_55,diff_rep_56,diff_rep_57,diff_rep_58,diff_rep_59,diff_rep_60,diff_rep_61,diff_rep_62,diff_rep_63
0,-0.077677,0.0,0.006018,-0.042313,0.0,0.085099,0.0,0.062198,0.0,0.0,...,0.0,-0.020081,-0.024668,-0.006461,0.0,0.102974,0.140068,0.0,-0.009169,0.074244
1,-0.009741,0.0,-0.003382,-0.074758,0.006781,0.099391,0.0,0.073423,0.0,0.0,...,0.0,0.035055,-0.024668,-0.015589,0.0,0.11142,0.141543,0.0,-0.037826,0.07305
2,-0.003285,0.0,0.002342,-0.054493,0.0,0.048099,0.0,0.018055,0.0,0.0,...,0.0,0.042072,-0.024668,-0.006544,0.0,0.055995,0.025596,0.0,-0.010555,0.012616
3,-0.016426,0.0,-0.00312,-0.065443,0.0074,0.089925,0.0,0.071362,0.0,0.0,...,0.0,0.020736,-0.024668,-0.01391,0.0,0.09785,0.146973,0.0,-0.012223,0.075596
4,0.001197,0.0,0.001239,-0.003934,0.0,0.029336,0.0,0.02098,0.0,0.0,...,0.002959,-0.003167,-0.024668,-0.003138,0.0,0.040599,0.015143,0.0,-0.012627,0.002257


In [0]:
# Place wild-type, mutant and difference features side by side (column-wise).
X_all = pd.concat((X_wt_rep, X_mut_rep, X_diff_rep), axis="columns")
X_all.head()

Unnamed: 0,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,wt_rep_9,...,diff_rep_54,diff_rep_55,diff_rep_56,diff_rep_57,diff_rep_58,diff_rep_59,diff_rep_60,diff_rep_61,diff_rep_62,diff_rep_63
0,0.269064,0.0,0.102303,0.127602,0.0,0.27099,0.0,0.009345,0.0,0.0,...,0.0,-0.020081,-0.024668,-0.006461,0.0,0.102974,0.140068,0.0,-0.009169,0.074244
1,0.269064,0.0,0.102303,0.127602,0.0,0.27099,0.0,0.009345,0.0,0.0,...,0.0,0.035055,-0.024668,-0.015589,0.0,0.11142,0.141543,0.0,-0.037826,0.07305
2,0.269064,0.0,0.102303,0.127602,0.0,0.27099,0.0,0.009345,0.0,0.0,...,0.0,0.042072,-0.024668,-0.006544,0.0,0.055995,0.025596,0.0,-0.010555,0.012616
3,0.269064,0.0,0.102303,0.127602,0.0,0.27099,0.0,0.009345,0.0,0.0,...,0.0,0.020736,-0.024668,-0.01391,0.0,0.09785,0.146973,0.0,-0.012223,0.075596
4,0.269064,0.0,0.102303,0.127602,0.0,0.27099,0.0,0.009345,0.0,0.0,...,0.002959,-0.003167,-0.024668,-0.003138,0.0,0.040599,0.015143,0.0,-0.012627,0.002257


In [0]:
# Prepend the "Mutation" identifier column from Mutation_Table to the combined features.
# Any "Mutation" column already present (from an earlier run of this cell) is dropped
# first, so re-executing the cell does not add a duplicate label column.
X_all = pd.concat(
    [Mutation_Table[["Mutation"]], X_all.drop(columns=["Mutation"], errors="ignore")],
    axis=1,
)
X_all.head()

Unnamed: 0,Mutation,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,...,diff_rep_54,diff_rep_55,diff_rep_56,diff_rep_57,diff_rep_58,diff_rep_59,diff_rep_60,diff_rep_61,diff_rep_62,diff_rep_63
0,GKR3BB@A@A158W,0.269064,0.0,0.102303,0.127602,0.0,0.27099,0.0,0.009345,0.0,...,0.0,-0.020081,-0.024668,-0.006461,0.0,0.102974,0.140068,0.0,-0.009169,0.074244
1,GKR3BB@A@D107K,0.269064,0.0,0.102303,0.127602,0.0,0.27099,0.0,0.009345,0.0,...,0.0,0.035055,-0.024668,-0.015589,0.0,0.11142,0.141543,0.0,-0.037826,0.07305
2,GKR3BB@A@D107L,0.269064,0.0,0.102303,0.127602,0.0,0.27099,0.0,0.009345,0.0,...,0.0,0.042072,-0.024668,-0.006544,0.0,0.055995,0.025596,0.0,-0.010555,0.012616
3,GKR3BB@A@D107R,0.269064,0.0,0.102303,0.127602,0.0,0.27099,0.0,0.009345,0.0,...,0.0,0.020736,-0.024668,-0.01391,0.0,0.09785,0.146973,0.0,-0.012223,0.075596
4,GKR3BB@A@D134S,0.269064,0.0,0.102303,0.127602,0.0,0.27099,0.0,0.009345,0.0,...,0.002959,-0.003167,-0.024668,-0.003138,0.0,0.040599,0.015143,0.0,-0.012627,0.002257


In [0]:
# Prepend the "Mutation" identifier column from Mutation_Table to the difference features.
# Selecting Mutation_Table[["Mutation"]] already yields a first column named "Mutation",
# so the former manual renaming of column 0 was a no-op and has been removed.
# Any "Mutation" column already present (from an earlier run of this cell) is dropped
# first, so re-executing the cell does not add a duplicate label column.
X_diff_rep = pd.concat(
    [Mutation_Table[["Mutation"]], X_diff_rep.drop(columns=["Mutation"], errors="ignore")],
    axis=1,
)
X_diff_rep.head()

Unnamed: 0,Mutation,diff_rep_0,diff_rep_1,diff_rep_2,diff_rep_3,diff_rep_4,diff_rep_5,diff_rep_6,diff_rep_7,diff_rep_8,...,diff_rep_54,diff_rep_55,diff_rep_56,diff_rep_57,diff_rep_58,diff_rep_59,diff_rep_60,diff_rep_61,diff_rep_62,diff_rep_63
0,GKR3BB@A@A158W,-0.077677,0.0,0.006018,-0.042313,0.0,0.085099,0.0,0.062198,0.0,...,0.0,-0.020081,-0.024668,-0.006461,0.0,0.102974,0.140068,0.0,-0.009169,0.074244
1,GKR3BB@A@D107K,-0.009741,0.0,-0.003382,-0.074758,0.006781,0.099391,0.0,0.073423,0.0,...,0.0,0.035055,-0.024668,-0.015589,0.0,0.11142,0.141543,0.0,-0.037826,0.07305
2,GKR3BB@A@D107L,-0.003285,0.0,0.002342,-0.054493,0.0,0.048099,0.0,0.018055,0.0,...,0.0,0.042072,-0.024668,-0.006544,0.0,0.055995,0.025596,0.0,-0.010555,0.012616
3,GKR3BB@A@D107R,-0.016426,0.0,-0.00312,-0.065443,0.0074,0.089925,0.0,0.071362,0.0,...,0.0,0.020736,-0.024668,-0.01391,0.0,0.09785,0.146973,0.0,-0.012223,0.075596
4,GKR3BB@A@D134S,0.001197,0.0,0.001239,-0.003934,0.0,0.029336,0.0,0.02098,0.0,...,0.002959,-0.003167,-0.024668,-0.003138,0.0,0.040599,0.015143,0.0,-0.012627,0.002257


**Save dataframes:**

In [0]:
# Write the combined (wt + mut + diff) features of the 64 model to the target folder,
# without the index.
X_all.to_csv(
    path_or_buf=data_target_location + "sequence_cnn_rep_all_64_01.csv",
    index=False,
)

In [0]:
# Write the difference features of the 64 model to the target folder, without the index.
X_diff_rep.to_csv(
    path_or_buf=data_target_location + "sequence_cnn_rep_diff_64_01.csv",
    index=False,
)