This notebook takes the hyperparameter-tuned CNNs from BacDive+ that were selected for transfer learning, feeds them the sequences from Merck&Co+ T251, and extracts their CNN representations (mutant, wild type, and the difference between the two) at different layers.

## Set up notebook and environment: ##

### Connect to google drive: ###

In [0]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.image as mpimg
from keras.utils import plot_model
from sklearn import preprocessing
import gc

Using TensorFlow backend.


In [0]:
# Mount Google Drive inside the Colab runtime at /content/gdrive.
# NOTE(review): the data paths defined further down are relative ("gdrive/..."),
# so they presumably rely on the working directory being /content - confirm.
from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


### Specify the location of all relevant data (insert your own file locations)

In [0]:
print("Check correctnes of locations: ")

# Input folder (encoded sequences + mutation table) and output folder
# (exported CNN representations). Both strings end with a path separator.
data_folder_location = "gdrive/My Drive/iGEM/Databases/Merck&Co/Data/T251/sequence_sampler_T251/"
data_target_location = "gdrive/My Drive/iGEM/Databases/Merck&Co/Data/T251/cnn_rep_sampler_T251/"

print("Folders: ")
for folder in (data_folder_location, data_target_location):
  print("\t", os.path.isdir(folder))

print("Files: ")
# Inputs living in the data folder.
X_wt_loc = os.path.join(data_folder_location, "T251_X_wild.npy")
X_mut_loc = os.path.join(data_folder_location, "T251_X_mut.npy")
Mutation_Table_loc = os.path.join(data_folder_location, "T251_sequences_only.csv")
# Weight files of the transfer-learning CNNs.
weight_loc = "gdrive/My Drive/iGEM/Databases/BacDive/Models/CNN_Regressor_Transfer_Learning/model_transfer_learning_default.hdf5"
weight_loc_64 = "gdrive/My Drive/iGEM/Databases/BacDive/Models/CNN_Regressor_Transfer_Learning/model_transfer_learning_default_64.hdf5"

# One True/False line per expected file, in the order listed.
for file_loc in (X_wt_loc, X_mut_loc, Mutation_Table_loc, weight_loc, weight_loc_64):
  print("\t", os.path.isfile(file_loc))

Check correctnes of locations: 
Folders: 
	 True
	 True
Files: 
	 True
	 True
	 True
	 True
	 True


### Retrieve Data 

In [0]:
# Load the wild-type array (T251_X_wild.npy); it is used below as network input.
X_wt = np.load(X_wt_loc)

In [0]:
# Load the mutant array (T251_X_mut.npy); it is used below as network input.
X_mut = np.load(X_mut_loc)

In [0]:
# Load the mutation table; only its "MUTATION" column is used below, as row label.
# NOTE(review): presumably its rows are in the same order as X_wt / X_mut - confirm.
Mutation_Table = pd.read_csv(Mutation_Table_loc)

### Supplementary functions

In [0]:
def calc_pred_truncated_model(model, model_source, weight_loc, X):
  """Return the output of a truncated model that reuses trained weights.

  The weights stored at ``weight_loc`` are loaded into ``model_source`` and
  then copied, position by position in ``layers`` order, into ``model``.
  ``model`` therefore has to be a prefix of ``model_source``: layer ``i`` of
  both models must hold identically shaped weights.

  Neither model is compiled here. Only ``load_weights``, ``get_weights``,
  ``set_weights`` and ``predict`` are called, and none of them needs an
  optimizer or a loss, so the function does not depend on a particular
  optimizer API.

  Args:
    model: truncated model whose output is the wanted representation.
    model_source: full model matching the architecture of the weight file.
    weight_loc: path handed to ``model_source.load_weights``.
    X: inputs handed to ``model.predict``.

  Returns:
    The result of ``model.predict(X, verbose=1)``.

  Raises:
    ValueError: if ``model`` has more layers than ``model_source``, in which
      case it cannot be a truncated version of it.
  """
  n_target = len(model.layers)
  n_source = len(model_source.layers)
  if n_target > n_source:
    raise ValueError(
      "Truncated model has %d layers but the source model only has %d."
      % (n_target, n_source)
    )

  model_source.load_weights(weight_loc)

  # Transfer the weights of the shared leading layers; zip stops at the last
  # layer of the (shorter) truncated model.
  for target_layer, source_layer in zip(model.layers, model_source.layers):
    target_layer.set_weights(source_layer.get_weights())

  print("Calculating predictions:")
  y_pred = model.predict(X, verbose=1)

  return y_pred

## Extract representation


In [0]:
SEQUENCE_LEN = 650  # length of the integer-encoded input sequences fed to the models
CLASSES = 21 # sequences with non-canonical AAs were removed, so 20 amino acids remain plus "0" encoding "None"

### Default Model Last Layer (1)

**Define Models:**

In [0]:
# Full regression network; the trained weight file is loaded into this model.
inputs = tf.keras.layers.Input(shape=(SEQUENCE_LEN,))
x = tf.keras.layers.Embedding(CLASSES, 8, input_length=SEQUENCE_LEN)(inputs)

# First convolutional stack: (filters, kernel size, stride) per Conv/BN/ReLU unit.
for n_filters, kernel_size, stride in ((256, 7, 2), (256, 3, 1), (256, 3, 1)):
  x = tf.keras.layers.Conv1D(n_filters, kernel_size, stride)(x)
  x = tf.keras.layers.BatchNormalization()(x)
  x = tf.keras.layers.Activation("relu")(x)
x_mid = x  # branch point: also pooled globally further down

# Second convolutional stack on the max-pooled first stack.
x = tf.keras.layers.MaxPooling1D(3)(x_mid)
for unit_idx in range(3):
  x = tf.keras.layers.Conv1D(512, 3)(x)
  x = tf.keras.layers.BatchNormalization()(x)
  x = tf.keras.layers.Activation("relu")(x)

# Global max pooling of both branches, then concatenation.
x_mid = tf.keras.layers.GlobalMaxPooling1D()(x_mid)
x = tf.keras.layers.GlobalMaxPooling1D()(x)
x = tf.keras.layers.concatenate([x_mid, x], axis=-1)

# Two Dense/BN/ReLU units followed by the linear regression output.
for unit_idx in range(2):
  x = tf.keras.layers.Dense(512)(x)
  x = tf.keras.layers.BatchNormalization()(x)
  x = tf.keras.layers.Activation("relu")(x)
x = tf.keras.layers.Dense(1, activation="linear")(x)

model_source = tf.keras.Model(inputs=inputs, outputs=x)
model_source.summary()

Instructions for updating:
Colocations handled automatically by placer.
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 650)          0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, 650, 8)       168         input_1[0][0]                    
__________________________________________________________________________________________________
conv1d (Conv1D)                 (None, 322, 256)     14592       embedding[0][0]                  
__________________________________________________________________________________________________
batch_normalization_v1 (BatchNo (None, 322, 256)     1024        conv1d[0][0]                     
_____________________________________

In [0]:
# Truncated network: same layers as the full model, but it stops at the ReLU
# of the second Dense(512) unit (the final Dense(1) output is left out).
inputs = tf.keras.layers.Input(shape=(SEQUENCE_LEN,))
x = tf.keras.layers.Embedding(CLASSES, 8, input_length=SEQUENCE_LEN)(inputs)

# First convolutional stack: (filters, kernel size, stride) per Conv/BN/ReLU unit.
for n_filters, kernel_size, stride in ((256, 7, 2), (256, 3, 1), (256, 3, 1)):
  x = tf.keras.layers.Conv1D(n_filters, kernel_size, stride)(x)
  x = tf.keras.layers.BatchNormalization()(x)
  x = tf.keras.layers.Activation("relu")(x)
x_mid = x  # branch point: also pooled globally further down

# Second convolutional stack on the max-pooled first stack.
x = tf.keras.layers.MaxPooling1D(3)(x_mid)
for unit_idx in range(3):
  x = tf.keras.layers.Conv1D(512, 3)(x)
  x = tf.keras.layers.BatchNormalization()(x)
  x = tf.keras.layers.Activation("relu")(x)

# Global max pooling of both branches, then concatenation.
x_mid = tf.keras.layers.GlobalMaxPooling1D()(x_mid)
x = tf.keras.layers.GlobalMaxPooling1D()(x)
x = tf.keras.layers.concatenate([x_mid, x], axis=-1)

# Both Dense/BN/ReLU units; the last ReLU is the model output.
for unit_idx in range(2):
  x = tf.keras.layers.Dense(512)(x)
  x = tf.keras.layers.BatchNormalization()(x)
  x = tf.keras.layers.Activation("relu")(x)

model = tf.keras.Model(inputs=inputs, outputs=x)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 650)          0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 650, 8)       168         input_2[0][0]                    
__________________________________________________________________________________________________
conv1d_6 (Conv1D)               (None, 322, 256)     14592       embedding_1[0][0]                
__________________________________________________________________________________________________
batch_normalization_v1_8 (Batch (None, 322, 256)     1024        conv1d_6[0][0]                   
__________________________________________________________________________________________________
activation

**Calculate representations:**

In [0]:
# Output of the truncated model for the wild-type inputs.
X_wt_rep = calc_pred_truncated_model(model, model_source, weight_loc, X_wt)
X_wt_rep[0][:10]  # peek: first 10 values of the first row

Calculating predictions:


array([0.        , 0.        , 0.        , 0.13785997, 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.05584997],
      dtype=float32)

In [0]:
# Output of the truncated model for the mutant inputs.
X_mut_rep = calc_pred_truncated_model(model, model_source, weight_loc, X_mut)
X_mut_rep[0][:10]  # peek: first 10 values of the first row

Calculating predictions:


array([0.        , 0.        , 0.        , 0.16585964, 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.10158187],
      dtype=float32)

In [0]:
# Difference representation: mutant minus wild type, element by element.
# The inputs are reduced to plain arrays first so this cell stays correct when it
# is re-run after X_mut_rep / X_wt_rep have been turned into labelled DataFrames
# (their column labels differ, so a label-aware subtraction would not line up).
assert np.shape(X_mut_rep) == np.shape(X_wt_rep), "mutant and wild-type representations differ in shape"
X_diff_rep = np.subtract(np.asarray(X_mut_rep), np.asarray(X_wt_rep))
X_diff_rep[0][-10:]  # peek: last 10 values of the first row

array([0.       , 0.       , 0.       , 0.       , 0.       , 0.       ,
       0.0270212, 0.       , 0.       , 0.       ], dtype=float32)

**Create dataframes:**

In [0]:
# Wrap the wild-type representation in a DataFrame with columns wt_rep_0, wt_rep_1, ...
# The frame is built from the raw values so the cell can be re-run: wrapping an
# already labelled frame again would stack the prefix ("wt_rep_wt_rep_0", ...).
X_wt_rep = pd.DataFrame(np.asarray(X_wt_rep)).add_prefix("wt_rep_")
X_wt_rep.head()

Unnamed: 0,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,wt_rep_9,...,wt_rep_502,wt_rep_503,wt_rep_504,wt_rep_505,wt_rep_506,wt_rep_507,wt_rep_508,wt_rep_509,wt_rep_510,wt_rep_511
0,0.0,0.0,0.0,0.13786,0.0,0.0,0.0,0.0,0.0,0.05585,...,0.0,0.0,0.0,0.0,0.0,0.0,0.241162,0.0,0.0,0.0
1,0.0,0.0,0.0,0.13786,0.0,0.0,0.0,0.0,0.0,0.05585,...,0.0,0.0,0.0,0.0,0.0,0.0,0.241162,0.0,0.0,0.0
2,0.0,0.0,0.0,0.13786,0.0,0.0,0.0,0.0,0.0,0.05585,...,0.0,0.0,0.0,0.0,0.0,0.0,0.241162,0.0,0.0,0.0
3,0.0,0.0,0.0,0.13786,0.0,0.0,0.0,0.0,0.0,0.05585,...,0.0,0.0,0.0,0.0,0.0,0.0,0.241162,0.0,0.0,0.0
4,0.0,0.0,0.0,0.13786,0.0,0.0,0.0,0.0,0.0,0.05585,...,0.0,0.0,0.0,0.0,0.0,0.0,0.241162,0.0,0.0,0.0


In [0]:
# Wrap the mutant representation in a DataFrame with columns mut_rep_0, mut_rep_1, ...
# The frame is built from the raw values so the cell can be re-run: wrapping an
# already labelled frame again would stack the prefix ("mut_rep_mut_rep_0", ...).
X_mut_rep = pd.DataFrame(np.asarray(X_mut_rep)).add_prefix("mut_rep_")
X_mut_rep.head()

Unnamed: 0,mut_rep_0,mut_rep_1,mut_rep_2,mut_rep_3,mut_rep_4,mut_rep_5,mut_rep_6,mut_rep_7,mut_rep_8,mut_rep_9,...,mut_rep_502,mut_rep_503,mut_rep_504,mut_rep_505,mut_rep_506,mut_rep_507,mut_rep_508,mut_rep_509,mut_rep_510,mut_rep_511
0,0.0,0.0,0.0,0.16586,0.0,0.0,0.0,0.0,0.0,0.101582,...,0.0,0.0,0.0,0.0,0.0,0.0,0.268184,0.0,0.0,0.0
1,0.0,0.0,0.0,0.151633,0.0,0.0,0.0,0.0,0.0,0.091009,...,0.0,0.0,0.0,0.0,0.0,0.0,0.27001,0.0,0.0,0.0
2,0.0,0.0,0.0,0.163929,0.0,0.0,0.0,0.0,0.0,0.077448,...,0.0,0.0,0.0,0.0,0.0,0.0,0.251195,0.0,0.0,0.0
3,0.0,0.0,0.0,0.181372,0.0,0.0,0.0,0.0,0.0,0.115365,...,0.0,0.0,0.0,0.0,0.0,0.0,0.276369,0.0,0.0,0.0
4,0.0,0.0,0.0,0.146416,0.0,0.0,0.0,0.0,0.003839,0.099578,...,0.0,0.0,0.0,0.0,0.0,0.0,0.287784,0.0,0.0,0.0


In [0]:
# Wrap the difference representation in a DataFrame with columns diff_rep_0, diff_rep_1, ...
# The frame is built from the raw values so re-running this cell does not stack the
# prefix ("diff_rep_diff_rep_0", ...).
# NOTE: do not re-run once the "Mutation" column has been prepended to X_diff_rep;
# recompute the difference first in that case.
X_diff_rep = pd.DataFrame(np.asarray(X_diff_rep)).add_prefix("diff_rep_")
X_diff_rep.head()

Unnamed: 0,diff_rep_0,diff_rep_1,diff_rep_2,diff_rep_3,diff_rep_4,diff_rep_5,diff_rep_6,diff_rep_7,diff_rep_8,diff_rep_9,...,diff_rep_502,diff_rep_503,diff_rep_504,diff_rep_505,diff_rep_506,diff_rep_507,diff_rep_508,diff_rep_509,diff_rep_510,diff_rep_511
0,0.0,0.0,0.0,0.028,0.0,0.0,0.0,0.0,0.0,0.045732,...,0.0,0.0,0.0,0.0,0.0,0.0,0.027021,0.0,0.0,0.0
1,0.0,0.0,0.0,0.013773,0.0,0.0,0.0,0.0,0.0,0.035159,...,0.0,0.0,0.0,0.0,0.0,0.0,0.028847,0.0,0.0,0.0
2,0.0,0.0,0.0,0.026069,0.0,0.0,0.0,0.0,0.0,0.021598,...,0.0,0.0,0.0,0.0,0.0,0.0,0.010033,0.0,0.0,0.0
3,0.0,0.0,0.0,0.043512,0.0,0.0,0.0,0.0,0.0,0.059515,...,0.0,0.0,0.0,0.0,0.0,0.0,0.035206,0.0,0.0,0.0
4,0.0,0.0,0.0,0.008556,0.0,0.0,0.0,0.0,0.003839,0.043728,...,0.0,0.0,0.0,0.0,0.0,0.0,0.046622,0.0,0.0,0.0


In [0]:
# Place wild-type, mutant and difference features side by side (column-wise concat).
X_all = pd.concat([X_wt_rep, X_mut_rep, X_diff_rep], axis=1)
X_all.head()

Unnamed: 0,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,wt_rep_9,...,diff_rep_502,diff_rep_503,diff_rep_504,diff_rep_505,diff_rep_506,diff_rep_507,diff_rep_508,diff_rep_509,diff_rep_510,diff_rep_511
0,0.0,0.0,0.0,0.13786,0.0,0.0,0.0,0.0,0.0,0.05585,...,0.0,0.0,0.0,0.0,0.0,0.0,0.027021,0.0,0.0,0.0
1,0.0,0.0,0.0,0.13786,0.0,0.0,0.0,0.0,0.0,0.05585,...,0.0,0.0,0.0,0.0,0.0,0.0,0.028847,0.0,0.0,0.0
2,0.0,0.0,0.0,0.13786,0.0,0.0,0.0,0.0,0.0,0.05585,...,0.0,0.0,0.0,0.0,0.0,0.0,0.010033,0.0,0.0,0.0
3,0.0,0.0,0.0,0.13786,0.0,0.0,0.0,0.0,0.0,0.05585,...,0.0,0.0,0.0,0.0,0.0,0.0,0.035206,0.0,0.0,0.0
4,0.0,0.0,0.0,0.13786,0.0,0.0,0.0,0.0,0.0,0.05585,...,0.0,0.0,0.0,0.0,0.0,0.0,0.046622,0.0,0.0,0.0


In [0]:
# Prepend the mutation identifier as first column, named "Mutation".
# A column-wise concat pads silently with NaN when the row counts differ, so check first.
assert len(Mutation_Table) == len(X_all), "mutation table and representations differ in row count"
# A "Mutation" column left over from an earlier run of this cell is dropped first,
# so re-running does not produce a duplicated column.
X_all = pd.concat(
  [
    Mutation_Table["MUTATION"].rename("Mutation"),
    X_all.drop(columns="Mutation", errors="ignore"),
  ],
  axis=1,
)
X_all.head()

Unnamed: 0,Mutation,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,...,diff_rep_502,diff_rep_503,diff_rep_504,diff_rep_505,diff_rep_506,diff_rep_507,diff_rep_508,diff_rep_509,diff_rep_510,diff_rep_511
0,1ADO@A@D128A,0.0,0.0,0.0,0.13786,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.027021,0.0,0.0,0.0
1,1ADO@A@D128G,0.0,0.0,0.0,0.13786,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.028847,0.0,0.0,0.0
2,1ADO@A@D128N,0.0,0.0,0.0,0.13786,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.010033,0.0,0.0,0.0
3,1ADO@A@D128Q,0.0,0.0,0.0,0.13786,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.035206,0.0,0.0,0.0
4,1ADO@A@D128V,0.0,0.0,0.0,0.13786,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.046622,0.0,0.0,0.0


In [0]:
# Prepend the mutation identifier as first column, named "Mutation".
# A column-wise concat pads silently with NaN when the row counts differ, so check first.
assert len(Mutation_Table) == len(X_diff_rep), "mutation table and representations differ in row count"
# A "Mutation" column left over from an earlier run of this cell is dropped first,
# so re-running does not produce a duplicated column.
X_diff_rep = pd.concat(
  [
    Mutation_Table["MUTATION"].rename("Mutation"),
    X_diff_rep.drop(columns="Mutation", errors="ignore"),
  ],
  axis=1,
)
X_diff_rep.head()

Unnamed: 0,Mutation,diff_rep_0,diff_rep_1,diff_rep_2,diff_rep_3,diff_rep_4,diff_rep_5,diff_rep_6,diff_rep_7,diff_rep_8,...,diff_rep_502,diff_rep_503,diff_rep_504,diff_rep_505,diff_rep_506,diff_rep_507,diff_rep_508,diff_rep_509,diff_rep_510,diff_rep_511
0,1ADO@A@D128A,0.0,0.0,0.0,0.028,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.027021,0.0,0.0,0.0
1,1ADO@A@D128G,0.0,0.0,0.0,0.013773,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.028847,0.0,0.0,0.0
2,1ADO@A@D128N,0.0,0.0,0.0,0.026069,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.010033,0.0,0.0,0.0
3,1ADO@A@D128Q,0.0,0.0,0.0,0.043512,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.035206,0.0,0.0,0.0
4,1ADO@A@D128V,0.0,0.0,0.0,0.008556,0.0,0.0,0.0,0.0,0.003839,...,0.0,0.0,0.0,0.0,0.0,0.0,0.046622,0.0,0.0,0.0


**Save dataframes:**

In [0]:
# Export wild-type + mutant + difference features of section (1); the row index is not written.
X_all.to_csv(data_target_location+"sequence_cnn_rep_all_default_01.csv", index=False)

In [0]:
# Export only the difference features of section (1); the row index is not written.
X_diff_rep.to_csv(data_target_location+"sequence_cnn_rep_diff_default_01.csv", index=False)

### Default Model Second to Last Layer (2)

**Define Models:**

In [0]:
# Full regression network; the trained weight file is loaded into this model.
inputs = tf.keras.layers.Input(shape=(SEQUENCE_LEN,))
x = tf.keras.layers.Embedding(CLASSES, 8, input_length=SEQUENCE_LEN)(inputs)

# First convolutional stack: (filters, kernel size, stride) per Conv/BN/ReLU unit.
for n_filters, kernel_size, stride in ((256, 7, 2), (256, 3, 1), (256, 3, 1)):
  x = tf.keras.layers.Conv1D(n_filters, kernel_size, stride)(x)
  x = tf.keras.layers.BatchNormalization()(x)
  x = tf.keras.layers.Activation("relu")(x)
x_mid = x  # branch point: also pooled globally further down

# Second convolutional stack on the max-pooled first stack.
x = tf.keras.layers.MaxPooling1D(3)(x_mid)
for unit_idx in range(3):
  x = tf.keras.layers.Conv1D(512, 3)(x)
  x = tf.keras.layers.BatchNormalization()(x)
  x = tf.keras.layers.Activation("relu")(x)

# Global max pooling of both branches, then concatenation.
x_mid = tf.keras.layers.GlobalMaxPooling1D()(x_mid)
x = tf.keras.layers.GlobalMaxPooling1D()(x)
x = tf.keras.layers.concatenate([x_mid, x], axis=-1)

# Two Dense/BN/ReLU units followed by the linear regression output.
for unit_idx in range(2):
  x = tf.keras.layers.Dense(512)(x)
  x = tf.keras.layers.BatchNormalization()(x)
  x = tf.keras.layers.Activation("relu")(x)
x = tf.keras.layers.Dense(1, activation="linear")(x)

model_source = tf.keras.Model(inputs=inputs, outputs=x)
model_source.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            (None, 650)          0                                            
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, 650, 8)       168         input_3[0][0]                    
__________________________________________________________________________________________________
conv1d_12 (Conv1D)              (None, 322, 256)     14592       embedding_2[0][0]                
__________________________________________________________________________________________________
batch_normalization_v1_16 (Batc (None, 322, 256)     1024        conv1d_12[0][0]                  
__________________________________________________________________________________________________
activation

In [0]:
# Truncated network: same layers as the full model, but it stops at the ReLU
# of the first Dense(512) unit.
inputs = tf.keras.layers.Input(shape=(SEQUENCE_LEN,))
x = tf.keras.layers.Embedding(CLASSES, 8, input_length=SEQUENCE_LEN)(inputs)

# First convolutional stack: (filters, kernel size, stride) per Conv/BN/ReLU unit.
for n_filters, kernel_size, stride in ((256, 7, 2), (256, 3, 1), (256, 3, 1)):
  x = tf.keras.layers.Conv1D(n_filters, kernel_size, stride)(x)
  x = tf.keras.layers.BatchNormalization()(x)
  x = tf.keras.layers.Activation("relu")(x)
x_mid = x  # branch point: also pooled globally further down

# Second convolutional stack on the max-pooled first stack.
x = tf.keras.layers.MaxPooling1D(3)(x_mid)
for unit_idx in range(3):
  x = tf.keras.layers.Conv1D(512, 3)(x)
  x = tf.keras.layers.BatchNormalization()(x)
  x = tf.keras.layers.Activation("relu")(x)

# Global max pooling of both branches, then concatenation.
x_mid = tf.keras.layers.GlobalMaxPooling1D()(x_mid)
x = tf.keras.layers.GlobalMaxPooling1D()(x)
x = tf.keras.layers.concatenate([x_mid, x], axis=-1)

# Single Dense/BN/ReLU unit; its ReLU is the model output.
x = tf.keras.layers.Dense(512)(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation("relu")(x)

model = tf.keras.Model(inputs=inputs, outputs=x)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            (None, 650)          0                                            
__________________________________________________________________________________________________
embedding_3 (Embedding)         (None, 650, 8)       168         input_4[0][0]                    
__________________________________________________________________________________________________
conv1d_18 (Conv1D)              (None, 322, 256)     14592       embedding_3[0][0]                
__________________________________________________________________________________________________
batch_normalization_v1_24 (Batc (None, 322, 256)     1024        conv1d_18[0][0]                  
__________________________________________________________________________________________________
activation

**Calculate representations:**

In [0]:
# Output of the truncated model for the wild-type inputs.
X_wt_rep = calc_pred_truncated_model(model, model_source, weight_loc, X_wt)
X_wt_rep[0][:10]  # peek: first 10 values of the first row

Calculating predictions:


array([0.24293911, 0.        , 0.3994887 , 0.        , 0.2982533 ,
       1.2998984 , 0.        , 0.16076386, 0.22289142, 0.        ],
      dtype=float32)

In [0]:
# Output of the truncated model for the mutant inputs.
X_mut_rep = calc_pred_truncated_model(model, model_source, weight_loc, X_mut)
X_mut_rep[0][:10]  # peek: first 10 values of the first row

Calculating predictions:


array([0.14357318, 0.        , 0.3074119 , 0.        , 0.28117108,
       1.3196471 , 0.        , 0.1658001 , 0.14882767, 0.        ],
      dtype=float32)

In [0]:
# Difference representation: mutant minus wild type, element by element.
# The inputs are reduced to plain arrays first so this cell stays correct when it
# is re-run after X_mut_rep / X_wt_rep have been turned into labelled DataFrames
# (their column labels differ, so a label-aware subtraction would not line up).
assert np.shape(X_mut_rep) == np.shape(X_wt_rep), "mutant and wild-type representations differ in shape"
X_diff_rep = np.subtract(np.asarray(X_mut_rep), np.asarray(X_wt_rep))
X_diff_rep[0][:10]  # peek: first 10 values of the first row

array([-0.09936593,  0.        , -0.09207678,  0.        , -0.01708221,
        0.01974869,  0.        ,  0.00503623, -0.07406375,  0.        ],
      dtype=float32)

**Create dataframes:**

In [0]:
# Wrap the wild-type representation in a DataFrame with columns wt_rep_0, wt_rep_1, ...
# The frame is built from the raw values so the cell can be re-run: wrapping an
# already labelled frame again would stack the prefix ("wt_rep_wt_rep_0", ...).
X_wt_rep = pd.DataFrame(np.asarray(X_wt_rep)).add_prefix("wt_rep_")
X_wt_rep.head()

Unnamed: 0,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,wt_rep_9,...,wt_rep_502,wt_rep_503,wt_rep_504,wt_rep_505,wt_rep_506,wt_rep_507,wt_rep_508,wt_rep_509,wt_rep_510,wt_rep_511
0,0.242939,0.0,0.399489,0.0,0.298253,1.299898,0.0,0.160764,0.222891,0.0,...,0.0,0.065701,0.0,0.0,0.103569,0.947081,0.007269,0.0,0.553753,0.308339
1,0.242939,0.0,0.399489,0.0,0.298253,1.299898,0.0,0.160764,0.222891,0.0,...,0.0,0.065701,0.0,0.0,0.103569,0.947081,0.007269,0.0,0.553753,0.308339
2,0.242939,0.0,0.399489,0.0,0.298253,1.299898,0.0,0.160764,0.222891,0.0,...,0.0,0.065701,0.0,0.0,0.103569,0.947081,0.007269,0.0,0.553753,0.308339
3,0.242939,0.0,0.399489,0.0,0.298253,1.299898,0.0,0.160764,0.222891,0.0,...,0.0,0.065701,0.0,0.0,0.103569,0.947081,0.007269,0.0,0.553753,0.308339
4,0.242939,0.0,0.399489,0.0,0.298253,1.299898,0.0,0.160764,0.222891,0.0,...,0.0,0.065701,0.0,0.0,0.103569,0.947081,0.007269,0.0,0.553753,0.308339


In [0]:
# Wrap the mutant representation in a DataFrame with columns mut_rep_0, mut_rep_1, ...
# The frame is built from the raw values so the cell can be re-run: wrapping an
# already labelled frame again would stack the prefix ("mut_rep_mut_rep_0", ...).
X_mut_rep = pd.DataFrame(np.asarray(X_mut_rep)).add_prefix("mut_rep_")
X_mut_rep.head()

Unnamed: 0,mut_rep_0,mut_rep_1,mut_rep_2,mut_rep_3,mut_rep_4,mut_rep_5,mut_rep_6,mut_rep_7,mut_rep_8,mut_rep_9,...,mut_rep_502,mut_rep_503,mut_rep_504,mut_rep_505,mut_rep_506,mut_rep_507,mut_rep_508,mut_rep_509,mut_rep_510,mut_rep_511
0,0.143573,0.0,0.307412,0.0,0.281171,1.319647,0.0,0.1658,0.148828,0.0,...,0.0,0.195515,0.0,0.0,0.135978,0.819574,0.0,0.0,0.570151,0.343665
1,0.043673,0.0,0.345127,0.0,0.265173,1.31009,0.0,0.087704,0.302769,0.0,...,0.0,0.181503,0.0,0.0,0.178721,0.809656,0.029876,0.0,0.725649,0.212272
2,0.079186,0.0,0.407448,0.0,0.270454,1.330586,0.0,0.176548,0.197568,0.0,...,0.0,0.126569,0.0,0.0,0.146917,0.885851,0.0,0.0,0.636954,0.334009
3,0.0,0.0,0.400103,0.0,0.229143,1.32952,0.0,0.115486,0.228602,0.0,...,0.0,0.144557,0.0,0.0,0.175811,0.772731,0.047945,0.0,0.706723,0.314281
4,0.0,0.0,0.345543,0.0,0.243362,1.27126,0.0,0.21332,0.237742,0.0,...,0.0,0.214643,0.0,0.0,0.192502,0.721022,0.0,0.0,0.745018,0.323378


In [0]:
# Wrap the difference representation in a DataFrame with columns diff_rep_0, diff_rep_1, ...
# The frame is built from the raw values so re-running this cell does not stack the
# prefix ("diff_rep_diff_rep_0", ...).
# NOTE: do not re-run once the "Mutation" column has been prepended to X_diff_rep;
# recompute the difference first in that case.
X_diff_rep = pd.DataFrame(np.asarray(X_diff_rep)).add_prefix("diff_rep_")
X_diff_rep.head()

Unnamed: 0,diff_rep_0,diff_rep_1,diff_rep_2,diff_rep_3,diff_rep_4,diff_rep_5,diff_rep_6,diff_rep_7,diff_rep_8,diff_rep_9,...,diff_rep_502,diff_rep_503,diff_rep_504,diff_rep_505,diff_rep_506,diff_rep_507,diff_rep_508,diff_rep_509,diff_rep_510,diff_rep_511
0,-0.099366,0.0,-0.092077,0.0,-0.017082,0.019749,0.0,0.005036,-0.074064,0.0,...,0.0,0.129813,0.0,0.0,0.032409,-0.127507,-0.007269,0.0,0.016397,0.035326
1,-0.199266,0.0,-0.054361,0.0,-0.03308,0.010191,0.0,-0.07306,0.079878,0.0,...,0.0,0.115802,0.0,0.0,0.075152,-0.137425,0.022607,0.0,0.171896,-0.096067
2,-0.163753,0.0,0.00796,0.0,-0.027799,0.030688,0.0,0.015784,-0.025323,0.0,...,0.0,0.060868,0.0,0.0,0.043347,-0.06123,-0.007269,0.0,0.083201,0.02567
3,-0.242939,0.0,0.000614,0.0,-0.06911,0.029622,0.0,-0.045278,0.00571,0.0,...,0.0,0.078855,0.0,0.0,0.072242,-0.17435,0.040676,0.0,0.15297,0.005942
4,-0.242939,0.0,-0.053945,0.0,-0.054891,-0.028638,0.0,0.052557,0.014851,0.0,...,0.0,0.148942,0.0,0.0,0.088933,-0.22606,-0.007269,0.0,0.191264,0.015038


In [0]:
# Place wild-type, mutant and difference features side by side (column-wise concat).
X_all = pd.concat([X_wt_rep, X_mut_rep, X_diff_rep], axis=1)
X_all.head()

Unnamed: 0,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,wt_rep_9,...,diff_rep_502,diff_rep_503,diff_rep_504,diff_rep_505,diff_rep_506,diff_rep_507,diff_rep_508,diff_rep_509,diff_rep_510,diff_rep_511
0,0.242939,0.0,0.399489,0.0,0.298253,1.299898,0.0,0.160764,0.222891,0.0,...,0.0,0.129813,0.0,0.0,0.032409,-0.127507,-0.007269,0.0,0.016397,0.035326
1,0.242939,0.0,0.399489,0.0,0.298253,1.299898,0.0,0.160764,0.222891,0.0,...,0.0,0.115802,0.0,0.0,0.075152,-0.137425,0.022607,0.0,0.171896,-0.096067
2,0.242939,0.0,0.399489,0.0,0.298253,1.299898,0.0,0.160764,0.222891,0.0,...,0.0,0.060868,0.0,0.0,0.043347,-0.06123,-0.007269,0.0,0.083201,0.02567
3,0.242939,0.0,0.399489,0.0,0.298253,1.299898,0.0,0.160764,0.222891,0.0,...,0.0,0.078855,0.0,0.0,0.072242,-0.17435,0.040676,0.0,0.15297,0.005942
4,0.242939,0.0,0.399489,0.0,0.298253,1.299898,0.0,0.160764,0.222891,0.0,...,0.0,0.148942,0.0,0.0,0.088933,-0.22606,-0.007269,0.0,0.191264,0.015038


In [0]:
# Prepend the mutation identifier as first column, named "Mutation".
# A column-wise concat pads silently with NaN when the row counts differ, so check first.
assert len(Mutation_Table) == len(X_all), "mutation table and representations differ in row count"
# A "Mutation" column left over from an earlier run of this cell is dropped first,
# so re-running does not produce a duplicated column.
X_all = pd.concat(
  [
    Mutation_Table["MUTATION"].rename("Mutation"),
    X_all.drop(columns="Mutation", errors="ignore"),
  ],
  axis=1,
)
X_all.head()

Unnamed: 0,Mutation,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,...,diff_rep_502,diff_rep_503,diff_rep_504,diff_rep_505,diff_rep_506,diff_rep_507,diff_rep_508,diff_rep_509,diff_rep_510,diff_rep_511
0,1ADO@A@D128A,0.242939,0.0,0.399489,0.0,0.298253,1.299898,0.0,0.160764,0.222891,...,0.0,0.129813,0.0,0.0,0.032409,-0.127507,-0.007269,0.0,0.016397,0.035326
1,1ADO@A@D128G,0.242939,0.0,0.399489,0.0,0.298253,1.299898,0.0,0.160764,0.222891,...,0.0,0.115802,0.0,0.0,0.075152,-0.137425,0.022607,0.0,0.171896,-0.096067
2,1ADO@A@D128N,0.242939,0.0,0.399489,0.0,0.298253,1.299898,0.0,0.160764,0.222891,...,0.0,0.060868,0.0,0.0,0.043347,-0.06123,-0.007269,0.0,0.083201,0.02567
3,1ADO@A@D128Q,0.242939,0.0,0.399489,0.0,0.298253,1.299898,0.0,0.160764,0.222891,...,0.0,0.078855,0.0,0.0,0.072242,-0.17435,0.040676,0.0,0.15297,0.005942
4,1ADO@A@D128V,0.242939,0.0,0.399489,0.0,0.298253,1.299898,0.0,0.160764,0.222891,...,0.0,0.148942,0.0,0.0,0.088933,-0.22606,-0.007269,0.0,0.191264,0.015038


In [0]:
# Prepend the mutation identifier as first column, named "Mutation".
# A column-wise concat pads silently with NaN when the row counts differ, so check first.
assert len(Mutation_Table) == len(X_diff_rep), "mutation table and representations differ in row count"
# A "Mutation" column left over from an earlier run of this cell is dropped first,
# so re-running does not produce a duplicated column.
X_diff_rep = pd.concat(
  [
    Mutation_Table["MUTATION"].rename("Mutation"),
    X_diff_rep.drop(columns="Mutation", errors="ignore"),
  ],
  axis=1,
)
X_diff_rep.head()

Unnamed: 0,Mutation,diff_rep_0,diff_rep_1,diff_rep_2,diff_rep_3,diff_rep_4,diff_rep_5,diff_rep_6,diff_rep_7,diff_rep_8,...,diff_rep_502,diff_rep_503,diff_rep_504,diff_rep_505,diff_rep_506,diff_rep_507,diff_rep_508,diff_rep_509,diff_rep_510,diff_rep_511
0,1ADO@A@D128A,-0.099366,0.0,-0.092077,0.0,-0.017082,0.019749,0.0,0.005036,-0.074064,...,0.0,0.129813,0.0,0.0,0.032409,-0.127507,-0.007269,0.0,0.016397,0.035326
1,1ADO@A@D128G,-0.199266,0.0,-0.054361,0.0,-0.03308,0.010191,0.0,-0.07306,0.079878,...,0.0,0.115802,0.0,0.0,0.075152,-0.137425,0.022607,0.0,0.171896,-0.096067
2,1ADO@A@D128N,-0.163753,0.0,0.00796,0.0,-0.027799,0.030688,0.0,0.015784,-0.025323,...,0.0,0.060868,0.0,0.0,0.043347,-0.06123,-0.007269,0.0,0.083201,0.02567
3,1ADO@A@D128Q,-0.242939,0.0,0.000614,0.0,-0.06911,0.029622,0.0,-0.045278,0.00571,...,0.0,0.078855,0.0,0.0,0.072242,-0.17435,0.040676,0.0,0.15297,0.005942
4,1ADO@A@D128V,-0.242939,0.0,-0.053945,0.0,-0.054891,-0.028638,0.0,0.052557,0.014851,...,0.0,0.148942,0.0,0.0,0.088933,-0.22606,-0.007269,0.0,0.191264,0.015038


**Save dataframes:**

In [0]:
# Export wild-type + mutant + difference features of section (2); the row index is not written.
X_all.to_csv(data_target_location+"sequence_cnn_rep_all_default_02.csv", index=False)

In [0]:
# Export only the difference features of section (2); the row index is not written.
X_diff_rep.to_csv(data_target_location+"sequence_cnn_rep_diff_default_02.csv", index=False)

### Default Model Third to Last Layer (3)

**Define Models:**

In [0]:
# Full regression network; the trained weight file is loaded into this model.
inputs = tf.keras.layers.Input(shape=(SEQUENCE_LEN,))
x = tf.keras.layers.Embedding(CLASSES, 8, input_length=SEQUENCE_LEN)(inputs)

# First convolutional stack: (filters, kernel size, stride) per Conv/BN/ReLU unit.
for n_filters, kernel_size, stride in ((256, 7, 2), (256, 3, 1), (256, 3, 1)):
  x = tf.keras.layers.Conv1D(n_filters, kernel_size, stride)(x)
  x = tf.keras.layers.BatchNormalization()(x)
  x = tf.keras.layers.Activation("relu")(x)
x_mid = x  # branch point: also pooled globally further down

# Second convolutional stack on the max-pooled first stack.
x = tf.keras.layers.MaxPooling1D(3)(x_mid)
for unit_idx in range(3):
  x = tf.keras.layers.Conv1D(512, 3)(x)
  x = tf.keras.layers.BatchNormalization()(x)
  x = tf.keras.layers.Activation("relu")(x)

# Global max pooling of both branches, then concatenation.
x_mid = tf.keras.layers.GlobalMaxPooling1D()(x_mid)
x = tf.keras.layers.GlobalMaxPooling1D()(x)
x = tf.keras.layers.concatenate([x_mid, x], axis=-1)

# Two Dense/BN/ReLU units followed by the linear regression output.
for unit_idx in range(2):
  x = tf.keras.layers.Dense(512)(x)
  x = tf.keras.layers.BatchNormalization()(x)
  x = tf.keras.layers.Activation("relu")(x)
x = tf.keras.layers.Dense(1, activation="linear")(x)

model_source = tf.keras.Model(inputs=inputs, outputs=x)
model_source.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_5 (InputLayer)            (None, 650)          0                                            
__________________________________________________________________________________________________
embedding_4 (Embedding)         (None, 650, 8)       168         input_5[0][0]                    
__________________________________________________________________________________________________
conv1d_24 (Conv1D)              (None, 322, 256)     14592       embedding_4[0][0]                
__________________________________________________________________________________________________
batch_normalization_v1_31 (Batc (None, 322, 256)     1024        conv1d_24[0][0]                  
__________________________________________________________________________________________________
activation

In [0]:
#truncated model
# Layer stack that ends at the concatenation of the two globally max-pooled
# branches (no Dense head), so the model output is the CNN representation.
inputs = tf.keras.layers.Input(shape=(SEQUENCE_LEN,))

x = tf.keras.layers.Embedding(CLASSES, 8, input_length=SEQUENCE_LEN)(inputs)

# Stage 1: three Conv1D -> BatchNorm -> ReLU units with 256 filters each.
# Only the first convolution uses kernel size 7 with stride 2.
for conv_kernel, conv_stride in ((7, 2), (3, 1), (3, 1)):
    x = tf.keras.layers.Conv1D(256, conv_kernel, conv_stride)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation("relu")(x)
x_mid = x  # second branch, pooled separately further down

# Stage 2: down-sample, then three Conv1D(512, 3) -> BatchNorm -> ReLU units.
x = tf.keras.layers.MaxPooling1D(3)(x_mid)
for conv_kernel in (3, 3, 3):
    x = tf.keras.layers.Conv1D(512, conv_kernel)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation("relu")(x)

# Collapse the sequence axis of both branches and join them feature-wise.
x_mid = tf.keras.layers.GlobalMaxPooling1D()(x_mid)
x = tf.keras.layers.GlobalMaxPooling1D()(x)

x = tf.keras.layers.concatenate([x_mid, x], axis=-1)

model = tf.keras.Model(inputs=inputs, outputs=x)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            (None, 650)          0                                            
__________________________________________________________________________________________________
embedding_5 (Embedding)         (None, 650, 8)       168         input_6[0][0]                    
__________________________________________________________________________________________________
conv1d_30 (Conv1D)              (None, 322, 256)     14592       embedding_5[0][0]                
__________________________________________________________________________________________________
batch_normalization_v1_39 (Batc (None, 322, 256)     1024        conv1d_30[0][0]                  
__________________________________________________________________________________________________
activation

**Calculate representations:**

In [0]:
# Compute the truncated-model output for the wild-type input X_wt.
# NOTE(review): calc_pred_truncated_model is defined outside this cell; it
# presumably loads the weights stored at weight_loc through model_source and
# returns one representation row per input sequence — confirm against its definition.
X_wt_rep = calc_pred_truncated_model(model, model_source, weight_loc, X_wt)
# Quick look: first 10 entries of the first returned row.
X_wt_rep[0][:10]

Calculating predictions:


array([4.015851 , 3.5572786, 3.2733345, 4.8180285, 3.23963  , 3.3289287,
       2.8777688, 3.1038089, 4.239603 , 3.0628   ], dtype=float32)

In [0]:
# Compute the truncated-model output for the mutant input X_mut, using the
# same model pair and weight file as for the wild type.
# NOTE(review): calc_pred_truncated_model is defined outside this cell —
# confirm how it applies the weights at weight_loc.
X_mut_rep = calc_pred_truncated_model(model, model_source, weight_loc, X_mut)
# Quick look: first 10 entries of the first returned row.
X_mut_rep[0][:10]

Calculating predictions:


array([4.015851 , 3.5572786, 3.2733345, 4.6749716, 3.23963  , 3.3289287,
       2.8777688, 3.1038089, 4.239603 , 3.0628   ], dtype=float32)

In [0]:
# Element-wise difference of the representations (mutant minus wild type).
# Both operands are converted to plain arrays so the subtraction is always
# positional: if this cell is executed when X_mut_rep / X_wt_rep are already
# labelled DataFrames ("mut_rep_*" vs "wt_rep_*"), label alignment would pair
# no columns at all and yield NaNs.
X_diff_rep = np.subtract(np.asarray(X_mut_rep), np.asarray(X_wt_rep))
# Quick look: last 10 entries of the first row.
X_diff_rep[0][-10:]

array([0.        , 0.        , 0.22088325, 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.01210022, 0.        ],
      dtype=float32)

**Create dataframes:**

In [0]:
# Wrap the wild-type representations in a DataFrame whose columns are named
# wt_rep_0, wt_rep_1, ... by position.  Naming by position (not by the current
# label) keeps the cell idempotent: executing it again yields the same names
# rather than doubled prefixes such as "wt_rep_wt_rep_0".
X_wt_rep = pd.DataFrame(X_wt_rep)
X_wt_rep.columns = ["wt_rep_" + str(position) for position in range(X_wt_rep.shape[1])]
X_wt_rep.head()

Unnamed: 0,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,wt_rep_9,...,wt_rep_758,wt_rep_759,wt_rep_760,wt_rep_761,wt_rep_762,wt_rep_763,wt_rep_764,wt_rep_765,wt_rep_766,wt_rep_767
0,4.015851,3.557279,3.273335,4.818028,3.23963,3.328929,2.877769,3.103809,4.239603,3.0628,...,2.409964,2.531097,1.224079,1.218049,2.994668,1.018906,1.770575,3.001825,4.113869,3.33108
1,4.015851,3.557279,3.273335,4.818028,3.23963,3.328929,2.877769,3.103809,4.239603,3.0628,...,2.409964,2.531097,1.224079,1.218049,2.994668,1.018906,1.770575,3.001825,4.113869,3.33108
2,4.015851,3.557279,3.273335,4.818028,3.23963,3.328929,2.877769,3.103809,4.239603,3.0628,...,2.409964,2.531097,1.224079,1.218049,2.994668,1.018906,1.770575,3.001825,4.113869,3.33108
3,4.015851,3.557279,3.273335,4.818028,3.23963,3.328929,2.877769,3.103809,4.239603,3.0628,...,2.409964,2.531097,1.224079,1.218049,2.994668,1.018906,1.770575,3.001825,4.113869,3.33108
4,4.015851,3.557279,3.273335,4.818028,3.23963,3.328929,2.877769,3.103809,4.239603,3.0628,...,2.409964,2.531097,1.224079,1.218049,2.994668,1.018906,1.770575,3.001825,4.113869,3.33108


In [0]:
# Wrap the mutant representations in a DataFrame whose columns are named
# mut_rep_0, mut_rep_1, ... by position.  Naming by position (not by the
# current label) keeps the cell idempotent: executing it again yields the same
# names rather than doubled prefixes such as "mut_rep_mut_rep_0".
X_mut_rep = pd.DataFrame(X_mut_rep)
X_mut_rep.columns = ["mut_rep_" + str(position) for position in range(X_mut_rep.shape[1])]
X_mut_rep.head()

Unnamed: 0,mut_rep_0,mut_rep_1,mut_rep_2,mut_rep_3,mut_rep_4,mut_rep_5,mut_rep_6,mut_rep_7,mut_rep_8,mut_rep_9,...,mut_rep_758,mut_rep_759,mut_rep_760,mut_rep_761,mut_rep_762,mut_rep_763,mut_rep_764,mut_rep_765,mut_rep_766,mut_rep_767
0,4.015851,3.557279,3.273335,4.674972,3.23963,3.328929,2.877769,3.103809,4.239603,3.0628,...,2.409964,2.531097,1.444962,1.218049,2.994668,1.018906,1.770575,3.001825,4.125969,3.33108
1,4.015851,3.557279,3.273335,4.768565,3.23963,3.328929,2.877769,3.103809,4.239603,3.0628,...,2.409964,2.531097,1.34344,1.218049,2.994668,1.018906,1.770575,3.001825,4.072161,3.33108
2,4.015851,3.557279,3.273335,4.754043,3.23963,3.328929,2.877769,3.103809,4.239603,3.0628,...,2.409964,2.531097,1.366893,1.218049,2.994668,1.018906,1.770575,3.001825,4.105414,3.33108
3,4.015851,3.557279,3.273335,4.781416,3.23963,3.328929,2.886809,3.103809,4.239603,3.0628,...,2.409964,2.531097,1.409667,1.218049,2.994668,1.018906,1.770575,3.001825,4.086159,3.33108
4,4.015851,3.557279,3.273335,4.600608,3.23963,3.328929,2.956737,3.103809,4.239603,3.0628,...,2.409964,2.531097,1.433306,1.218049,2.994668,1.018906,1.770575,3.001825,4.087595,3.33108


In [0]:
# Wrap the difference representations in a DataFrame whose columns are named
# diff_rep_0, diff_rep_1, ... by position.  A "Mutation" identifier column that
# may already be attached to X_diff_rep is removed first, and names are derived
# from the position, so executing the cell again gives the same result.
X_diff_rep = pd.DataFrame(X_diff_rep).drop(columns="Mutation", errors="ignore")
X_diff_rep.columns = ["diff_rep_" + str(position) for position in range(X_diff_rep.shape[1])]
X_diff_rep.head()

Unnamed: 0,diff_rep_0,diff_rep_1,diff_rep_2,diff_rep_3,diff_rep_4,diff_rep_5,diff_rep_6,diff_rep_7,diff_rep_8,diff_rep_9,...,diff_rep_758,diff_rep_759,diff_rep_760,diff_rep_761,diff_rep_762,diff_rep_763,diff_rep_764,diff_rep_765,diff_rep_766,diff_rep_767
0,0.0,0.0,0.0,-0.143057,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.220883,0.0,0.0,0.0,0.0,0.0,0.0121,0.0
1,0.0,0.0,0.0,-0.049463,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.119361,0.0,0.0,0.0,0.0,0.0,-0.041708,0.0
2,0.0,0.0,0.0,-0.063986,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.142814,0.0,0.0,0.0,0.0,0.0,-0.008455,0.0
3,0.0,0.0,0.0,-0.036613,0.0,0.0,0.00904,0.0,0.0,0.0,...,0.0,0.0,0.185588,0.0,0.0,0.0,0.0,0.0,-0.02771,0.0
4,0.0,0.0,0.0,-0.217421,0.0,0.0,0.078968,0.0,0.0,0.0,...,0.0,0.0,0.209227,0.0,0.0,0.0,0.0,0.0,-0.026274,0.0


In [0]:
# Place the wild-type, mutant and difference feature frames side by side
# (column-wise) in one table; rows are matched on the frames' index.
rep_frames = [X_wt_rep, X_mut_rep, X_diff_rep]
X_all = pd.concat(rep_frames, axis="columns")
X_all.head()

Unnamed: 0,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,wt_rep_9,...,diff_rep_758,diff_rep_759,diff_rep_760,diff_rep_761,diff_rep_762,diff_rep_763,diff_rep_764,diff_rep_765,diff_rep_766,diff_rep_767
0,4.015851,3.557279,3.273335,4.818028,3.23963,3.328929,2.877769,3.103809,4.239603,3.0628,...,0.0,0.0,0.220883,0.0,0.0,0.0,0.0,0.0,0.0121,0.0
1,4.015851,3.557279,3.273335,4.818028,3.23963,3.328929,2.877769,3.103809,4.239603,3.0628,...,0.0,0.0,0.119361,0.0,0.0,0.0,0.0,0.0,-0.041708,0.0
2,4.015851,3.557279,3.273335,4.818028,3.23963,3.328929,2.877769,3.103809,4.239603,3.0628,...,0.0,0.0,0.142814,0.0,0.0,0.0,0.0,0.0,-0.008455,0.0
3,4.015851,3.557279,3.273335,4.818028,3.23963,3.328929,2.877769,3.103809,4.239603,3.0628,...,0.0,0.0,0.185588,0.0,0.0,0.0,0.0,0.0,-0.02771,0.0
4,4.015851,3.557279,3.273335,4.818028,3.23963,3.328929,2.877769,3.103809,4.239603,3.0628,...,0.0,0.0,0.209227,0.0,0.0,0.0,0.0,0.0,-0.026274,0.0


In [0]:
# Prepend the mutation identifier (Mutation_Table["MUTATION"]) as the first
# column, labelled "Mutation".  Any "Mutation" column already present in X_all
# is dropped beforehand, so executing this cell more than once does not stack
# duplicate identifier columns.
mutation_ids = Mutation_Table[["MUTATION"]].rename(columns={"MUTATION": "Mutation"})
X_all = pd.concat(
    [mutation_ids, X_all.drop(columns="Mutation", errors="ignore")],
    axis=1,
)
X_all.head()

Unnamed: 0,Mutation,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,...,diff_rep_758,diff_rep_759,diff_rep_760,diff_rep_761,diff_rep_762,diff_rep_763,diff_rep_764,diff_rep_765,diff_rep_766,diff_rep_767
0,1ADO@A@D128A,4.015851,3.557279,3.273335,4.818028,3.23963,3.328929,2.877769,3.103809,4.239603,...,0.0,0.0,0.220883,0.0,0.0,0.0,0.0,0.0,0.0121,0.0
1,1ADO@A@D128G,4.015851,3.557279,3.273335,4.818028,3.23963,3.328929,2.877769,3.103809,4.239603,...,0.0,0.0,0.119361,0.0,0.0,0.0,0.0,0.0,-0.041708,0.0
2,1ADO@A@D128N,4.015851,3.557279,3.273335,4.818028,3.23963,3.328929,2.877769,3.103809,4.239603,...,0.0,0.0,0.142814,0.0,0.0,0.0,0.0,0.0,-0.008455,0.0
3,1ADO@A@D128Q,4.015851,3.557279,3.273335,4.818028,3.23963,3.328929,2.877769,3.103809,4.239603,...,0.0,0.0,0.185588,0.0,0.0,0.0,0.0,0.0,-0.02771,0.0
4,1ADO@A@D128V,4.015851,3.557279,3.273335,4.818028,3.23963,3.328929,2.877769,3.103809,4.239603,...,0.0,0.0,0.209227,0.0,0.0,0.0,0.0,0.0,-0.026274,0.0


In [0]:
# Prepend the mutation identifier (Mutation_Table["MUTATION"]) as the first
# column, labelled "Mutation".  Any "Mutation" column already present in
# X_diff_rep is dropped beforehand, so executing this cell more than once does
# not stack duplicate identifier columns.
mutation_ids = Mutation_Table[["MUTATION"]].rename(columns={"MUTATION": "Mutation"})
X_diff_rep = pd.concat(
    [mutation_ids, X_diff_rep.drop(columns="Mutation", errors="ignore")],
    axis=1,
)
X_diff_rep.head()

Unnamed: 0,Mutation,diff_rep_0,diff_rep_1,diff_rep_2,diff_rep_3,diff_rep_4,diff_rep_5,diff_rep_6,diff_rep_7,diff_rep_8,...,diff_rep_758,diff_rep_759,diff_rep_760,diff_rep_761,diff_rep_762,diff_rep_763,diff_rep_764,diff_rep_765,diff_rep_766,diff_rep_767
0,1ADO@A@D128A,0.0,0.0,0.0,-0.143057,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.220883,0.0,0.0,0.0,0.0,0.0,0.0121,0.0
1,1ADO@A@D128G,0.0,0.0,0.0,-0.049463,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.119361,0.0,0.0,0.0,0.0,0.0,-0.041708,0.0
2,1ADO@A@D128N,0.0,0.0,0.0,-0.063986,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.142814,0.0,0.0,0.0,0.0,0.0,-0.008455,0.0
3,1ADO@A@D128Q,0.0,0.0,0.0,-0.036613,0.0,0.0,0.00904,0.0,0.0,...,0.0,0.0,0.185588,0.0,0.0,0.0,0.0,0.0,-0.02771,0.0
4,1ADO@A@D128V,0.0,0.0,0.0,-0.217421,0.0,0.0,0.078968,0.0,0.0,...,0.0,0.0,0.209227,0.0,0.0,0.0,0.0,0.0,-0.026274,0.0


**Save dataframes:**

In [0]:
# Write the combined (wt / mut / diff) table to the target folder without the
# row index.  os.path.join adds a path separator only when
# data_target_location does not already end with one, so the file lands inside
# the folder regardless of how the location string was entered.
X_all.to_csv(os.path.join(data_target_location, "sequence_cnn_rep_all_default_03.csv"), index=False)

In [0]:
# Write the difference-only table to the target folder without the row index.
# os.path.join adds a path separator only when data_target_location does not
# already end with one, so the file lands inside the folder regardless of how
# the location string was entered.
X_diff_rep.to_csv(os.path.join(data_target_location, "sequence_cnn_rep_diff_default_03.csv"), index=False)

### 64 Model Last Layer (1)

**Define Models:**

In [0]:
#original model
# Full regression network: convolutional trunk, two pooled branches, a Dense
# head of 512 -> 512 -> 64 units and a single linear output unit.
inputs = tf.keras.layers.Input(shape=(SEQUENCE_LEN,))

x = tf.keras.layers.Embedding(CLASSES, 8, input_length=SEQUENCE_LEN)(inputs)

# Stage 1: three Conv1D -> BatchNorm -> ReLU units with 256 filters each.
# Only the first convolution uses kernel size 7 with stride 2.
for conv_kernel, conv_stride in ((7, 2), (3, 1), (3, 1)):
    x = tf.keras.layers.Conv1D(256, conv_kernel, conv_stride)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation("relu")(x)
x_mid = x  # second branch, pooled separately further down

# Stage 2: down-sample, then three Conv1D(512, 3) -> BatchNorm -> ReLU units.
x = tf.keras.layers.MaxPooling1D(3)(x_mid)
for conv_kernel in (3, 3, 3):
    x = tf.keras.layers.Conv1D(512, conv_kernel)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation("relu")(x)

# Collapse the sequence axis of both branches and join them feature-wise.
x_mid = tf.keras.layers.GlobalMaxPooling1D()(x_mid)
x = tf.keras.layers.GlobalMaxPooling1D()(x)

x = tf.keras.layers.concatenate([x_mid, x], axis=-1)

# Dense head: Dense -> BatchNorm -> ReLU for 512, 512 and 64 units.
for dense_units in (512, 512, 64):
    x = tf.keras.layers.Dense(dense_units)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation("relu")(x)

# Single linear unit as the regression output.
x = tf.keras.layers.Dense(1, activation="linear")(x)

model_source = tf.keras.Model(inputs=inputs, outputs=x)
model_source.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_7 (InputLayer)            (None, 650)          0                                            
__________________________________________________________________________________________________
embedding_6 (Embedding)         (None, 650, 8)       168         input_7[0][0]                    
__________________________________________________________________________________________________
conv1d_36 (Conv1D)              (None, 322, 256)     14592       embedding_6[0][0]                
__________________________________________________________________________________________________
batch_normalization_v1_45 (Batc (None, 322, 256)     1024        conv1d_36[0][0]                  
__________________________________________________________________________________________________
activation

In [0]:
#truncated model
# Layer stack that ends after the Dense(64) -> BatchNorm -> ReLU unit (no final
# Dense(1) output), so the model output is the 64-unit representation.
inputs = tf.keras.layers.Input(shape=(SEQUENCE_LEN,))

x = tf.keras.layers.Embedding(CLASSES, 8, input_length=SEQUENCE_LEN)(inputs)

# Stage 1: three Conv1D -> BatchNorm -> ReLU units with 256 filters each.
# Only the first convolution uses kernel size 7 with stride 2.
for conv_kernel, conv_stride in ((7, 2), (3, 1), (3, 1)):
    x = tf.keras.layers.Conv1D(256, conv_kernel, conv_stride)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation("relu")(x)
x_mid = x  # second branch, pooled separately further down

# Stage 2: down-sample, then three Conv1D(512, 3) -> BatchNorm -> ReLU units.
x = tf.keras.layers.MaxPooling1D(3)(x_mid)
for conv_kernel in (3, 3, 3):
    x = tf.keras.layers.Conv1D(512, conv_kernel)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation("relu")(x)

# Collapse the sequence axis of both branches and join them feature-wise.
x_mid = tf.keras.layers.GlobalMaxPooling1D()(x_mid)
x = tf.keras.layers.GlobalMaxPooling1D()(x)

x = tf.keras.layers.concatenate([x_mid, x], axis=-1)

# Dense head: Dense -> BatchNorm -> ReLU for 512, 512 and 64 units.
for dense_units in (512, 512, 64):
    x = tf.keras.layers.Dense(dense_units)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation("relu")(x)

model = tf.keras.Model(inputs=inputs, outputs=x)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_8 (InputLayer)            (None, 650)          0                                            
__________________________________________________________________________________________________
embedding_7 (Embedding)         (None, 650, 8)       168         input_8[0][0]                    
__________________________________________________________________________________________________
conv1d_42 (Conv1D)              (None, 322, 256)     14592       embedding_7[0][0]                
__________________________________________________________________________________________________
batch_normalization_v1_54 (Batc (None, 322, 256)     1024        conv1d_42[0][0]                  
__________________________________________________________________________________________________
activation

**Calculate representations:**

In [0]:
# Compute the truncated-model output for the wild-type input X_wt.
# NOTE(review): calc_pred_truncated_model is defined outside this cell; it
# presumably loads the weights stored at weight_loc_64 through model_source and
# returns one representation row per input sequence — confirm against its definition.
X_wt_rep = calc_pred_truncated_model(model, model_source, weight_loc_64, X_wt)
# Quick look: first 10 entries of the first returned row.
X_wt_rep[0][:10]

Calculating predictions:


array([0.        , 0.3238511 , 0.28337714, 0.17371589, 0.        ,
       0.13224402, 0.        , 0.        , 0.        , 0.        ],
      dtype=float32)

In [0]:
# Compute the truncated-model output for the mutant input X_mut, using the
# same model pair and weight file as for the wild type.
# NOTE(review): calc_pred_truncated_model is defined outside this cell —
# confirm how it applies the weights at weight_loc_64.
X_mut_rep = calc_pred_truncated_model(model, model_source, weight_loc_64, X_mut)
# Quick look: first 10 entries of the first returned row.
X_mut_rep[0][:10]

Calculating predictions:


array([0.        , 0.28097606, 0.27574655, 0.17494065, 0.        ,
       0.1501263 , 0.        , 0.        , 0.        , 0.        ],
      dtype=float32)

In [0]:
# Element-wise difference of the representations (mutant minus wild type).
# Both operands are converted to plain arrays so the subtraction is always
# positional: if this cell is executed when X_mut_rep / X_wt_rep are already
# labelled DataFrames ("mut_rep_*" vs "wt_rep_*"), label alignment would pair
# no columns at all and yield NaNs.
X_diff_rep = np.subtract(np.asarray(X_mut_rep), np.asarray(X_wt_rep))
# Quick look: last 10 entries of the first row.
X_diff_rep[0][-10:]

array([ 0.        ,  0.01542863,  0.        , -0.01748872,  0.        ,
        0.00376785,  0.        ,  0.        ,  0.01746579,  0.        ],
      dtype=float32)

**Create dataframes:**

In [0]:
# Wrap the wild-type representations in a DataFrame whose columns are named
# wt_rep_0, wt_rep_1, ... by position.  Naming by position (not by the current
# label) keeps the cell idempotent: executing it again yields the same names
# rather than doubled prefixes such as "wt_rep_wt_rep_0".
X_wt_rep = pd.DataFrame(X_wt_rep)
X_wt_rep.columns = ["wt_rep_" + str(position) for position in range(X_wt_rep.shape[1])]
X_wt_rep.head()

Unnamed: 0,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,wt_rep_9,...,wt_rep_54,wt_rep_55,wt_rep_56,wt_rep_57,wt_rep_58,wt_rep_59,wt_rep_60,wt_rep_61,wt_rep_62,wt_rep_63
0,0.0,0.323851,0.283377,0.173716,0.0,0.132244,0.0,0.0,0.0,0.0,...,0.0,0.138614,0.0,0.395751,0.0,0.551131,0.0,0.0,0.193707,0.0
1,0.0,0.323851,0.283377,0.173716,0.0,0.132244,0.0,0.0,0.0,0.0,...,0.0,0.138614,0.0,0.395751,0.0,0.551131,0.0,0.0,0.193707,0.0
2,0.0,0.323851,0.283377,0.173716,0.0,0.132244,0.0,0.0,0.0,0.0,...,0.0,0.138614,0.0,0.395751,0.0,0.551131,0.0,0.0,0.193707,0.0
3,0.0,0.323851,0.283377,0.173716,0.0,0.132244,0.0,0.0,0.0,0.0,...,0.0,0.138614,0.0,0.395751,0.0,0.551131,0.0,0.0,0.193707,0.0
4,0.0,0.323851,0.283377,0.173716,0.0,0.132244,0.0,0.0,0.0,0.0,...,0.0,0.138614,0.0,0.395751,0.0,0.551131,0.0,0.0,0.193707,0.0


In [0]:
# Wrap the mutant representations in a DataFrame whose columns are named
# mut_rep_0, mut_rep_1, ... by position.  Naming by position (not by the
# current label) keeps the cell idempotent: executing it again yields the same
# names rather than doubled prefixes such as "mut_rep_mut_rep_0".
X_mut_rep = pd.DataFrame(X_mut_rep)
X_mut_rep.columns = ["mut_rep_" + str(position) for position in range(X_mut_rep.shape[1])]
X_mut_rep.head()

Unnamed: 0,mut_rep_0,mut_rep_1,mut_rep_2,mut_rep_3,mut_rep_4,mut_rep_5,mut_rep_6,mut_rep_7,mut_rep_8,mut_rep_9,...,mut_rep_54,mut_rep_55,mut_rep_56,mut_rep_57,mut_rep_58,mut_rep_59,mut_rep_60,mut_rep_61,mut_rep_62,mut_rep_63
0,0.0,0.280976,0.275747,0.174941,0.0,0.150126,0.0,0.0,0.0,0.0,...,0.0,0.154043,0.0,0.378262,0.0,0.554899,0.0,0.0,0.211172,0.0
1,0.0,0.296701,0.261797,0.17144,0.0,0.133568,0.0,0.0,0.0,0.0,...,0.0,0.136852,0.0,0.359738,0.0,0.527372,0.0,0.0,0.163254,0.0
2,0.0,0.273316,0.267469,0.167217,0.0,0.144931,0.0,0.0,0.0,0.0,...,0.0,0.150247,0.0,0.367855,0.0,0.55476,0.0,0.0,0.194163,0.0
3,0.0,0.333896,0.274003,0.183446,0.0,0.12583,0.0,0.0,0.0,0.0,...,0.0,0.152587,0.0,0.382284,0.0,0.523138,0.0,0.0,0.169127,0.0
4,0.0,0.311714,0.295236,0.182146,0.0,0.157462,0.0,0.0,0.0,0.0,...,0.0,0.159272,0.017309,0.406799,0.0,0.554247,0.0,0.0,0.220969,0.0


In [0]:
# Wrap the difference representations in a DataFrame whose columns are named
# diff_rep_0, diff_rep_1, ... by position.  A "Mutation" identifier column that
# may already be attached to X_diff_rep is removed first, and names are derived
# from the position, so executing the cell again gives the same result.
X_diff_rep = pd.DataFrame(X_diff_rep).drop(columns="Mutation", errors="ignore")
X_diff_rep.columns = ["diff_rep_" + str(position) for position in range(X_diff_rep.shape[1])]
X_diff_rep.head()

Unnamed: 0,diff_rep_0,diff_rep_1,diff_rep_2,diff_rep_3,diff_rep_4,diff_rep_5,diff_rep_6,diff_rep_7,diff_rep_8,diff_rep_9,...,diff_rep_54,diff_rep_55,diff_rep_56,diff_rep_57,diff_rep_58,diff_rep_59,diff_rep_60,diff_rep_61,diff_rep_62,diff_rep_63
0,0.0,-0.042875,-0.007631,0.001225,0.0,0.017882,0.0,0.0,0.0,0.0,...,0.0,0.015429,0.0,-0.017489,0.0,0.003768,0.0,0.0,0.017466,0.0
1,0.0,-0.02715,-0.02158,-0.002276,0.0,0.001324,0.0,0.0,0.0,0.0,...,0.0,-0.001762,0.0,-0.036013,0.0,-0.023759,0.0,0.0,-0.030453,0.0
2,0.0,-0.050535,-0.015909,-0.006499,0.0,0.012687,0.0,0.0,0.0,0.0,...,0.0,0.011632,0.0,-0.027896,0.0,0.003629,0.0,0.0,0.000456,0.0
3,0.0,0.010045,-0.009374,0.00973,0.0,-0.006414,0.0,0.0,0.0,0.0,...,0.0,0.013973,0.0,-0.013467,0.0,-0.027993,0.0,0.0,-0.024579,0.0
4,0.0,-0.012137,0.011858,0.00843,0.0,0.025218,0.0,0.0,0.0,0.0,...,0.0,0.020658,0.017309,0.011048,0.0,0.003115,0.0,0.0,0.027262,0.0


In [0]:
# Place the wild-type, mutant and difference feature frames side by side
# (column-wise) in one table; rows are matched on the frames' index.
rep_frames = [X_wt_rep, X_mut_rep, X_diff_rep]
X_all = pd.concat(rep_frames, axis="columns")
X_all.head()

Unnamed: 0,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,wt_rep_9,...,diff_rep_54,diff_rep_55,diff_rep_56,diff_rep_57,diff_rep_58,diff_rep_59,diff_rep_60,diff_rep_61,diff_rep_62,diff_rep_63
0,0.0,0.323851,0.283377,0.173716,0.0,0.132244,0.0,0.0,0.0,0.0,...,0.0,0.015429,0.0,-0.017489,0.0,0.003768,0.0,0.0,0.017466,0.0
1,0.0,0.323851,0.283377,0.173716,0.0,0.132244,0.0,0.0,0.0,0.0,...,0.0,-0.001762,0.0,-0.036013,0.0,-0.023759,0.0,0.0,-0.030453,0.0
2,0.0,0.323851,0.283377,0.173716,0.0,0.132244,0.0,0.0,0.0,0.0,...,0.0,0.011632,0.0,-0.027896,0.0,0.003629,0.0,0.0,0.000456,0.0
3,0.0,0.323851,0.283377,0.173716,0.0,0.132244,0.0,0.0,0.0,0.0,...,0.0,0.013973,0.0,-0.013467,0.0,-0.027993,0.0,0.0,-0.024579,0.0
4,0.0,0.323851,0.283377,0.173716,0.0,0.132244,0.0,0.0,0.0,0.0,...,0.0,0.020658,0.017309,0.011048,0.0,0.003115,0.0,0.0,0.027262,0.0


In [0]:
# Prepend the mutation identifier (Mutation_Table["MUTATION"]) as the first
# column, labelled "Mutation".  Any "Mutation" column already present in X_all
# is dropped beforehand, so executing this cell more than once does not stack
# duplicate identifier columns.
mutation_ids = Mutation_Table[["MUTATION"]].rename(columns={"MUTATION": "Mutation"})
X_all = pd.concat(
    [mutation_ids, X_all.drop(columns="Mutation", errors="ignore")],
    axis=1,
)
X_all.head()

Unnamed: 0,Mutation,wt_rep_0,wt_rep_1,wt_rep_2,wt_rep_3,wt_rep_4,wt_rep_5,wt_rep_6,wt_rep_7,wt_rep_8,...,diff_rep_54,diff_rep_55,diff_rep_56,diff_rep_57,diff_rep_58,diff_rep_59,diff_rep_60,diff_rep_61,diff_rep_62,diff_rep_63
0,1ADO@A@D128A,0.0,0.323851,0.283377,0.173716,0.0,0.132244,0.0,0.0,0.0,...,0.0,0.015429,0.0,-0.017489,0.0,0.003768,0.0,0.0,0.017466,0.0
1,1ADO@A@D128G,0.0,0.323851,0.283377,0.173716,0.0,0.132244,0.0,0.0,0.0,...,0.0,-0.001762,0.0,-0.036013,0.0,-0.023759,0.0,0.0,-0.030453,0.0
2,1ADO@A@D128N,0.0,0.323851,0.283377,0.173716,0.0,0.132244,0.0,0.0,0.0,...,0.0,0.011632,0.0,-0.027896,0.0,0.003629,0.0,0.0,0.000456,0.0
3,1ADO@A@D128Q,0.0,0.323851,0.283377,0.173716,0.0,0.132244,0.0,0.0,0.0,...,0.0,0.013973,0.0,-0.013467,0.0,-0.027993,0.0,0.0,-0.024579,0.0
4,1ADO@A@D128V,0.0,0.323851,0.283377,0.173716,0.0,0.132244,0.0,0.0,0.0,...,0.0,0.020658,0.017309,0.011048,0.0,0.003115,0.0,0.0,0.027262,0.0


In [0]:
# Prepend the mutation identifier (Mutation_Table["MUTATION"]) as the first
# column, labelled "Mutation".  Any "Mutation" column already present in
# X_diff_rep is dropped beforehand, so executing this cell more than once does
# not stack duplicate identifier columns.
mutation_ids = Mutation_Table[["MUTATION"]].rename(columns={"MUTATION": "Mutation"})
X_diff_rep = pd.concat(
    [mutation_ids, X_diff_rep.drop(columns="Mutation", errors="ignore")],
    axis=1,
)
X_diff_rep.head()

Unnamed: 0,Mutation,diff_rep_0,diff_rep_1,diff_rep_2,diff_rep_3,diff_rep_4,diff_rep_5,diff_rep_6,diff_rep_7,diff_rep_8,...,diff_rep_54,diff_rep_55,diff_rep_56,diff_rep_57,diff_rep_58,diff_rep_59,diff_rep_60,diff_rep_61,diff_rep_62,diff_rep_63
0,1ADO@A@D128A,0.0,-0.042875,-0.007631,0.001225,0.0,0.017882,0.0,0.0,0.0,...,0.0,0.015429,0.0,-0.017489,0.0,0.003768,0.0,0.0,0.017466,0.0
1,1ADO@A@D128G,0.0,-0.02715,-0.02158,-0.002276,0.0,0.001324,0.0,0.0,0.0,...,0.0,-0.001762,0.0,-0.036013,0.0,-0.023759,0.0,0.0,-0.030453,0.0
2,1ADO@A@D128N,0.0,-0.050535,-0.015909,-0.006499,0.0,0.012687,0.0,0.0,0.0,...,0.0,0.011632,0.0,-0.027896,0.0,0.003629,0.0,0.0,0.000456,0.0
3,1ADO@A@D128Q,0.0,0.010045,-0.009374,0.00973,0.0,-0.006414,0.0,0.0,0.0,...,0.0,0.013973,0.0,-0.013467,0.0,-0.027993,0.0,0.0,-0.024579,0.0
4,1ADO@A@D128V,0.0,-0.012137,0.011858,0.00843,0.0,0.025218,0.0,0.0,0.0,...,0.0,0.020658,0.017309,0.011048,0.0,0.003115,0.0,0.0,0.027262,0.0


**Save dataframes:**

In [0]:
# Write the combined (wt / mut / diff) table to the target folder without the
# row index.  os.path.join adds a path separator only when
# data_target_location does not already end with one, so the file lands inside
# the folder regardless of how the location string was entered.
X_all.to_csv(os.path.join(data_target_location, "sequence_cnn_rep_all_64_01.csv"), index=False)

In [0]:
# Write the difference-only table to the target folder without the row index.
# os.path.join adds a path separator only when data_target_location does not
# already end with one, so the file lands inside the folder regardless of how
# the location string was entered.
X_diff_rep.to_csv(os.path.join(data_target_location, "sequence_cnn_rep_diff_64_01.csv"), index=False)