In [1]:
import numpy as np
import tensorflow as tf
import keras
from keras import Model, layers, activations, losses

In [2]:
all_aa = "ARNDCEQGHILKMFPSTWYVX"
aa_onehot_dict = dict()
for i, aa in enumerate(all_aa):
    aa_onehot_dict[aa] = i
    
path = "C:/Users/vinicius/Downloads/data/training/"

def aa_onehot_encoding(seq):
    profile = []
    for aa in seq:
        encoded = np.zeros(21)
        encoded[aa_onehot_dict[aa]] = 1
        profile.append(encoded)
    while len(profile) != 800: # pad to 800
        profile.append(np.zeros(21))
    return profile

def parse_dssp(dssp_file):
    with open(path+"dssp/"+dssp_file+".dssp", 'r') as file:
        file.readline()
        ss = file.readline().rstrip()
    return ss

def parse_pssm(pssm_filename):
    profile = []
    seq = ''
    with open(path+"pssm/"+pssm_filename+".pssm", 'r') as pssm:
        pssm_lines = pssm.readlines()
        for line in pssm_lines[3:-6]:
            line = line.rstrip().split()
            seq += line[1]
            profile_line = []
            for n in line[22:-2]:
                profile_line.append(float(n)/100)
            profile.append(profile_line)
    while (len(profile) != 800):
        profile.append(np.zeros(20))
    return profile, seq


def parse_fasta(file):
    pass

ss_map = {'C': 0, 'H': 1, 'E': 2}

def ss_onehot_encoding(ss_sequence):
    ss_encoded = []
    for struc in ss_sequence:
        encoding = np.zeros(3)
        encoding[ss_map[struc]] = 1
        ss_encoded.append(encoding)
    while (len(ss_encoded) != 800):
        ss_encoded.append(np.zeros(3))
    return ss_encoded

def get_data(file, encode_y=True): 
    x = []
    y = []
    with open(path+file, 'r') as sample_file: # add some stuff to check?
        for line in sample_file:
            line = line.rstrip()
            pssm, sequence = parse_pssm(line)
            sequence_hot = aa_onehot_encoding(sequence)
            features = np.concatenate((sequence_hot, pssm), axis=1)
            x.append(features)

            dssp = parse_dssp(line).replace('-','C')
            if encode_y:
                dssp = ss_onehot_encoding(dssp)
            
            y.append(dssp)
    return np.array(x), np.array(y)

In [3]:
x_data, y_data = get_data('list.txt')
x_train, y_train = x_data[:1101], y_data[:1101]
fff, y_data = get_data('list.txt', encode_y=False)
x_test, y_test = x_data[1100:], y_data[1100:]

In [5]:
class InceptionNet_naive(layers.Layer):
    def __init__(self, num_features=2):
        super().__init__()
        self.k = num_features

    def call(self, inputs, num_layers=3, layer_size=8):
        X1 = layers.Conv2D(layer_size, kernel_size=(1), strides=1, padding='same')(inputs)
        X2 = layers.Conv2D(layer_size, kernel_size=(3), strides=1, padding='same')(inputs)
        X3 = layers.Conv2D(layer_size, kernel_size=(5), strides=1, padding='same')(X)
        X = layers.concatenate((X1, X2, X3))

        # X_layers = []
        # for i in range(layers):
        #     fs = i*2 + 1
        #     X_layers.append(layers.conv1D(layer_size, filter=(1,fs)))
        # X = layers.concatenate(X_layers)
        return activations.relu(X)


In [6]:
class InceptionNet_naive(layers.Layer):
    def __init__(self, num_features=41, num_layers=7):
        super().__init__()
        self.k = num_features
        self.conv_Xs = []
        self.conv1 = layers.Conv1D(self.k, kernel_size=1, strides=1, padding='same')
        self.conv2 = layers.Conv1D(self.k, kernel_size=3, strides=1, padding='same')    
        self.conv3 = layers.Conv1D(self.k, kernel_size=5, strides=1, padding='same')
        self.conv4 = layers.Conv1D(self.k, kernel_size=7, strides=1, padding='same') 
        self.conv5 = layers.Conv1D(self.k, kernel_size=9, strides=1, padding='same')
        self.conv6 = layers.Conv1D(self.k, kernel_size=11, strides=1, padding='same') 
        self.conv7 = layers.Conv1D(self.k, kernel_size=13, strides=1, padding='same') 
        self.conv_layers = [self.conv1, self.conv2, self.conv3, self.conv4, self.conv5, self.conv6, self.conv7]
        
    def call(self, inputs):
        X1 = self.conv1(inputs)
        X2 = self.conv2(inputs)
        X3 = self.conv3(inputs)
        X4 = self.conv4(inputs)
        X5 = self.conv5(inputs)
        X6 = self.conv6(inputs) 
        X7 = self.conv7(inputs)
        X = layers.concatenate((X1,X2,X3,X4,X5,X6,X7))
        
        return layers.Activation('relu')(X)
    

In [None]:
class InceptionNet_pool(layers.Layer):
    def __init__(self, num_features=41, num_layers=24):
        super().__init__()
        
        
        
        
    def call(self, inputs):
        
        X = layers.concatenate(Xs)
        return layers.Activation('relu')(X)

In [31]:
num_labels = 3
num_positions = 800

inputs = layers.Input((800, 41))
X = inputs
# X = layers.Masking(mask_value=0)(X)
for i in range(3):
    X = InceptionNet_naive_chat()(X)
Y = layers.Dense(3, activation='softmax')(X)
# Y = layers.Reshape((num_positions,num_labels))(Y)

loss_fn = losses.CategoricalCrossentropy()

model = Model(inputs=inputs, outputs=Y)
model.compile(loss='categorical_crossentropy', # try siome: "categorical_focal_crossentropy, adam, sparse_categorical_crossentropy
              optimizer="sgd",
              metrics=['accuracy'])

In [14]:
class inception_conv(layers.Layer):
    def __init__(self, kernel_s, num_features=100):
        super().__init__()
        self.conv = layers.Conv1D(num_features, kernel_size=kernel_s, strides=1, padding='same')
        self.b_norm = layers.BatchNormalization(epsilon=0.001)
    
    def call(self, inputs):
        X = self.conv(inputs)
        X = self.b_norm(X)
        X = layers.Activation('relu')(X)
        X = layers.Dropout(0.4)(X)
        return X


class InceptionNet_paper(layers.Layer):
    def __init__(self):
        super().__init__()
        self.conv1_1 = inception_conv(1)
        self.conv1_2 = inception_conv(1)
        self.conv1_3 = inception_conv(1)
        self.conv3_1 = inception_conv(3)
        self.conv3_2 = inception_conv(3)
        self.conv3_3 = inception_conv(3)
        self.conv3_4 = inception_conv(3)

    def call(self, inputs):
        X1 = self.conv1_1(inputs)
        X2 = self.conv3_1(self.conv1_2(inputs))
        X3 = self.conv3_4(self.conv3_3(self.conv3_2(self.conv1_3(inputs))))
    
        X = layers.concatenate((X1,X2,X3))
        return X # activation?

class DeepInception_block(layers.Layer):
    def __init__(self):
        super().__init__()
        self.inception1 = InceptionNet_paper()
        self.inception2_1 = InceptionNet_paper()
        self.inception2_2 = InceptionNet_paper()
        self.inception3_1 = InceptionNet_paper()
        self.inception3_2 = InceptionNet_paper()
        self.inception3_3 = InceptionNet_paper()
        self.inception3_4 = InceptionNet_paper()

    def call(self, inputs):
        X1 = self.inception1(inputs)
        X2 = self.inception2_2(self.inception2_1(inputs))
        X3 = self.inception3_4(self.inception3_3(self.inception3_2(self.inception3_1(inputs))))
        X = layers.concatenate((X1,X2,X3))
        return X

num_labels = 3
num_positions = 800

inputs = layers.Input((800, 41))
X = inputs
for i in range(1):
    X = DeepInception_block()(X)
X = inception_conv(11)(X)
X = layers.Dense(100, activation='relu')(X)
Y = layers.Dense(3, activation='softmax')(X)


model = Model(inputs=inputs, outputs=Y)
model.compile(loss='categorical_crossentropy', # try siome: "categorical_focal_crossentropy, adam, sparse_categorical_crossentropy
              optimizer="sgd",
              metrics=['accuracy'])
model.summary()

Model: "model_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_6 (InputLayer)        [(None, 800, 41)]         0         
                                                                 
 deep_inception_block_6 (Dee  (None, 800, 900)         1261400   
 pInception_block)                                               
                                                                 
 inception_conv_369 (incepti  (None, 800, 100)         990500    
 on_conv)                                                        
                                                                 
 dense_10 (Dense)            (None, 800, 100)          10100     
                                                                 
 dense_11 (Dense)            (None, 800, 3)            303       
                                                                 
Total params: 2,262,303
Trainable params: 2,252,303
Non-tra

In [15]:

history = model.fit(x_train, y_train,
                    epochs=10,
                    batch_size=128,
                    validation_split=0.1)

Epoch 1/10


ResourceExhaustedError: Graph execution error:

Detected at node 'model_5/deep_inception_block_6/inception_net_paper_50/inception_conv_355/dropout_35/dropout/Mul' defined at (most recent call last):
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\runpy.py", line 197, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\ipykernel_launcher.py", line 18, in <module>
      app.launch_new_instance()
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
      app.start()
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\ipykernel\kernelapp.py", line 739, in start
      self.io_loop.start()
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\tornado\platform\asyncio.py", line 205, in start
      self.asyncio_loop.run_forever()
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\asyncio\base_events.py", line 601, in run_forever
      self._run_once()
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\asyncio\base_events.py", line 1905, in _run_once
      handle._run()
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\ipykernel\kernelbase.py", line 545, in dispatch_queue
      await self.process_one()
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\ipykernel\kernelbase.py", line 534, in process_one
      await dispatch(*args)
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\ipykernel\kernelbase.py", line 437, in dispatch_shell
      await result
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\ipykernel\ipkernel.py", line 359, in execute_request
      await super().execute_request(stream, ident, parent)
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\ipykernel\kernelbase.py", line 778, in execute_request
      reply_content = await reply_content
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\ipykernel\ipkernel.py", line 446, in do_execute
      res = shell.run_cell(
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\ipykernel\zmqshell.py", line 549, in run_cell
      return super().run_cell(*args, **kwargs)
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\IPython\core\interactiveshell.py", line 3048, in run_cell
      result = self._run_cell(
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\IPython\core\interactiveshell.py", line 3103, in _run_cell
      result = runner(coro)
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\IPython\core\interactiveshell.py", line 3308, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\IPython\core\interactiveshell.py", line 3490, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\IPython\core\interactiveshell.py", line 3550, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\vinicius\AppData\Local\Temp\ipykernel_2556\1359194401.py", line 1, in <module>
      history = model.fit(x_train, y_train,
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\keras\engine\training.py", line 1564, in fit
      tmp_logs = self.train_function(iterator)
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\keras\engine\training.py", line 1160, in train_function
      return step_function(self, iterator)
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\keras\engine\training.py", line 1146, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\keras\engine\training.py", line 1135, in run_step
      outputs = model.train_step(data)
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\keras\engine\training.py", line 993, in train_step
      y_pred = self(x, training=True)
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\keras\engine\training.py", line 557, in __call__
      return super().__call__(*args, **kwargs)
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\keras\engine\functional.py", line 510, in call
      return self._run_internal_graph(inputs, training=training, mask=mask)
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\keras\engine\functional.py", line 667, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\vinicius\AppData\Local\Temp\ipykernel_2556\255167699.py", line 48, in call
      X3 = self.inception3_4(self.inception3_3(self.inception3_2(self.inception3_1(inputs))))
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\vinicius\AppData\Local\Temp\ipykernel_2556\1249676325.py", line 27, in call
      X1 = self.conv1_1(inputs)
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\vinicius\AppData\Local\Temp\ipykernel_2556\3636770591.py", line 11, in call
      X = layers.Dropout(0.4)(X)
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\keras\layers\regularization\dropout.py", line 116, in call
      output = control_flow_util.smart_cond(
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\keras\utils\control_flow_util.py", line 108, in smart_cond
      return tf.__internal__.smart_cond.smart_cond(
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\keras\layers\regularization\dropout.py", line 112, in dropped_inputs
      return self._random_generator.dropout(
    File "c:\Users\vinicius\anaconda3\envs\tf\lib\site-packages\keras\backend.py", line 2162, in dropout
      return tf.nn.dropout(
Node: 'model_5/deep_inception_block_6/inception_net_paper_50/inception_conv_355/dropout_35/dropout/Mul'
failed to allocate memory
	 [[{{node model_5/deep_inception_block_6/inception_net_paper_50/inception_conv_355/dropout_35/dropout/Mul}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_train_function_116767]

In [39]:
ss_map = {'C': 0, 'H': 1, 'E': 2}
from_aa = {0: 'C', 1: 'H', 2: 'E'}
predictions_hot = model.predict(x_test)
predictions = []
for prediction in predictions_hot:
    dssp = ''
    for i in prediction:
        dssp += from_aa[np.argmax(i)]
    predictions.append(dssp)

total = 0
TP = 0
for prediction, truth in zip(predictions, y_test):
    for i, ss in enumerate(truth):
        total +=1
        if ss==prediction[i]:
            TP+=1

accuracy = TP/total
print(accuracy)

0.322124833997344


input:
1. one hot encoded sequence
2. PSSM

Model:
1D convolutional neural network

output:
multiclass classification - dense layer with relu activaiton - 3?

validation metric - accuray + model specific measures

soruces:
https://www.csbj.org/article/S2001-0370(22)00506-2/fulltext
