In [37]:
import os

import numpy as np
import tensorflow as tf
import keras
from keras import Model, activations, layers, losses, optimizers, callbacks, regularizers
from keras.layers import Dense, Convolution1D, Dropout, BatchNormalization, concatenate, TimeDistributed, Layer, Input
import matplotlib.pyplot as plt

from ss_functions import *
from ss_pred_class_funcs import *
from ss_pred_classes import *

# Root directory of the dataset. Override with the SS_DATA_DIR environment
# variable to run on another machine; the default is the original location.
path = os.environ.get('SS_DATA_DIR', 'C:/Users/vinicius/Downloads/data/')
# Later cells build sub-paths by plain string concatenation (path + 'training/'),
# so the value must end with a separator.
if not path.endswith(('/', '\\')):
    path += '/'

In [None]:
# Load the training set; the first element returned holds the two model inputs.
x_data1, y_train = get_data2(path + 'training/', 'list.txt')

# Convert each input to a NumPy array (index 0 -> first model input, 1 -> second).
x_train1 = np.array(x_data1[0])
x_train2 = np.array(x_data1[1])

In [None]:
def get_model():
    """Assemble the two-input, per-position 3-class network (uncompiled).

    Each input has a variable-length first axis; one carries 21 features per
    position and the other 20. Every input is passed through its own
    DeepInception_block, the two results are concatenated, and a convolution
    plus a small time-distributed dense head yields a 3-way softmax at every
    position.

    Returns:
        A keras ``Model`` taking ``[seq_inputs, pssm_inputs]`` and producing
        the per-position softmax output.
    """
    seq_inputs = Input(shape=(None, 21))
    pssm_inputs = Input(shape=(None, 20))

    # Separate inception stacks per input, merged along the feature axis.
    seq_features = DeepInception_block()(seq_inputs)
    pssm_features = DeepInception_block()(pssm_inputs)
    hidden = concatenate([seq_features, pssm_features])
    # An additional DeepInception_block()(hidden) at this point is currently disabled.

    hidden = Convolution1D(
        filters=100,
        kernel_size=11,
        padding='same',
        activation='relu',
        kernel_regularizer=regularizers.l2(0.001),
    )(hidden)

    # Dense head applied independently at every position: 256 -> 8 units,
    # each followed by dropout.
    for units in (256, 8):
        hidden = TimeDistributed(Dense(units, activation='relu'))(hidden)
        hidden = Dropout(0.4)(hidden)

    class_probs = TimeDistributed(Dense(3, activation='softmax'))(hidden)
    return Model(inputs=[seq_inputs, pssm_inputs], outputs=class_probs)

In [None]:
# Build a fresh network and attach its training configuration.
model = get_model()

# Ideas still to try: categorical_focal_crossentropy, adam, sparse_categorical_crossentropy.
opt = optimizers.Adam(learning_rate=0.001)
model.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['accuracy', truncated_accuracy],
)
# model.summary()

In [None]:
# Stop when validation loss has not improved for 4 consecutive epochs and
# roll the in-memory model back to the weights of its best-loss epoch.
stop_monitor_loss = callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=4,
    mode='min',
    verbose=1,
    restore_best_weights=True
)

# Separately, write the model to disk each time validation truncated accuracy
# reaches a new maximum. This criterion differs from the early-stopping one,
# so the file on disk and the restored in-memory weights may come from
# different epochs.
checkpoint = callbacks.ModelCheckpoint(
    './ss_pred_model0.keras',
    monitor='val_truncated_accuracy',
    save_best_only=True,
    mode='max',
    verbose=1
)

# Train on the second GPU when there is one (the original setup); otherwise
# fall back to the only GPU, or to the CPU, so the cell also runs elsewhere.
available_gpus = tf.config.list_logical_devices('GPU')
if len(available_gpus) > 1:
    train_device = '/GPU:1'
elif available_gpus:
    train_device = '/GPU:0'
else:
    train_device = '/CPU:0'

with tf.device(train_device):
    # validation_split holds out the trailing 20% of the arrays as given
    # (Keras takes the split before shuffling).
    history = model.fit([x_train1, x_train2], y_train,
                        epochs=100,
                        batch_size=16,
                        validation_split=0.2,
                        callbacks=[stop_monitor_loss, checkpoint])

# 3rd block - negligible improvement
# mask - did not work

In [38]:
# Reload the checkpointed model. The project-defined layers and the custom
# metric must be supplied by name so Keras can deserialize them.
test_model = keras.models.load_model(
    'ss_pred_model0.keras',
    custom_objects=dict(
        inception_conv=inception_conv,
        InceptionNet_paper=InceptionNet_paper,
        DeepInception_block=DeepInception_block,
        truncated_accuracy=truncated_accuracy,
    ),
)

In [39]:
# Load the blind test set with label encoding and padding switched off.
test_inputs, y_test = get_data2(
    path + 'blindTest/',
    'list.txt',
    encode_y=False,
    padding=False,
    test=True,
)
x_test1, x_test2 = test_inputs

In [42]:
# Number of entries in the first test input loaded from blindTest/.
len(x_test1)

328

In [40]:
# Run Keras' built-in evaluation of the reloaded model on both test inputs.
# NOTE(review): y_test was loaded with encode_y=False and padding=False, while
# training used categorical_crossentropy on y_train loaded with the defaults —
# confirm evaluate() can consume the test labels/inputs in this form. The
# recorded run of this cell ended in a KeyboardInterrupt.
test_model.evaluate((x_test1, x_test2),y_test)

KeyboardInterrupt: 

In [43]:
# Lookups between the three class labels and their output indices.
# `from_aa` maps an output index back to its label character.
ss_map = {'C': 0, 'H': 1, 'E': 2}
from_aa = {0: 'C', 1: 'H', 2: 'E'}

# Only the first N_EVAL test entries are scored. A single constant keeps the
# slice of the inputs and the slice of the labels in sync.
N_EVAL = 200

predictions_hot = test_model.predict([x_test1[:N_EVAL], x_test2[:N_EVAL]])

# Decode every per-position score vector to the label of its largest entry
# and join the labels into one string per test entry.
predictions = [
    ''.join(from_aa[np.argmax(position_scores)] for position_scores in prediction)
    for prediction in predictions_hot
]

# Per-position accuracy over all scored entries. Positions are taken from the
# ground truth, so any predicted positions beyond its length are ignored.
# TP counts correctly predicted positions.
total = 0
TP = 0
for prediction, truth in zip(predictions, y_test[:N_EVAL]):
    total += len(truth)
    TP += sum(1 for i, ss in enumerate(truth) if ss == prediction[i])

# Avoid a ZeroDivisionError when there is nothing to score.
accuracy = TP / total if total else float('nan')
print(accuracy)

KeyboardInterrupt: 

In [None]:
# Experiment: one shared trunk with two classification heads.
#   - auxiliary head (Y2) branches off after the second inception block
#   - main head (Y1) sits on top of a third inception block
block1 = DeepInception_block()
block2 = DeepInception_block()
block3 = DeepInception_block()

inputs = layers.Input((800,41))
X = block1(inputs)
X = block2(X)

# Auxiliary head on the output of block2.
X1 = Convolution1D(100, 11, activation='relu', padding='same', kernel_regularizer=regularizers.l2(0.001))(X)
X1 = TimeDistributed(Dense(256, activation='relu'))(X1)
X1 = Dropout(0.5)(X1)

# The output layers are named explicitly so that the per-output metric keys
# in the training logs are stable and can be referenced by the checkpoint.
Y2 = TimeDistributed(Dense(3, activation='softmax'), name='ss_aux')(X1)
X = block3(X)

# input1 = layers.Input((800,21))
# input2 = layers.Input((800,20))
# X1 = block1(input1)
# X2 = block2(input2)

# X = layers.concatenate([X1,X2])
# X = DeepInception_block()(X)

# Main head on the output of block3.
X = Convolution1D(100, 11, activation='relu', padding='same', kernel_regularizer=regularizers.l2(0.001))(X)
X = TimeDistributed(Dense(256, activation='relu'))(X)
X = Dropout(0.5)(X)

Y1 = TimeDistributed(Dense(3, activation='softmax'), name='ss_main')(X)

model = Model(inputs=inputs, outputs=[Y1,Y2])

opt = optimizers.Adam(learning_rate=0.005)
# The same loss is applied to both heads. Metrics are given per output name:
# with more than one output the logged keys are prefixed by the output name,
# so a bare 'truncated_accuracy' key is not produced.
# Ideas still to try: categorical_focal_crossentropy, adam, sparse_categorical_crossentropy.
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics={'ss_main': [truncated_accuracy],
                       'ss_aux': [truncated_accuracy]})

# Stop after 8 epochs without improvement of the (combined) validation loss
# and restore the best-loss weights.
stop_monitor_loss = callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=8,
    mode='min',
    verbose=1,
    restore_best_weights=True
)

# Save whenever the main head's validation truncated accuracy improves.
# The key is 'val_' + output name + '_' + metric name.
checkpoint = callbacks.ModelCheckpoint(
    './ss_pred_modeltest9.keras',
    monitor='val_ss_main_truncated_accuracy',
    save_best_only=True,
    mode='max',
    verbose=1
)


input:
1. one hot encoded sequence
2. PSSM

Model:
1D convolutional neural network

output:
multiclass classification - dense layer with relu activation - 3?

validation metric - accuracy + model specific measures

sources:
https://www.csbj.org/article/S2001-0370(22)00506-2/fulltext
