In [1]:
import os

import numpy as np
import tensorflow as tf
import keras
from keras import Model, optimizers, callbacks, regularizers
from keras.layers import Dense, Convolution1D, Dropout, TimeDistributed, Input
from ss_functions import *
from ss_pred_class_funcs import *
from ss_pred_classes import *

# Root directory of the dataset. Set the SS_PRED_DATA_DIR environment variable
# to point somewhere else; when it is unset the original location is used.
# os.path.join(..., '') guarantees a trailing separator, which the later cells
# rely on because they build sub-paths by plain concatenation (path + 'training/').
path = os.path.join(
    os.environ.get('SS_PRED_DATA_DIR', 'C:/Users/vinicius/Downloads/data/'),
    '',
)

In [2]:
# Read the training set listed in <path>/training/list.txt, then convert the
# returned containers to NumPy arrays for model.fit. The raw results keep
# their own names so re-running this cell never feeds arrays back into np.array.
x_train_raw, y_train_raw = get_data(path + 'training/', 'list.txt')
x_train = np.array(x_train_raw)
y_train = np.array(y_train_raw)

In [None]:
# Early stopping: end training once the validation loss has failed to improve
# for 4 consecutive epochs, then roll the model back to its best weights.
stop_monitor_loss = callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=4,
    min_delta=0,
    restore_best_weights=True,
    verbose=1,
)

# Checkpointing: write the model to disk whenever the validation value of the
# `truncated_accuracy` metric reaches a new maximum.
# NOTE(review): this tracks a different quantity than the early-stopping
# callback (val_loss) -- confirm that is intended.
checkpoint = callbacks.ModelCheckpoint(
    filepath='./ss_pred_modelstuff.keras',
    monitor='val_truncated_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

def get_model():
    """Build the (uncompiled) per-position 3-class classifier.

    Architecture, in order:
      * input of shape (None, 41): variable-length sequences, 41 features per position
      * two stacked ``DeepInception_block`` layers
      * a 1D convolution (100 filters, kernel size 19, ReLU, 'same' padding,
        L2 kernel regularisation of 0.001)
      * two position-wise Dense + Dropout(0.4) stages with 256 and 128 units
      * a position-wise Dense softmax over 3 classes

    Returns:
        keras.Model mapping the input tensor to per-position class probabilities.
    """
    inputs = Input((None, 41))

    hidden = inputs
    for _ in range(2):
        hidden = DeepInception_block()(hidden)

    hidden = Convolution1D(
        100,
        19,
        activation='relu',
        padding='same',
        kernel_regularizer=regularizers.l2(0.001),
    )(hidden)

    # Position-wise fully connected stack, each stage followed by dropout.
    for units in (256, 128):
        hidden = TimeDistributed(Dense(units, activation='relu'))(hidden)
        hidden = Dropout(0.4)(hidden)

    outputs = TimeDistributed(Dense(3, activation='softmax'))(hidden)
    return Model(inputs=inputs, outputs=outputs)

In [None]:
# Build a fresh network and compile it with Adam (lr = 0.001) and categorical
# cross-entropy; plain accuracy and the project's truncated_accuracy are tracked.
# Options to try: categorical_focal_crossentropy, adam, sparse_categorical_crossentropy
model = get_model()

opt = optimizers.Adam(learning_rate=0.001)
model.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['accuracy', truncated_accuracy],
)
# model.summary()

In [None]:
# Train for at most 100 epochs in mini-batches of 32, with 10% of the training
# arrays held out for validation; the callbacks handle early stopping and
# checkpointing of the best model.
# NOTE(review): the device string pins training to the second GPU ('/GPU:1');
# confirm that device exists on the machine running this notebook.
# NOTE(review): Keras' validation_split takes the held-out fraction from the end
# of the arrays before shuffling -- confirm the data order makes that a fair split.
with tf.device('/GPU:1'):
    history = model.fit(
        x_train,
        y_train,
        batch_size=32,
        epochs=100,
        validation_split=0.1,
        callbacks=[stop_monitor_loss, checkpoint],
    )
    
# 3rd block - negligible improvement
# mask - did not work

In [3]:
# Project-defined layers/metrics that Keras must be told about in order to
# deserialize the saved model.
custom_objects = {
    'inception_conv': inception_conv,
    'InceptionNet_paper': InceptionNet_paper,
    'DeepInception_block': DeepInception_block,
    'truncated_accuracy': truncated_accuracy,
}

# NOTE(review): this loads 'ss_pred_model.keras', whereas the ModelCheckpoint
# callback in this notebook writes './ss_pred_modelstuff.keras'. Confirm which
# file is meant to be evaluated; after a fresh training run the two can differ.
test_model = keras.models.load_model(
    'ss_pred_model.keras',
    custom_objects=custom_objects,
)
# test_model.summary()

In [4]:
# Load the blind-test set listed in <path>/blindTest/list.txt.
# Presumably the flags pad the inputs while leaving the labels as raw, unpadded,
# unencoded sequences -- confirm against get_data's implementation.
x_test, y_test = get_data(
    path + 'blindTest/',
    'list.txt',
    encode_y=False,
    padding_x=True,
    padding_y=False,
    test=True,
)


In [7]:
# Label <-> class-index lookup tables for the three output classes.
# `ss_map` (label -> index) is not used in this cell; it is kept in case other
# cells rely on it. Despite its name, `from_aa` maps a class index to its label.
ss_map = {'C': 0, 'H': 1, 'E': 2}
from_aa = {0: 'C', 1: 'H', 2: 'E'}

# Per-position class probabilities for every test sequence.
predictions_hot = test_model.predict(np.array(x_test))

# Decode the whole batch at once: a single argmax over the class axis, then one
# join per sequence (instead of one np.argmax call and one string
# concatenation per residue).
class_ids = np.argmax(predictions_hot, axis=-1)
predictions = [''.join(from_aa[idx] for idx in row) for row in class_ids.tolist()]

# Per-residue accuracy. Only the first len(truth) positions of each prediction
# are compared, so any extra (padded) positions in the prediction are ignored.
# `TP` counts correctly predicted residues (it is not a per-class true-positive count).
# NOTE(review): only the first 200 test sequences are scored ([:200]) -- confirm
# this limit is intentional.
total = 0
TP = 0
for prediction, truth in zip(predictions, y_test[:200]):
    hits = [ss == prediction[i] for i, ss in enumerate(truth)]
    total += len(hits)
    TP += sum(hits)

accuracy = TP/total
print(accuracy)

0.7670230875625067


input:
1. one hot encoded sequence
2. PSSM

Model:
1D convolutional neural network

output:
multiclass classification - dense layer with softmax activation - 3 classes

validation metric - accuracy + model specific measures

sources:
https://www.csbj.org/article/S2001-0370(22)00506-2/fulltext
