In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import joblib
import seaborn as sn


In [None]:
# Base directory (relative path) under which the saved models are looked up
# by the load_model calls further down.
MODELDIR = "../models"

In [None]:
# Deserialize the encoder that model_eval_n uses to turn one-hot predictions
# back into class labels.
# NOTE(review): joblib.load unpickles arbitrary objects -- only load files
# that come from a trusted source.
ENCODER_N = joblib.load("../data/HMD/uniprot-hmd-ma_clf_enc.joblib")
# Show the label categories stored in the encoder.
ENCODER_N.categories_

In [None]:
# Deserialize the encoder that model_eval_p uses to turn one-hot predictions
# back into class labels.
# NOTE(review): joblib.load unpickles arbitrary objects -- only load files
# that come from a trusted source.
ENCODER_P = joblib.load("../data/HMD/hmd-ma_clf_enc.joblib")
# Show the label categories stored in the encoder.
ENCODER_P.categories_

In [None]:
def model_eval_binary(model,data):
    """Return ``model.predict(data)`` wrapped in a NumPy array.

    ``model`` only needs to expose a ``predict`` method; ``data`` is passed
    through to it untouched.
    """
    return np.array(model.predict(data))


def model_eval_n(model,data):
    """Predict with ``model`` and decode each row's top class through ``ENCODER_N``.

    Parameters
    ----------
    model
        Object exposing ``predict(data)``. Its output is treated as a 2-D
        array of per-class scores with one row per sample.
        NOTE(review): assumes the score columns follow the order of
        ``ENCODER_N.categories_[0]`` -- confirm against the training code.
    data
        Input forwarded unchanged to ``model.predict``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(classes, probs)``. ``classes`` holds the decoded label of the
        highest-scoring column of every row; ``probs`` holds that highest
        score rounded to 3 decimals. Both are 1-D with one entry per row.
    """
    yhat = np.asarray(model.predict(data))
    top_idx = np.argmax(yhat, axis=1)
    yhat_prob = np.round(np.max(yhat, axis=1), 3)
    # One vectorized one_hot call instead of building one tensor per row.
    yhat_oh = tf.one_hot(top_idx, depth=len(ENCODER_N.categories_[0]), dtype=tf.float32)
    # reshape(-1) rather than squeeze(): squeeze() would turn a one-sample
    # batch into a 0-d array that no longer lines up with ``yhat_prob``.
    classes = np.asarray(ENCODER_N.inverse_transform(yhat_oh)).reshape(-1)
    return classes, yhat_prob


def model_eval_p(model,data):
    """Predict with ``model`` and decode each row's top class through ``ENCODER_P``.

    Parameters
    ----------
    model
        Object exposing ``predict(data)``. Its output is treated as a 2-D
        array of per-class scores with one row per sample.
        NOTE(review): assumes the score columns follow the order of
        ``ENCODER_P.categories_[0]`` -- confirm against the training code.
    data
        Input forwarded unchanged to ``model.predict``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(classes, probs)``. ``classes`` holds the decoded label of the
        highest-scoring column of every row; ``probs`` holds that highest
        score rounded to 3 decimals. Both are 1-D with one entry per row.
    """
    yhat = np.asarray(model.predict(data))
    top_idx = np.argmax(yhat, axis=1)
    yhat_prob = np.round(np.max(yhat, axis=1), 3)
    # One vectorized one_hot call instead of building one tensor per row.
    yhat_oh = tf.one_hot(top_idx, depth=len(ENCODER_P.categories_[0]), dtype=tf.float32)
    # reshape(-1) rather than squeeze(): squeeze() would turn a one-sample
    # batch into a 0-d array that no longer lines up with ``yhat_prob``.
    classes = np.asarray(ENCODER_P.inverse_transform(yhat_oh)).reshape(-1)
    return classes, yhat_prob

In [None]:
# Read the input table from CSV; its "Sequence" column is what gets fed to
# the models later in the notebook.
pseudo_prot = pd.read_csv("../data/pseudo/pseudo-HMD-l50.csv")
# Preview the first rows.
pseudo_prot.head()

In [None]:
# Put a single space between consecutive characters of every sequence
# (presumably the token format the saved models were trained on -- confirm).
# Existing whitespace is removed first so the cell is idempotent: re-running it
# no longer inserts extra spaces between the spaces added by a previous run.
pseudo_prot["Sequence"] = pseudo_prot.Sequence.apply(lambda seq: " ".join("".join(seq.split())))
# String tensor with one entry per row, used as input for every model below.
X = tf.convert_to_tensor(pseudo_prot.Sequence.to_list())

In [None]:
# Reset Keras global state, then load the five "cnn" models in a fixed order.
tf.keras.backend.clear_session()
cnn_ARPs, cnn_aligned, cnn_unaligned, cnn_unaligned50, cnn_unaligned100 = map(
    tf.keras.models.load_model,
    [
        f"{MODELDIR}/Fase4-HMD/cnn",
        f"{MODELDIR}/Fase4-HMD-align/cnn",
        f"{MODELDIR}/Fase4-HMD-unalign/cnn",
        f"{MODELDIR}/Fase4-HMD-unalign50/cnn",
        f"{MODELDIR}/Fase4-HMD-unalign100/cnn",
    ],
)

In [None]:
# The ARP-only model is decoded with ENCODER_P (model_eval_p); the other four
# are decoded with ENCODER_N (model_eval_n). Each result is (classes, probs).
yhat = model_eval_p(cnn_ARPs, X)
yhat_align, yhat_unalign, yhat_unalign50, yhat_unalign100 = (
    model_eval_n(net, X)
    for net in (cnn_aligned, cnn_unaligned, cnn_unaligned50, cnn_unaligned100)
)

In [None]:
# Store every CNN result as a "<model>-class" / "<model>-prob" column pair.
pseudo_prot["CNN-Only-ARPs-class"], pseudo_prot["CNN-Only-ARPs-prob"] = yhat
# ARP-only model: keep the label only when its probability exceeds 0.5,
# otherwise mark the row as "NonR".
pseudo_prot["CNN-Only-ARPs-class"] = (
    pseudo_prot["CNN-Only-ARPs-class"]
    .where(pseudo_prot["CNN-Only-ARPs-prob"] > 0.5, "NonR")
    .to_list()
)

pseudo_prot["CNN-Aligned-class"], pseudo_prot["CNN-Aligned-prob"] = yhat_align

pseudo_prot["CNN-Unaligned-class"], pseudo_prot["CNN-Unaligned-prob"] = yhat_unalign

pseudo_prot["CNN-Unaligned50-class"], pseudo_prot["CNN-Unaligned50-prob"] = yhat_unalign50

pseudo_prot["CNN-Unaligned100-class"], pseudo_prot["CNN-Unaligned100-prob"] = yhat_unalign100

---

In [None]:
# Reset Keras global state, then load the five "cnn-same" models in a fixed order.
tf.keras.backend.clear_session()
cnns_ARPs, cnns_aligned, cnns_unaligned, cnns_unaligned50, cnns_unaligned100 = map(
    tf.keras.models.load_model,
    [
        f"{MODELDIR}/Fase4-HMD/cnn-same",
        f"{MODELDIR}/Fase4-HMD-align/cnn-same",
        f"{MODELDIR}/Fase4-HMD-unalign/cnn-same",
        f"{MODELDIR}/Fase4-HMD-unalign50/cnn-same",
        f"{MODELDIR}/Fase4-HMD-unalign100/cnn-same",
    ],
)

In [None]:
# The ARP-only model is decoded with ENCODER_P (model_eval_p); the other four
# are decoded with ENCODER_N (model_eval_n). Each result is (classes, probs).
yhat = model_eval_p(cnns_ARPs, X)
yhat_align, yhat_unalign, yhat_unalign50, yhat_unalign100 = (
    model_eval_n(net, X)
    for net in (cnns_aligned, cnns_unaligned, cnns_unaligned50, cnns_unaligned100)
)

In [None]:
# Store every CNNS result as a "<model>-class" / "<model>-prob" column pair.
pseudo_prot["CNNS-Only-ARPs-class"], pseudo_prot["CNNS-Only-ARPs-prob"] = yhat
# ARP-only model: keep the label only when its probability exceeds 0.5,
# otherwise mark the row as "NonR".
pseudo_prot["CNNS-Only-ARPs-class"] = (
    pseudo_prot["CNNS-Only-ARPs-class"]
    .where(pseudo_prot["CNNS-Only-ARPs-prob"] > 0.5, "NonR")
    .to_list()
)

pseudo_prot["CNNS-Aligned-class"], pseudo_prot["CNNS-Aligned-prob"] = yhat_align

pseudo_prot["CNNS-Unaligned-class"], pseudo_prot["CNNS-Unaligned-prob"] = yhat_unalign

pseudo_prot["CNNS-Unaligned50-class"], pseudo_prot["CNNS-Unaligned50-prob"] = yhat_unalign50

pseudo_prot["CNNS-Unaligned100-class"], pseudo_prot["CNNS-Unaligned100-prob"] = yhat_unalign100

---

In [None]:
# Reset Keras global state, then load the five "gru" models in a fixed order.
tf.keras.backend.clear_session()
gru_ARPs, gru_aligned, gru_unaligned, gru_unaligned50, gru_unaligned100 = map(
    tf.keras.models.load_model,
    [
        f"{MODELDIR}/Fase4-HMD/gru",
        f"{MODELDIR}/Fase4-HMD-align/gru",
        f"{MODELDIR}/Fase4-HMD-unalign/gru",
        f"{MODELDIR}/Fase4-HMD-unalign50/gru",
        f"{MODELDIR}/Fase4-HMD-unalign100/gru",
    ],
)

In [None]:
# The ARP-only model is decoded with ENCODER_P (model_eval_p); the other four
# are decoded with ENCODER_N (model_eval_n). Each result is (classes, probs).
yhat = model_eval_p(gru_ARPs, X)
yhat_align, yhat_unalign, yhat_unalign50, yhat_unalign100 = (
    model_eval_n(net, X)
    for net in (gru_aligned, gru_unaligned, gru_unaligned50, gru_unaligned100)
)

In [None]:
# Store every GRU result as a "<model>-class" / "<model>-prob" column pair.
pseudo_prot["GRU-Only-ARPs-class"], pseudo_prot["GRU-Only-ARPs-prob"] = yhat
# ARP-only model: keep the label only when its probability exceeds 0.5,
# otherwise mark the row as "NonR".
pseudo_prot["GRU-Only-ARPs-class"] = (
    pseudo_prot["GRU-Only-ARPs-class"]
    .where(pseudo_prot["GRU-Only-ARPs-prob"] > 0.5, "NonR")
    .to_list()
)

pseudo_prot["GRU-Aligned-class"], pseudo_prot["GRU-Aligned-prob"] = yhat_align

pseudo_prot["GRU-Unaligned-class"], pseudo_prot["GRU-Unaligned-prob"] = yhat_unalign

pseudo_prot["GRU-Unaligned50-class"], pseudo_prot["GRU-Unaligned50-prob"] = yhat_unalign50

pseudo_prot["GRU-Unaligned100-class"], pseudo_prot["GRU-Unaligned100-prob"] = yhat_unalign100

In [None]:
# Build a count table: one row per predicted class, one column per model.
# Step 1: stack every "...class" column into long form (model name, predicted label).
pseudo_prot_long = pseudo_prot.loc[:, pseudo_prot.columns.str.contains("class")].melt(value_name = "index", var_name = "columns")
# Step 2: count how often each model predicted each label.
pseudo_prot_long = pd.crosstab(index = pseudo_prot_long["index"], columns=pseudo_prot_long["columns"])
pseudo_prot_long.columns.name = None
pseudo_prot_long.index.name = None
# Shorten the long class label and order the model columns alphabetically.
pseudo_prot_long = pseudo_prot_long.rename(index = {"macrolide-lincosamide-streptogramin":"MLS"}).sort_index(axis=1)
# Drop the "-class" suffix. str.rstrip("-class") must not be used here: it
# strips any trailing run of the characters '-', 'c', 'l', 'a', 's', which also
# eats the final "s" of names such as "CNN-Only-ARPs".
pseudo_prot_long.columns = pseudo_prot_long.columns.str.replace(r"-class$", "", regex=True)

In [None]:
# Display the class-by-model count table built in the previous cell.
pseudo_prot_long

In [None]:
sn.set_context("talk")
# Heatmap with models as rows and predicted classes as columns; only the rows
# are clustered (col_cluster=False).
# sn.clustermap creates its own figure and returns a grid object. Saving through
# that object writes exactly this figure and avoids `plt`, which the import cell
# does not define (the previous plt.savefig call raised NameError on a fresh kernel).
cluster_grid = sn.clustermap(pseudo_prot_long.T, col_cluster=False, cmap = "coolwarm")
cluster_grid.savefig("../figs/hmd-pseudo-clustermap.svg", format = "SVG")

---