In [None]:
import pandas as pd
import seaborn as sn
import numpy as np
import tensorflow as tf
import joblib
from sklearn.manifold import TSNE
import plotly.express as px


In [None]:
# Embedding widths, one list per configuration; the cell below indexes them
# positionally as [CNN, CNN-same, GRU].
# NOTE(review): the names look like dataset tags — confirm. NCRD is not read by
# any cell visible in this notebook.
HDM = [60, 50, 50]
NCRD = [70, 50, 60]


In [None]:
# EPOCHS is not referenced by any cell visible in this notebook.
EPOCHS = 20

# Embedding widths for the three architectures, read positionally from HDM.
# All three must come from the same list: a misspelt list name here raises
# NameError and leaves the remaining widths undefined for the model cells.
# NOTE(review): the data and model paths point at NCRD while the widths come
# from HDM — confirm this is intended.
CNN_DIM, CNNS_DIM, GRU_DIM = HDM[0], HDM[1], HDM[2]

In [None]:
# Input splits and the directory holding the saved models / exported figures.
TRAIN = "../data/NCRD/TrainNcrd95-ma.csv"
TEST = "../data/NCRD/TestNcrd95-ma.csv"
MODELDIR = "../models/Fase5-NCRD"

# Encoder object persisted with joblib; only its `categories_` attribute is
# read further down.
# NOTE(review): joblib.load unpickles arbitrary objects — only load trusted files.
ENCODER = joblib.load("../data/NCRD/ncrd95-uniprot-ma_clf_enc.joblib")


In [None]:
# Read both splits inside a CPU device scope and build the corpus used to adapt
# the TextVectorization layers.
with tf.device("cpu"):
    # Only the label and sequence columns of the training split are kept.
    train = pd.read_csv(TRAIN)[["Class", "Sequence"]]
    test = pd.read_csv(TEST)
    # Joining a string's characters with " " turns every character into its own
    # whitespace-delimited item.
    sequences = pd.concat([train, test], axis=0)["Sequence"].apply(" ".join)


In [None]:
# Length of the longest raw sequence among rows whose Class is not "NonR",
# taken over both splits; used as the fixed output length of the
# vectorisation layers.
MAX_LEN = max(
    len(seq)
    for split in (train, test)
    for seq in split.loc[split.Class != "NonR", "Sequence"]
)
MAX_LEN

In [None]:
# Display the length of the first entry of the encoder's `categories_`
# (presumably the number of distinct class labels — confirm).
len(ENCODER.categories_[0])

In [None]:
# Build the model inputs without writing back into `test`. Overwriting
# test["Sequence"] in place makes this cell non-idempotent: a second run would
# join an already space-separated string again and change the tokenised input.
X_test = tf.convert_to_tensor(test["Sequence"].apply(" ".join).to_list())

# Class labels as a single-column array, taken from `test` as it stands when
# this cell runs (the labels are renamed in a later cell).
y_test = test.Class.to_numpy().reshape(-1, 1)

In [None]:
# Untrained GRU baseline: vectorise -> embed -> two stacked GRUs -> average
# over the sequence axis.
# NOTE(review): 21 + 2 is presumably the alphabet size plus two reserved tokens — confirm.
grur = tf.keras.models.Sequential(
    [
        tf.keras.layers.TextVectorization(
            max_tokens=21 + 2,
            output_mode="int",
            standardize=None,
            output_sequence_length=MAX_LEN,
        ),
        tf.keras.layers.Embedding(
            input_dim=21 + 2,
            output_dim=GRU_DIM,
            input_length=MAX_LEN,
            mask_zero=True,
        ),
        tf.keras.layers.GRU(512, return_sequences=True),
        tf.keras.layers.GRU(512, return_sequences=True),
        tf.keras.layers.GlobalAveragePooling1D(),
    ]
)
# Only the vocabulary is learned from data; the model is never fitted, so all
# other weights keep their initial values.
grur.layers[0].adapt(sequences)

In [None]:
# Untrained CNN baseline: vectorise -> embed -> four Conv1D layers with a
# halving filter count (512, 256, 128, 64) -> average over the sequence axis.
# Filter counts use integer division: `512/2` evaluates to the float 256.0,
# whereas Conv1D documents `filters` as an integer.
# NOTE(review): 21 + 2 is presumably the alphabet size plus two reserved tokens — confirm.
cnnr = tf.keras.models.Sequential(
    [
        tf.keras.layers.TextVectorization(
            max_tokens=21 + 2,
            output_mode="int",
            standardize=None,
            output_sequence_length=MAX_LEN,
        ),
        tf.keras.layers.Embedding(
            input_dim=21 + 2,
            output_dim=CNN_DIM,
            input_length=MAX_LEN,
            mask_zero=True,
        ),
        tf.keras.layers.Conv1D(filters=512, kernel_size=5, activation="relu"),
        tf.keras.layers.Conv1D(filters=512 // 2, kernel_size=5, activation="relu"),
        tf.keras.layers.Conv1D(filters=512 // 4, kernel_size=5, activation="relu"),
        tf.keras.layers.Conv1D(filters=512 // 8, kernel_size=5, activation="relu"),
        tf.keras.layers.GlobalAveragePooling1D(),
    ]
)
# Only the vocabulary is learned from data; the model is never fitted, so all
# other weights keep their initial values.
cnnr.layers[0].adapt(sequences)

In [None]:
# Untrained "CNN-same" baseline: vectorise -> embed -> four Conv1D layers that
# all keep 512 filters -> average over the sequence axis.
# The embedding width is CNNS_DIM, the constant defined for this architecture,
# rather than the GRU's width.
# NOTE(review): 21 + 2 is presumably the alphabet size plus two reserved tokens — confirm.
cnnsr = tf.keras.models.Sequential(
    [
        tf.keras.layers.TextVectorization(
            max_tokens=21 + 2,
            output_mode="int",
            standardize=None,
            output_sequence_length=MAX_LEN,
        ),
        tf.keras.layers.Embedding(
            input_dim=21 + 2,
            output_dim=CNNS_DIM,
            input_length=MAX_LEN,
            mask_zero=True,
        ),
        tf.keras.layers.Conv1D(filters=512, kernel_size=5, activation="relu"),
        tf.keras.layers.Conv1D(filters=512, kernel_size=5, activation="relu"),
        tf.keras.layers.Conv1D(filters=512, kernel_size=5, activation="relu"),
        tf.keras.layers.Conv1D(filters=512, kernel_size=5, activation="relu"),
        tf.keras.layers.GlobalAveragePooling1D(),
    ]
)
# Only the vocabulary is learned from data; the model is never fitted, so all
# other weights keep their initial values.
cnnsr.layers[0].adapt(sequences)

In [None]:
def model_apply(model, x):
    """Return the output of `model` with its last layer left out.

    The caller's model is not modified: a new Sequential is assembled from the
    same layer objects (so the weights are shared, not copied). Removing the
    layer from `model` itself would strip one more layer on every call made
    with the same object.

    Args:
        model: A Keras Sequential model with at least two layers.
        x: Inputs accepted by the model's `predict`.

    Returns:
        The result of `predict(x)` on the stack without its last layer.
    """
    truncated = tf.keras.Sequential(model.layers[:-1])
    return truncated.predict(x)

In [None]:
# Fitted CNN: reset Keras' global state, load the saved model and take the
# output of everything except its last layer for the test inputs.
tf.keras.backend.clear_session()
cnn = tf.keras.models.load_model(f"{MODELDIR}/cnn")
cnn_emb_fitted = model_apply(cnn,X_test)

# Same inputs through the untrained CNN built earlier in the notebook.
tf.keras.backend.clear_session()
cnn_emb_random = cnnr.predict(X_test)

In [None]:
# Fitted CNN-same: reset Keras' global state, load the saved model and take the
# output of everything except its last layer for the test inputs.
tf.keras.backend.clear_session()
cnns = tf.keras.models.load_model(f"{MODELDIR}/cnn-same")
cnns_emb_fitted = model_apply(cnns,X_test)

# Same inputs through the untrained CNN-same built earlier in the notebook.
tf.keras.backend.clear_session()
cnns_emb_random = cnnsr.predict(X_test)

In [None]:
# Fitted GRU: reset Keras' global state, load the saved model and take the
# output of everything except its last layer for the test inputs.
tf.keras.backend.clear_session()
gru = tf.keras.models.load_model(f"{MODELDIR}/gru")
gru_emb_fitted = model_apply(gru,X_test)

# Same inputs through the untrained GRU built earlier in the notebook.
tf.keras.backend.clear_session()
gru_emb_random = grur.predict(X_test)

In [None]:
# Shorten two class labels for the figure legends. The replacement is applied
# to the whole frame and matches whole cell values.
test = test.replace(
    {
        "macrolide-lincosamide-streptogramin": "MLS",
        "NonR": "NRP",
    }
)

In [None]:
# 2-D t-SNE projections of the fitted and the untrained CNN embeddings.
# random_state is pinned so the layout, and therefore the exported figure, is
# the same on every run; t-SNE is stochastic without it.
# NOTE(review): recent scikit-learn releases rename `n_iter` to `max_iter` —
# confirm against the installed version.
cnn_fit_tsne = TSNE(
    n_components=2,
    learning_rate="auto",
    n_iter=1000,
    perplexity=30,
    random_state=0,
).fit_transform(cnn_emb_fitted)
cnn_rand_tsne = TSNE(
    n_components=2,
    learning_rate="auto",
    n_iter=1000,
    perplexity=30,
    random_state=0,
).fit_transform(cnn_emb_random)

In [None]:
# Order in which class labels are passed to the scatter plots' `category_orders`.
category_orders = np.array(
    [
        "MLS",
        "aminoglycoside",
        "beta-lactam",
        "chloramphenicol",
        "glycopeptide",
        "macrolide",
        "multidrug",
        "phosphonic acid",
        "rifamycin",
        "tetracycline",
        "NRP",
    ]
)

In [None]:
# Scatter of the fitted-CNN t-SNE coordinates, coloured by test-set class.
fig = px.scatter(
    cnn_fit_tsne,
    x=0,
    y=1,
    color=test.Class,
    color_discrete_sequence=px.colors.qualitative.Pastel,
    category_orders={"color": category_orders},
    labels={"color": "Class"},
    title="CNN HMD",
)
# Both update calls return the figure, so they can be chained.
fig.update_layout(legend_traceorder="normal").update_traces(marker_size=7)
fig.show()
# Static SVG export into the model directory.
fig.write_image(f"{MODELDIR}/cnn-tsne-classes.svg")

In [None]:
# Scatter of the untrained-CNN t-SNE coordinates, coloured by test-set class.
# NOTE(review): the title is identical to the fitted-CNN figure's — consider
# distinguishing the two.
fig = px.scatter(
    cnn_rand_tsne,
    x=0,
    y=1,
    color=test.Class,
    color_discrete_sequence=px.colors.qualitative.Pastel,
    category_orders={"color": category_orders},
    labels={"color": "Class"},
    title="CNN HMD",
)
# Both update calls return the figure, so they can be chained.
fig.update_layout(legend_traceorder="normal").update_traces(marker_size=7)
fig.show()
# Static SVG export into the model directory.
fig.write_image(f"{MODELDIR}/cnn-random-tsne-classes.svg")

## CNN-same model (SAME): fitted vs. random embeddings

In [None]:
# 2-D t-SNE projections of the fitted and the untrained CNN-same embeddings.
# random_state is pinned so the layout, and therefore the exported figure, is
# the same on every run; t-SNE is stochastic without it.
# NOTE(review): recent scikit-learn releases rename `n_iter` to `max_iter` —
# confirm against the installed version.
cnns_fit_tsne = TSNE(
    n_components=2,
    learning_rate="auto",
    n_iter=1000,
    perplexity=30,
    random_state=0,
).fit_transform(cnns_emb_fitted)
cnns_rand_tsne = TSNE(
    n_components=2,
    learning_rate="auto",
    n_iter=1000,
    perplexity=30,
    random_state=0,
).fit_transform(cnns_emb_random)

In [None]:
# Scatter of the fitted CNN-same t-SNE coordinates, coloured by test-set class.
fig = px.scatter(
    cnns_fit_tsne,
    x=0,
    y=1,
    color=test.Class,
    color_discrete_sequence=px.colors.qualitative.Pastel,
    category_orders={"color": category_orders},
    labels={"color": "Class"},
    title="CNN-same HMD",
)
# Both update calls return the figure, so they can be chained.
fig.update_layout(legend_traceorder="normal").update_traces(marker_size=7)
fig.show()
# Static SVG export into the model directory.
fig.write_image(f"{MODELDIR}/cnn-same-tsne-classes.svg")

In [None]:
# Scatter of the untrained CNN-same t-SNE coordinates, coloured by test-set class.
# NOTE(review): the title is identical to the fitted CNN-same figure's —
# consider distinguishing the two.
fig = px.scatter(
    cnns_rand_tsne,
    x=0,
    y=1,
    color=test.Class,
    color_discrete_sequence=px.colors.qualitative.Pastel,
    category_orders={"color": category_orders},
    labels={"color": "Class"},
    title="CNN-same HMD",
)
# Both update calls return the figure, so they can be chained.
fig.update_layout(legend_traceorder="normal").update_traces(marker_size=7)
fig.show()
# Static SVG export into the model directory.
fig.write_image(f"{MODELDIR}/cnn-same-random-tsne-classes.svg")

## GRU model: fitted vs. random embeddings

In [None]:
# 2-D t-SNE projections of the fitted and the untrained GRU embeddings.
# random_state is pinned so the layout, and therefore the exported figure, is
# the same on every run; t-SNE is stochastic without it.
# NOTE(review): recent scikit-learn releases rename `n_iter` to `max_iter` —
# confirm against the installed version.
gru_fit_tsne = TSNE(
    n_components=2,
    learning_rate="auto",
    n_iter=1000,
    perplexity=30,
    random_state=0,
).fit_transform(gru_emb_fitted)
gru_rand_tsne = TSNE(
    n_components=2,
    learning_rate="auto",
    n_iter=1000,
    perplexity=30,
    random_state=0,
).fit_transform(gru_emb_random)

In [None]:
# Scatter of the fitted-GRU t-SNE coordinates, coloured by test-set class.
fig = px.scatter(
    gru_fit_tsne,
    x=0,
    y=1,
    color=test.Class,
    color_discrete_sequence=px.colors.qualitative.Pastel,
    category_orders={"color": category_orders},
    labels={"color": "Class"},
    title="GRU HMD",
)
# Both update calls return the figure, so they can be chained.
fig.update_layout(legend_traceorder="normal").update_traces(marker_size=7)
fig.show()
# Static SVG export into the model directory.
fig.write_image(f"{MODELDIR}/gru-tsne-classes.svg")

In [None]:
# Scatter of the untrained-GRU t-SNE coordinates, coloured by test-set class.
# NOTE(review): the title is identical to the fitted-GRU figure's — consider
# distinguishing the two.
fig = px.scatter(
    gru_rand_tsne,
    x=0,
    y=1,
    color=test.Class,
    color_discrete_sequence=px.colors.qualitative.Pastel,
    category_orders={"color": category_orders},
    labels={"color": "Class"},
    title="GRU HMD",
)
# Both update calls return the figure, so they can be chained.
fig.update_layout(legend_traceorder="normal").update_traces(marker_size=7)
fig.show()
# Static SVG export into the model directory.
fig.write_image(f"{MODELDIR}/gru-random-tsne-classes.svg")