In [None]:
%load_ext autoreload
%autoreload 2
import logging
import pandas as pd
import numpy as np
from hopfield import Hopfield
from hopfield_helper import *
import tensorflow as tf
from topicpy import gtex
import multiprocessing as mp
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Root folder of the dataset; the cells below read files from it.
# Note the trailing slash: paths built as f'{directory}/...' contain a double slash.
directory = "../topics/datasets/gtex10/"

In [None]:
# Read the level-0 topic distribution table. The transpose / reset_index /
# set_index round trip keeps the original orientation and labels the column
# axis "id".
topic_dist_path = f'{directory}/topsbm/topsbm_level_0_topic-dist.csv'
df = (
    pd.read_csv(topic_dist_path, index_col=1)
    .drop("i_doc", axis=1)
    .transpose()
    .reset_index()
    .rename({"index": "id"}, axis=1)
    .set_index("id")
    .transpose()
)

# Column-wise min-max scaling; threshold should be 0.005 for this normalisation.
# Alternative kept from the original notebook, where it was tagged "DNW":
#   df.subtract(df.mean(axis=0),1).abs().divide(df.std(axis=0),1)
column_min = df.min(axis=0)
column_range = df.max(axis=0) - column_min
df = df.subtract(column_min, 1).abs().divide(column_range, 1)
df.head(2)

In [None]:
# Read the files table, align its rows to the rows of `df`, and copy its
# "SMTS" column into `df` as "tissue".
files_path = f'{directory}/files.dat'
df_files = pd.read_csv(files_path, index_col=0).reindex(df.index)
df["tissue"] = df_files.loc[:, "SMTS"]

In [None]:
# Cut-off used to binarise the scaled values (strictly greater-than).
THRESHOLD = 0.005
threshold_f = lambda p: p > THRESHOLD

# The comparison is applied to whole frames instead of element by element:
# DataFrame.applymap is deprecated in recent pandas and runs a Python call per cell.

# One binary row per tissue: per-tissue column means, then thresholded.
tissue_means = df.groupby("tissue").mean()
data_df = threshold_f(tissue_means).astype(int)

# Binary table for every row of `df`. "tissue" is dropped before transposing
# so the frame stays numeric instead of becoming object-dtype.
df_threshold = threshold_f(df.drop(columns="tissue").transpose()).astype(int)

# NOTE(review): data_tensor is mapped to {-1, +1} while df_threshold_tensor
# stays in {0, 1}; confirm that the helpers consuming them expect this.
data_tensor = tf.convert_to_tensor(data_df.values*2-1, dtype=tf.int8)
df_threshold_tensor = tf.convert_to_tensor(df_threshold.values.T, dtype=tf.int8)

In [None]:
# Build a Hopfield model sized by the number of columns of `data_df`,
# load the per-tissue patterns into it, print it, then train it.
_, n_columns = data_df.shape
model = Hopfield(n_columns)
model.load(data_tensor)
print(model)
model.train()

In [None]:
# Set the "hopfield" logger to INFO level.
hopfield_logger = logging.getLogger("hopfield")
hopfield_logger.setLevel(logging.INFO)

In [None]:
# Real label of every row of `df`, in index order.
reals = [get_real_label(df, sample) for sample in df.index]
# Labels predicted by the model for the thresholded samples.
preds = get_predicted_labels(
    data_df.index,
    df_threshold_tensor,
    data_tensor,
    model,
)

In [None]:
print("Acc ",accuracy_score(reals, preds))

# One-vs-rest AUC on one-hot encoded labels.
# - Real and predicted labels are encoded against ONE shared class list, so
#   column k means the same class in both matrices.
# - The number of classes comes from the data rather than a fixed depth.
# - roc_auc_score expects (y_true, y_score): real labels go first.
# - Failures are reported instead of being swallowed silently.
# NOTE(review): preds are hard labels, so this is an AUC of 0/1 scores;
# assumes `reals` and `preds` convert to 1-D arrays of comparable labels.
try:
    real_labels = np.asarray(reals)
    pred_labels = np.asarray(preds)
    classes, codes = np.unique(
        np.concatenate([real_labels, pred_labels]), return_inverse=True
    )
    one_hot = np.eye(len(classes))[codes]
    y_true = one_hot[:len(real_labels)]
    y_score = one_hot[len(real_labels):]
    print("AUC ",roc_auc_score(y_true, y_score, multi_class="ovr"))
except (ValueError, TypeError) as err:
    # e.g. a class that never occurs among the real labels
    print("AUC not available:", err)

In [None]:
@tf.function
def get_distance_matrix(data_tensor, model):
    """Distance between each reconstructed pattern and every pattern.

    Rows of ``data_tensor`` are cast to float64. Entry (i, j) is the smaller
    of ``hamming(model.reconstruct(A), B)`` and
    ``hamming(-model.reconstruct(A), B)``, where A is row i and B is row j,
    so a reconstruction and its sign-flipped version count as the same.

    Args:
        data_tensor: tensor whose rows are the patterns to compare.
        model: object exposing ``reconstruct``.

    Returns:
        A float64 tensor with one row per reconstructed pattern.
    """
    patterns = tf.cast(data_tensor, tf.float64)

    def distances_from(A):
        # NOTE(review): reconstruct(A) is evaluated twice per pair, as in the
        # original; it could be hoisted if reconstruct is deterministic.
        def distance_to(B):
            direct = tf.cast(hamming(model.reconstruct(A), B), tf.float64)
            flipped = tf.cast(hamming(-model.reconstruct(A), B), tf.float64)
            return tf.reduce_min([direct, flipped])

        return tf.map_fn(distance_to, patterns, parallel_iterations=6)

    return tf.map_fn(distances_from, patterns, parallel_iterations=6)

# Pairwise distances between the reconstructed and the stored per-tissue patterns.
dist_matrix = get_distance_matrix(data_tensor, model)

In [None]:
# Heatmap of the pattern distance matrix, both axes labelled with data_df's index.
tissue_names = data_df.index
sns.heatmap(
    dist_matrix,
    vmin=0,
    vmax=1,
    xticklabels=tissue_names,
    yticklabels=tissue_names,
)

In [None]:
# Row-normalised confusion matrix (real vs predicted labels), drawn with
# clustering disabled so rows and columns keep a fixed order.
# Tick labels are applied positionally, so the label order is passed to
# confusion_matrix explicitly. Rows and columns then always line up with
# data_df.index, even if some label is missing from `reals` or `preds`.
tissue_labels = list(data_df.index)
conf_matrix = confusion_matrix(reals, preds, labels=tissue_labels, normalize="true")

cm = sns.clustermap(conf_matrix,
                    vmin=0,
                    vmax=1,
                    row_cluster=False,
                    col_cluster=False,
                    xticklabels=tissue_labels,
                    yticklabels=tissue_labels,
                    annot=True,
                    annot_kws={"fontsize":15})
ax = cm.ax_heatmap
fig = ax.get_figure()

# y axis: real labels, ticks and axis label on the left
ax.set_ylabel("real", fontsize=35, rotation=90)
ax.set_yticklabels(labels=tissue_labels, rotation=0)
ax.yaxis.tick_left()
ax.yaxis.set_label_position("left")

# x axis: predicted labels
ax.set_xticklabels(labels=tissue_labels, rotation=90)
ax.set_xlabel("predicted",fontsize=35)
ax.tick_params(labelsize=35)

# colour bar
cax = cm.ax_cbar
cax.tick_params(labelsize=30)
cax.set_title("P()", fontsize=30)
plt.tight_layout()
#cm.savefig(f"predict_{label}.pdf")

plt.show()

In [None]:
# For each stored pattern, print its own label next to the label at the
# position returned by predict().
for tissue, pattern in zip(data_df.index, data_tensor):
    predicted_position = predict(pattern, data_tensor, model).numpy()
    print(tissue, data_df.index[predicted_position])