In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from topicpy import gtex
import multiprocessing as mp

In [None]:
# Read the LDA level-3 topic distribution table. The second CSV column
# becomes the row index and the "i_doc" column is discarded.
topic_dist = pd.read_csv(
    '../datasets/gtex/10/lda/lda_level_3_topic-dist.csv', index_col=1
).drop("i_doc", axis=1)

# Round-trip through a transposed frame so the column axis ends up named "id"
# while rows/columns keep their original orientation.
df = (
    topic_dist
    .transpose()
    .reset_index()
    .rename({"index": "id"}, axis=1)
    .set_index("id")
    .transpose()
)

# Per column: absolute deviation from the column mean, in units of the
# column standard deviation.
column_mean = df.mean(axis=0)
column_std = df.std(axis=0)
df = df.subtract(column_mean, 1).abs().divide(column_std, 1)
df.head(2)

In [None]:
# Per-process cache of the sample annotation table. It is filled lazily so
# each worker process reads the file once instead of once per sample.
_GTEX_SAMPLES = None


def get_tissue(sample):
    """Return the "SMTS" entry that `gtex.get_gtex_tissue` reports for `sample`.

    Parameters
    ----------
    sample
        Sample identifier, passed unchanged to `gtex.get_gtex_tissue`.

    Returns
    -------
    The value stored under the "SMTS" key of the lookup result
    (presumably the tissue label -- confirm against `topicpy.gtex`).

    Notes
    -----
    The annotation file '../datasets/gtex/10/files.dat' is loaded on the first
    call only and reused afterwards.
    NOTE(review): this assumes `gtex.get_gtex_tissue` does not modify the
    `samples` frame it receives -- confirm.
    """
    global _GTEX_SAMPLES
    if _GTEX_SAMPLES is None:
        _GTEX_SAMPLES = pd.read_csv('../datasets/gtex/10/files.dat', index_col=0)
    return gtex.get_gtex_tissue(sample, samples=_GTEX_SAMPLES)["SMTS"]

# Look up the tissue of every sample with two worker processes.
pool = mp.Pool(processes=2)
work = pool.map_async(func=get_tissue, iterable=df.index)

# No further tasks will be submitted; wait for the workers to finish.
pool.close()
pool.join()

# Results come back in the same order as df.index, so they can be attached
# directly as a new column.
tissue_labels = work.get()
df["tissue"] = tissue_labels
df.head()

In [None]:
# Import the `hopfield` module and force a reload before binding `Hopfield`,
# so the name refers to the class from the freshly re-executed module
# (presumably to pick up edits made while the kernel is running).
import importlib, hopfield
importlib.reload(hopfield)
from hopfield import Hopfield

In [None]:
# Binarisation rule: a value counts as "active" when it exceeds 0.4.
# The comparison works on scalars and on whole pandas objects alike.
threshold_f = lambda p: p > 0.4

# One pattern per tissue: average the samples of each tissue, then threshold.
# The rule is applied to the whole frame at once instead of through the
# deprecated, element-by-element `DataFrame.applymap`; the result is the same.
tissue_means = df.groupby("tissue").mean()
data_df = threshold_f(tissue_means).astype(int)

# Map {0, 1} -> {-1, +1} and store the patterns as an int8 tensor.
data_tensor = tf.convert_to_tensor(data_df.values * 2 - 1, dtype=tf.int8)

In [None]:
# Size the network by the number of columns of the pattern table.
n_units = data_df.shape[1]
model = Hopfield(n_units)

# Hand the +/-1 patterns to the model, show its summary, then train it.
model.load(data_tensor)
print(model)
model.train()

In [None]:
@tf.function
def hamming(A, B):
    """Return sum(|A - B|) / (2 * A.shape[0]) as a float64 tensor.

    For vectors whose entries are +1/-1 every mismatching position contributes
    2 to the sum, so the result is the fraction of positions where `A` and `B`
    differ.

    Parameters
    ----------
    A, B
        Tensors (or tensor-likes) with identical shapes; both are cast to
        int64 before subtraction.

    Raises
    ------
    AssertionError
        If the shapes of `A` and `B` differ.
    """
    assert A.shape == B.shape
    a_int = tf.cast(A, dtype=tf.int64)
    b_int = tf.cast(B, dtype=tf.int64)
    total_abs_diff = tf.reduce_sum(tf.abs(tf.subtract(a_int, b_int)))
    length = tf.cast(A.shape[0], tf.float64)
    return tf.divide(tf.cast(total_abs_diff, tf.float64), 2 * length)

def process_sample(sample):
    """Return the thresholded column of `sample` rescaled from {0, 1} to {-1, +1}.

    Reads the module-level `df_threshold` frame, selecting the column labelled
    `sample`, and returns a NumPy array.
    """
    binary_column = df_threshold.loc[:, sample].values
    return binary_column * 2 - 1

def predict(sample, model):
    """Return the label of the stored pattern closest to the model's reconstruction.

    `sample` is passed through `model.reconstruct`; the result is compared with
    every row of the module-level `data_tensor` using `hamming`, and the label
    from `data_df.index` at the position of the smallest distance is returned.
    """
    reconstructed = model.reconstruct(sample)
    distances = [hamming(reconstructed, pattern) for pattern in data_tensor]
    nearest = tf.argmin(distances)
    return data_df.index[nearest]

In [None]:
import logging
# Lower the "hopfield" logger's threshold to INFO.
hopfield_logger = logging.getLogger("hopfield")
hopfield_logger.setLevel("INFO")

In [None]:
# Binarise every sample with the current threshold rule. The string-valued
# "tissue" column is dropped *before* transposing so the frame keeps a numeric
# dtype, and the rule is applied to the whole frame at once instead of through
# the deprecated, element-by-element `DataFrame.applymap`.
# Layout is unchanged: one column per sample, "tissue" excluded.
df_threshold = threshold_f(df.drop(columns="tissue").transpose()).astype(int)

# Collect the true and the predicted tissue for every sample, in index order.
reals = []
preds = []
for sample in df.index:
    pred = predict(process_sample(sample), model)
    real = df.loc[sample, "tissue"]
    # `.loc` yields a Series instead of a scalar when the sample id occurs
    # more than once in the index; keep the first label in that case.
    if isinstance(real, pd.Series):
        real = real.values[0]
    reals.append(real)
    preds.append(pred)

In [None]:
from sklearn.metrics import accuracy_score, roc_auc_score

In [None]:
print("Acc ", accuracy_score(reals, preds))

# Encode true and predicted labels against ONE shared class list, so that a
# given column means the same tissue in both matrices, and size the encoding
# by the number of classes actually present instead of a fixed 3.
classes = pd.unique(pd.Series(list(reals) + list(preds), dtype=object))
y_true = (np.asarray(reals, dtype=object)[:, None] == classes[None, :]).astype(int)
y_score = (np.asarray(preds, dtype=object)[:, None] == classes[None, :]).astype(int)

# roc_auc_score expects (y_true, y_score) in that order. It raises ValueError
# when the score is undefined (e.g. a class that never occurs among the true
# labels); report that instead of silently hiding every possible error.
try:
    auc = roc_auc_score(y_true, y_score, multi_class="ovr")
except ValueError as err:
    print("AUC not defined:", err)
else:
    print("AUC ", auc)

In [None]:
# NOTE(review): this cell rebinds `threshold_f`, `data_df` and `data_tensor`.
# `predict` reads `data_df`/`data_tensor` as globals, while `model` was loaded
# with the patterns built with the 0.4 threshold -- confirm that cells run
# after this one are meant to use the 0.3 patterns.
threshold_f = lambda p: p > 0.3

# Per-tissue mean profile, thresholded on the whole frame at once (same result
# as the deprecated, element-by-element `DataFrame.applymap`).
tissue_means = df.groupby("tissue").mean()
data_df = threshold_f(tissue_means).astype(int)

# Map {0, 1} -> {-1, +1} and store the patterns as an int8 tensor.
data_tensor = tf.convert_to_tensor(data_df.values * 2 - 1, dtype=tf.int8)

# Pairwise `hamming` value between every pair of tissue patterns.
# The patterns are cast to float64 so that the element dtype handed to
# `tf.map_fn` matches the dtype of the values produced for each pair.
patterns_f64 = tf.cast(data_tensor, tf.float64)
dist_matrix = tf.map_fn(
    lambda A: tf.map_fn(
        lambda B: tf.cast(hamming(A, B), tf.float64),
        patterns_f64,
    ),
    patterns_f64,
).numpy()

In [None]:
import seaborn as sns
# Draw the pairwise distance matrix with the colour scale pinned to [0, 1].
sns.heatmap(
    data=dist_matrix,
    vmin=0,
    vmax=1,
)

In [None]:
# For every row of `data_tensor`, print the label `predict` returns next to
# the label at the same position of `data_df.index`.
for expected_label, pattern in zip(data_df.index, data_tensor):
    print(predict(pattern, model), expected_label)

In [None]:
# Print the first 12 (predicted, real) label pairs.
for p, r in zip(preds[:12], reals[:12]):
    print(p, r)