In [None]:
%load_ext autoreload
%autoreload 2
import logging
import pandas as pd
import numpy as np
from hopfield4py import Hopfield
from hopfield4py.hopfield_helper import *
import tensorflow as tf
from topicpy import gtex
import multiprocessing as mp
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Base folder of this analysis; later cells build their input paths as f'{directory}/...'.
directory = "../cancers/breast/"

In [None]:
# Read the level-0 topic distribution (rows indexed by the file's second column,
# "i_doc" discarded), then round-trip through a transposed frame so that the
# column axis ends up named "id".
df = (
    pd.read_csv(f'{directory}/topsbm/topsbm_level_0_topic-dist.csv', index_col=1)
    .drop(columns="i_doc")
    .transpose()
    .reset_index()
    .rename(columns={"index": "id"})
    .set_index("id")
    .transpose()
)
# Alternative scaling kept for reference (marked "DNW" by the author):
#df = df.subtract(df.mean(axis=0),1).abs().divide(df.std(axis=0),1) ## DNW
# Min-max scale every column to [0, 1]. Author's note: threshold should be 0.005
df = (
    df.subtract(df.min(axis=0), axis=1)
    .abs()
    .divide(df.max(axis=0) - df.min(axis=0), axis=1)
)
df.head(2)

In [None]:
def refactor_series(s: pd.Series):
    """
    Discretise a series into -1 / 0 / 1 around its central quantile band.

    Values below the 45th percentile become -1, values above the 55th
    percentile become 1, and values inside the band (bounds included) become 0.

    :param s: numeric series to discretise (left unmodified)
    :return: integer series with the same index as ``s``
    """
    q1, q2 = s.quantile(q=[0.45,0.55])
    # Build every mask from the untouched input. Testing the partially
    # rewritten copy would re-classify the freshly written -1 / 1 markers
    # whenever those marker values themselves fall inside (or on the other
    # side of) the quantile band.
    below = s < q1
    above = s > q2
    inside = (s >= q1) & (s <= q2)
    new = s.copy()
    new[below] = -1
    new[above] = 1
    new[inside] = 0
    return new.astype(int)

#df = df.apply(refactor_series, axis=1)

In [None]:
# Per-file metadata, re-ordered so that its rows line up with the rows of df.
df_files = (
    pd.read_csv(f'{directory}/files.dat', index_col=0)
    .reindex(index=df.index)
)
# The selected subtype of every row is stored as its class label ("tissue").
df["tissue"] = df_files["Subtype_Selected"]

In [None]:
def threshold_f(p):
    """Ternarise a value: 1 above 0.6, -1 below 0.1, 0 otherwise (both bounds map to 0)."""
    if p > 0.6:
        return 1
    if p < 0.1:
        return -1
    return 0
# One ternary pattern per class: average the rows of each "tissue" group, then threshold.
data_df = (
    df.groupby("tissue")
    .mean()
    .applymap(threshold_f)
    .astype(int)
)
# Ternary version of every individual row; stored transposed (label row removed).
df_threshold = (
    df.T
    .drop(index="tissue")
    .applymap(threshold_f)
    .astype(int)
)
# int8 tensors: class patterns, and the thresholded rows transposed back to one row per sample.
data_tensor = tf.convert_to_tensor(data_df.values, dtype=tf.int8)
df_threshold_tensor = tf.convert_to_tensor(df_threshold.values.T, dtype=tf.int8)

In [None]:
# Size the model with the second dimension of data_tensor (one unit per pattern column).
model = Hopfield(data_tensor.shape[1])
# Hand the class patterns to the model, print its summary, then train it.
# NOTE(review): load/train semantics live in hopfield4py — presumably load stores the
# memories and train builds the couplings from them; confirm against the library.
model.load(data_tensor)
print(model)
model.train()

In [None]:
# Set the "hopfield" logger to INFO (presumably the logger hopfield4py writes to — confirm).
logging.getLogger("hopfield").setLevel("INFO")

In [None]:
@tf.function
def dilutedhamming(A: tf.Tensor, B: tf.Tensor)-> tf.Tensor:
    """
    Diluted overlap between two configurations: (A.B) / (number of non-zero elements of A)

    A and B must have the same shape.

    For inputs with values in {-1, 0, 1} the result is 1. when B agrees with A
    on every non-zero element of A and -1. when B is the opposite of A there.
    When A has no non-zero element the overlap is undefined; 0. is returned
    instead of NaN so that downstream argmax / plots stay well defined.

    :param A: first tensor (its non-zero count is the normalisation)
    :param B: second tensor
    :return: scalar float64 tensor with the normalised overlap
    """
    assert(A.shape==B.shape)
    # The dot product is truncated to an integer, as in the original formula.
    overlap = tf.cast(tf.tensordot(tf.cast(A, tf.float64), tf.cast(B, tf.float64), axes=1), tf.int64)
    n_nonzero = tf.reduce_sum(tf.cast(A != tf.constant(0, A.dtype), tf.int64))
    # divide_no_nan yields 0 for a zero denominator instead of NaN/inf.
    return tf.math.divide_no_nan(tf.cast(overlap, tf.float64), tf.cast(n_nonzero, tf.float64))

    
@tf.function
def dilued_predict(sample, data_tensor, model):
    """
    Index of the memory that best matches the reconstruction of one sample

    :param sample: configuration handed to ``model.reconstruct``
    :param data_tensor: tensor with memories, iterated along its first axis
    :param model: model used to reconstruct the sample
    :return: int64 index (argmax of the dilutedhamming score) into data_tensor
    """
    reconstructed = tf.cast(model.reconstruct(sample), tf.int64)
    memories = tf.cast(data_tensor, tf.float64)

    def score(memory):
        # Score of the reconstruction against a single memory.
        return tf.cast(dilutedhamming(reconstructed, memory), tf.float64)

    scores = tf.map_fn(score, memories, fn_output_signature=tf.float64, parallel_iterations=12)
    return tf.argmax(scores, output_type=tf.int64)

@tf.function
def get_diluted_prediction(samples: tf.Tensor, data_tensor: tf.Tensor, model: Hopfield)-> tf.Tensor:
    """
    Find the best-matching memory for every sample

    :param samples: samples to reconstruct, iterated along the first axis
    :param data_tensor: tensor with memories
    :param model: model used to infer
    :return: int64 tensor with, for each sample, the index (argmax of the diluted score) of the matching element of data_tensor
    """
    def best_memory(sample):
        return dilued_predict(sample, data_tensor, model)

    return tf.map_fn(best_memory, samples, fn_output_signature=tf.int64, parallel_iterations=12)


def get_predicted_diluted_labels(classes: list, samples: tf.Tensor, data_tensor: tf.Tensor, model:Hopfield)->list:
    """
    Translate the predicted memory index of each sample into its class name

    :param classes: class names with shape (nclasses,), indexable by position
    :param samples: samples to reconstruct with shape (nsamples, nspins)
    :param data_tensor: tensor with memories (nclasses, nspins)
    :param model: model used to infer
    :return: list with one class name per sample
    """
    label_indices = get_diluted_prediction(samples, data_tensor, model).numpy()
    return [classes[label_idx] for label_idx in label_indices]

In [None]:
# Label of every row of df, as returned by get_real_label, in df row order.
reals = [get_real_label(df, sample) for sample in df.index]
# Predicted class of every thresholded sample (diluted score); the earlier variant is kept for reference:
#preds = get_predicted_labels(data_df.index, df_threshold_tensor, data_tensor, model)
preds = get_predicted_diluted_labels(data_df.index, df_threshold_tensor, data_tensor, model)

In [None]:
print("Acc ",accuracy_score(reals, preds))
# One-hot encode both label lists against the SAME class order (data_df.index).
# Encoding each list with its own tf.unique(...) numbered classes by order of
# first appearance, so column k could mean different classes in reals and preds,
# and the hard-coded depth of 3 dropped every class beyond the third.
class_labels = np.asarray(data_df.index, dtype=object)
reals_onehot = (np.asarray(reals, dtype=object)[:, None] == class_labels[None, :]).astype(int)
preds_onehot = (np.asarray(preds, dtype=object)[:, None] == class_labels[None, :]).astype(int)
try:
    # roc_auc_score expects (y_true, y_score): ground truth first, predictions second.
    print("AUC ",roc_auc_score(reals_onehot, preds_onehot, multi_class="ovr"))
except ValueError as err:
    # AUC is undefined e.g. when a class has only positives or only negatives;
    # report it instead of silently skipping the metric.
    print("AUC not available:", err)

In [None]:
# Print the first 10 entries of the first memory next to the same entries of its reconstruction.
for r,p in zip(data_tensor[0][:10].numpy(),model.reconstruct(data_tensor[0])[:10].numpy()):
    print(r,p)

In [None]:
# Overlay one step histogram per memory, showing how its entries are distributed.
for mem in data_tensor:
    plt.hist(mem.numpy().ravel(), histtype="step", lw=4)
plt.show()

In [None]:
@tf.function
def get_distance_matrix(data_tensor, model, distance=hamming):
    """
    Score every memory against the reconstruction of every memory

    Entry [i, j] is ``distance(data_tensor[j], model.reconstruct(data_tensor[i]))``.

    :param data_tensor: tensor with memories, iterated along its first axis
    :param model: model used to reconstruct each memory
    :param distance: callable taking (memory, reconstruction) and returning a scalar
    :return: float64 tensor with one row and one column per memory
    """
    memories = tf.cast(data_tensor, tf.float64)

    def row_of_scores(A):
        def single_score(B):
            # reduce_min over a one-element list just unwraps the scalar score.
            return tf.reduce_min([tf.cast(distance(B, model.reconstruct(A)), tf.float64)])

        return tf.map_fn(single_score, memories, parallel_iterations=6)

    return tf.map_fn(row_of_scores, memories, parallel_iterations=6)

dist_matrix = get_distance_matrix(data_tensor, model, dilutedhamming)

In [None]:
# Heatmap of dist_matrix with the colour scale pinned to [-1, 1] and class names on both axes.
sns.heatmap(dist_matrix, vmin=-1, vmax=1, xticklabels=data_df.index, yticklabels=data_df.index)

In [None]:
# Pick one thresholded sample at a random row index.
sample = df_threshold_tensor[np.random.randint(0, df_threshold_tensor.shape[0])]
# NOTE(review): this immediately overrides the random draw above with the first memory,
# so the cell always inspects data_tensor[0] — confirm whether that is intended.
sample = data_tensor[0]
reconstructed = model.reconstruct(sample)
# Cell output: the dilutedhamming score of the reconstruction against every memory,
# followed by the memory index chosen by dilued_predict for the same sample.
[dilutedhamming(reconstructed, memory).numpy() for memory in data_tensor], dilued_predict(sample, data_tensor, model)

In [None]:
# Confusion matrix of real vs predicted labels, normalised per real class (normalize="true"),
# drawn through clustermap with both clusterings disabled so the input order is kept.
# NOTE(review): confusion_matrix is called without labels=, so its row/column order is
# scikit-learn's default ordering of the labels found in reals/preds, while the tick
# labels come from data_df.index — confirm that both orders always coincide.
cm = sns.clustermap(confusion_matrix(reals, preds, normalize="true"),
                    vmin = 0,
                    vmax=1,  
                    row_cluster=False, 
                    col_cluster=False, 
                    xticklabels=data_df.index, 
                     yticklabels=data_df.index,
                    annot=True,
                    annot_kws={"fontsize":15})
# Style the heatmap axes: rows are the real classes (label and ticks moved to the left side).
ax = cm.ax_heatmap
fig = ax.get_figure()  # not used again in this cell
ax.set_ylabel("real", fontsize=35, rotation=90)
ax.set_yticklabels(labels=data_df.index, rotation=0)
ax.yaxis.tick_left()
ax.yaxis.set_label_position("left")

# Columns are the predicted classes.
ax.set_xticklabels(labels=data_df.index, rotation=90)
ax.set_xlabel("predicted",fontsize=35)
ax.tick_params(labelsize=35)

# Colour-bar styling.
cax = cm.ax_cbar
cax.tick_params(labelsize=30)
cax.set_title("P()", fontsize=30)
plt.tight_layout()
# NOTE(review): `label` is not defined in the visible cells; define it before re-enabling the export below.
#cm.savefig(f"predict_{label}.pdf")

plt.show()

In [None]:
# For every memory, print its class name next to the class name at the index returned by predict.
# NOTE(review): predict is not defined in the visible cells — presumably it comes from the
# hopfield_helper star import; confirm.
for i,data in enumerate(data_tensor):
    print(data_df.index[i], data_df.index[predict(data, data_tensor, model).numpy()])