In [1]:
import csv
import torch
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from pathlib import Path

from shared.plot import plot_geodesic, plot_hierarchy, plot_train_embed, get_dict_data
from shared.io import read_data, read_ref
from train import init_torch_objects, train

# --- Configuration -----------------------------------------------------------
# Embedding dimensionality; forwarded to init_torch_objects() and train().
OUT_DIMENSIONS = 10 # 50
# Forwarded to train() as n_neg_samples.
NEG_SAMPLES = 10 # 10
# Forwarded to train() as n_epochs.
EPOCH = 500
# Use the first CUDA device when one is visible, otherwise fall back to the
# CPU so the notebook also runs on hosts without a GPU.
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
# All tensors created from here on default to double precision.
torch.set_default_dtype(torch.float64)

In [1]:
# Plot geodesic comparison between Poincaré and Euclidean
# plot_geodesic()
import torch

# Sanity check: report whether PyTorch can see a CUDA device.
cuda_visible = torch.cuda.is_available()
print(cuda_visible)

True


In [None]:
# Load edge data
data, weights, objects, neighbors, diff_summed, num_relations = read_data(Path("data","opehr_concepts_11454.csv"))

# load concept reference
ref = read_ref(Path('data','ref.csv'))

# define fixed index clinical finding
clinical_finding_concept_id = 441840
fixed_index = objects.index(clinical_finding_concept_id)

# initialize torch objects for the training loop
model, optimizer, loss_func = init_torch_objects(objects, OUT_DIMENSIONS, fixed_index)

# Always place the model on the configured device. Comparing against the
# literal "cuda:0" skipped the move for every other device string, leaving the
# model on the CPU while train() is still called with device=DEVICE.
model = model.to(DEVICE)

# ToDo: implement function to load embedding and continue training

# ensure that ref contains all concepts
# Build the concept_id -> concept_name lookup once instead of scanning `ref`
# for every concept. drop_duplicates keeps the first row per concept_id, which
# matches the previous `.values[0]` selection.
concept_names = (
    ref.drop_duplicates(subset="concept_id")
    .set_index("concept_id")["concept_name"]
    .to_dict()
)

# dict_data maps the position of each concept in `objects` to its name; a
# concept missing from `ref` keeps its raw id and is reported.
dict_data = {}
for key, value in enumerate(objects):
    if value in concept_names:
        dict_data[key] = concept_names[value]
    else:
        dict_data[key] = value
        print(f"Error at Key={key}, Value={value}, Error=concept_id not found in ref")


In [None]:
# plot_hierarchy(data, objects, ref, True)

In [None]:
# Run the training loop on the data and torch objects prepared earlier in the
# notebook; hyperparameters come from the configuration constants.
train(
    data=data,
    weights=weights,
    objects=objects,
    neighbors=neighbors,
    diff_summed=diff_summed,
    num_relations=num_relations,
    model=model,
    optimizer=optimizer,
    loss_func=loss_func,
    out_dimensions=OUT_DIMENSIONS,
    n_neg_samples=NEG_SAMPLES,
    n_epochs=EPOCH,
    n_burn_in=10,
    device=DEVICE,
)

In [None]:
# --- Load a saved embedding, export it for the TF projector, and plot it -----
dict_data = get_dict_data(objects, ref, dict_type="name")

# Load onto the CPU so the conversion to NumPy also works for checkpoints that
# were written from a CUDA device.
# NOTE(review): this rebinds `model` from the object returned by
# init_torch_objects() to the loaded checkpoint; re-running the training cell
# afterwards needs the initialisation cell to be re-run first.
# NOTE(review): the file name says dim_3 while OUT_DIMENSIONS is 10 — confirm
# this is the intended checkpoint.
model = torch.load("output/poincare_model_dim_3.pt", map_location=torch.device("cpu"))
coordinates = model["state_dict"]["embedding.weight"].detach().cpu().numpy()
print(coordinates)

#######################################################
# some experiment with 3d plotting in TF projector
x_np = coordinates
x_df = pd.DataFrame(x_np)
x_df.to_csv(Path('output','tf_proj_vec.tsv'), sep="\t", index=False, header=False)

df = pd.Series(dict_data)
print(df)
df.to_csv(Path('output','tf_proj_lab.tsv'), sep="\t", index=False, header=False,
          quoting=csv.QUOTE_NONNUMERIC)
###########################

fig, ax = plt.subplots()
ax.set_xlim(-1, 1)
ax.set_ylim(-1, 1)
ax.axis('off')

# NOTE(review): this reloads the edge data from a different CSV than the one
# used in the training cell and overwrites `objects` after `dict_data` was
# built from it — verify that the indices still line up with `coordinates`.
data, weights, objects, neighbors, diff_summed, num_relations = read_data("data/opehr_concepts.csv")

# add some jitter to better see labels
jitter = 0.02
jitter_x = np.random.uniform(low=-jitter, high=jitter, size=(coordinates.shape[0], ))
jitter_y = np.random.uniform(low=-jitter, high=jitter, size=(coordinates.shape[0], ))

# `coordinates` is already a NumPy array, so its elements are used directly;
# calling `.detach().numpy()` on them raises AttributeError.
for x in range(coordinates.shape[0]):
    ax.annotate(dict_data[x],
                (coordinates[x, 0] + jitter_x[x], coordinates[x, 1] + jitter_y[x]),
                fontsize=4)

# Plot edges of original hierarchy (first two embedding dimensions only)
for i in range(data.shape[0]):
    x_values = [coordinates[data[i][0], 0], coordinates[data[i][1], 0]]
    y_values = [coordinates[data[i][0], 1], coordinates[data[i][1], 1]]
    ax.plot(x_values, y_values, color="black", linewidth=0.2)

fig.savefig(Path("output", "hierarchy_embed.png"), dpi=300, facecolor="white")
plt.show()

In [None]:
from shared.io import write_tensorflow_projector_data

# Inputs handed to write_tensorflow_projector_data: a saved model checkpoint
# and the concept reference CSV.
ref_csv_path = "data/ref.csv"
model_path = "output/poincare_model_dim_10_epoch_100.pt"

write_tensorflow_projector_data(model_path, ref_csv_path)

In [None]:
import torch

# Load the saved object from the .pt file.
# A path relative to the working directory is used (the same
# "output/embedding.pt" the other cells read and write) instead of a
# machine-specific absolute path, so the cell runs on any checkout.
file_path = "output/embedding.pt"
tensor = torch.load(file_path, map_location=torch.device("cpu"))

# Print the loaded contents
print(tensor)


In [7]:
import torch

# Ask PyTorch whether a CUDA device can be used and report the answer
# as a human-readable sentence.
cuda_available = torch.cuda.is_available()
print("CUDA is available." if cuda_available else "CUDA is not available.")



CUDA is not available.


In [1]:
from shared.io import convert_embedding_for_plp

# The two paths passed to the converter — presumably (input checkpoint,
# output file); confirm against shared.io.
checkpoint_path = "output/poincare_model_dim_10_epoch_250.pt"
embedding_path = "output/embedding.pt"
convert_embedding_for_plp(checkpoint_path, embedding_path)


In [4]:
# Load both embedding files onto the CPU and print them for a side-by-side look.
# map_location is given for both loads so the cell also works on hosts without
# CUDA, whatever device the files were saved from.
# NOTE(review): torch.load unpickles the file contents — only load files from
# a trusted source.
R = torch.load("output/custom_embeddings.pt", map_location=torch.device("cpu"))
print(R)
P = torch.load("output/embedding.pt", map_location=torch.device("cpu"))
print(P)

  R = torch.load("output/custom_embeddings.pt")


{'concept_ids': tensor([ 4285898,   372328,  4283893,   260139, 40481087,  4218389,  4156265,
           30753,  4112343,  4116491,    28060,    81893,   257012,   378001,
         4001336,  4132546,  4155034,   195588,  4029498,  4152936,  4113008,
         4296204,  4310024,  4280726,   375671,  4296205,  4278672,  4237458,
           81151,   198199, 40486433,  4149245,  4294548,    78272,   134438,
          317576,  4056621,   198809,   196456,  4134304,    80809,  4048171,
         4109685,  4059173,   258780,  4035415,   439777,   381316,   321042,
          261325,  4084167,  4142905]), 'embeddings': tensor([[-9.4811e-01, -2.9709e-01,  1.5680e+00],
        [-2.4519e-01,  2.7120e-01, -1.3439e+00],
        [ 2.9383e-03,  2.2134e-01, -6.7727e-01],
        [ 1.6553e+00,  1.9824e+00, -8.4939e-01],
        [-6.8818e-01, -1.0944e+00, -7.8868e-01],
        [-5.6375e-01,  2.5923e-02, -8.4200e-02],
        [-1.4175e+00, -8.9604e-01, -1.1830e+00],
        [-6.1173e-01,  4.4438e-01, -8.405

  P = torch.load("output/embedding.pt", map_location=torch.device("cpu"))


{'concept_ids': tensor([ 432545, 4193873,  604687,  ...,  317305, 4327861, 4080696]), 'embeddings': tensor([[-0.3742,  0.3554,  0.3598,  ...,  0.2733,  0.1115,  0.4497],
        [-0.3859,  0.3650,  0.3749,  ...,  0.2799,  0.1137,  0.4647],
        [ 0.1416,  0.5443,  0.0320,  ...,  0.3132, -0.3221, -0.0980],
        ...,
        [ 0.2021,  0.1109, -0.4492,  ..., -0.3919, -0.1375, -0.2421],
        [-0.2474, -0.0322, -0.0453,  ...,  0.0594,  0.0922,  0.0137],
        [ 0.0052,  0.5007,  0.1800,  ...,  0.3878, -0.0180, -0.2906]],
       dtype=torch.float32)}
