In [None]:
import csv
import torch
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from pathlib import Path

from shared.plot import plot_geodesic, plot_hierarchy, plot_train_embed, get_dict_data
from shared.io import read_data, read_ref
from train import init_torch_objects, train

# --- Run configuration -------------------------------------------------------
OUT_DIMENSIONS = 3   # embedding dimensionality (50 is the alternative noted by the author)
NEG_SAMPLES = 10     # passed to train() as n_neg_samples
EPOCH = 501          # passed to train() as n_epochs

# Prefer the first CUDA device, but fall back to the CPU so the notebook still
# runs top-to-bottom on machines without a GPU instead of failing in model.to().
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

# Use double precision for every tensor created from here on.
torch.set_default_dtype(torch.float64)

In [None]:
# Optional: plot the geodesic comparison between Poincaré and Euclidean space.
# plot_geodesic()
import torch

# Print True/False depending on whether PyTorch can see a CUDA device.
print(f"{torch.cuda.is_available()}")

In [None]:
# Load the edge data and the structures read_data derives from it.
data, weights, objects, neighbors, diff_summed, num_relations = read_data(Path("data", "opehr_concepts_11454.csv"))

# Load the concept reference table; the lookup below reads its
# 'concept_id' and 'concept_name' columns.
ref = read_ref(Path("data", "ref.csv"))

# Position of the "clinical finding" concept in `objects`; it is handed to
# init_torch_objects as the fixed index.
clinical_finding_concept_id = 441840
fixed_index = objects.index(clinical_finding_concept_id)

# Initialize torch objects for the training loop.
model, optimizer, loss_func = init_torch_objects(objects, OUT_DIMENSIONS, fixed_index)

# Always move the model to the configured device. The previous check only
# matched the literal "cuda:0", so any other device string (e.g. "cuda:1")
# left the model on the CPU while train() was told to use a different device.
model = model.to(DEVICE)

# TODO: implement function to load embedding and continue training

# Ensure that ref contains all concepts: map each position in `objects` to its
# concept name. The id -> name table is built once (first match wins, as
# before) instead of scanning the whole reference frame for every concept.
name_by_id = (
    ref.drop_duplicates(subset="concept_id")
    .set_index("concept_id")["concept_name"]
    .to_dict()
)
dict_data = {}
for key, value in enumerate(objects):
    if value in name_by_id:
        dict_data[key] = name_by_id[value]
    else:
        # Keep the raw concept id as a placeholder and report the gap.
        dict_data[key] = value
        print(f"Error at Key={key}, Value={value}, Error=concept_id not found in ref")


In [None]:
# plot_hierarchy(data, objects, ref, True)

In [None]:
# Run the training loop with the objects prepared in the loading cell.
train(
    # relation data returned by read_data
    data=data,
    weights=weights,
    objects=objects,
    neighbors=neighbors,
    diff_summed=diff_summed,
    num_relations=num_relations,
    # torch objects returned by init_torch_objects
    model=model,
    optimizer=optimizer,
    loss_func=loss_func,
    # hyper-parameters from the configuration cell
    out_dimensions=OUT_DIMENSIONS,
    n_neg_samples=NEG_SAMPLES,
    n_epochs=EPOCH,
    n_burn_in=10,
    device=DEVICE,
)

In [None]:
# Map each embedding row index to a concept name for labelling.
dict_data = get_dict_data(objects, ref, dict_type="name")

# Load the saved checkpoint onto the CPU so this cell also works when the
# checkpoint was written from a GPU run. It is bound to its own name so the
# trained `model` object from the cells above is not replaced by a plain dict.
checkpoint = torch.load("output/poincare_model_dim_3.pt", map_location="cpu")

# From here on the embedding is a plain NumPy array: no .detach()/.numpy()
# calls are needed (or valid) on its elements.
coordinates = checkpoint["state_dict"]["embedding.weight"].detach().cpu().numpy()
print(coordinates)

#######################################################
# Export vectors and labels as TSV files for 3d plotting in the TF projector.
pd.DataFrame(coordinates).to_csv(
    Path('output', 'tf_proj_vec.tsv'), sep="\t", index=False, header=False
)

labels = pd.Series(dict_data)
print(labels)
labels.to_csv(
    Path('output', 'tf_proj_lab.tsv'), sep="\t", index=False, header=False,
    quoting=csv.QUOTE_NONNUMERIC,
)
#######################################################

# 2D view of the embedding: only the first two coordinates are drawn.
fig, ax = plt.subplots()
ax.set_xlim(-1, 1)
ax.set_ylim(-1, 1)
ax.axis('off')

# Edges of the original hierarchy. Only the edge list is taken, under a new
# name, so the variables used for training are not overwritten.
# NOTE(review): this is a different CSV than the one loaded for training
# (opehr_concepts_11454.csv); confirm its node indices match the checkpoint.
edges = read_data("data/opehr_concepts.csv")[0]

# Add some jitter to better see labels.
jitter = 0.02
n_points = coordinates.shape[0]
jitter_x = np.random.uniform(low=-jitter, high=jitter, size=(n_points,))
jitter_y = np.random.uniform(low=-jitter, high=jitter, size=(n_points,))

for idx in range(n_points):
    ax.annotate(
        str(dict_data[idx]),
        (coordinates[idx, 0] + jitter_x[idx], coordinates[idx, 1] + jitter_y[idx]),
        fontsize=4,
    )

# Plot edges of original hierarchy as thin line segments between both endpoints.
for i in range(edges.shape[0]):
    src, dst = int(edges[i][0]), int(edges[i][1])
    x_values = [coordinates[src, 0], coordinates[dst, 0]]
    y_values = [coordinates[src, 1], coordinates[dst, 1]]
    ax.plot(x_values, y_values, color="black", linewidth=0.2)

fig.savefig(Path("output", "hierarchy_embed.png"), dpi=300, facecolor="white")
plt.show()

In [1]:
from shared.io import write_tensorflow_projector_data

# Inputs for the export: the concept reference table and a trained checkpoint.
ref_csv_path = "data/ref.csv"
model_path = "output/poincare_model_dim_3_epoch_500.pt"

# Judging by the captured output, the helper lists the state-dict keys and the
# first embedding rows; presumably it also writes the TensorFlow Projector
# files -- confirm in shared.io.
write_tensorflow_projector_data(model_path, ref_csv_path)

Available keys in the state dictionary:
embedding.weight
First 100 embedding weights:
Embedding 0: [0.4034566  0.85267115 0.33130244]
Embedding 1: [0.37091265 0.85063698 0.35481152]
Embedding 2: [0.35314391 0.91834867 0.17142104]
Embedding 3: [ 0.11019133 -0.91036501  0.39208295]
Embedding 4: [ 0.05754362 -0.88446279  0.43867873]
Embedding 5: [ 0.142665  -0.9042851  0.3831774]
Embedding 6: [ 0.10265174 -0.89466806  0.42437099]
Embedding 7: [-0.21033209 -0.68707265  0.69173662]
Embedding 8: [-0.53354435 -0.69008162  0.46336703]
Embedding 9: [-0.36319214 -0.92464404  0.01070366]
Embedding 10: [ 0.31208514 -0.73270328  0.59689635]
Embedding 11: [-0.34486266 -0.80030032  0.47782676]
Embedding 12: [-0.49515176 -0.84650664  0.13620161]
Embedding 13: [0.53186368 0.83128565 0.08520911]
Embedding 14: [0.59988687 0.33102292 0.7088905 ]
Embedding 15: [-0.0367966  -0.77213641  0.61060392]
Embedding 16: [ 0.58407405 -0.23120929  0.76146089]
Embedding 17: [-0.3012234  -0.70799683  0.62379625]
Embedd

In [None]:
from pathlib import Path

import torch

# Load the tensor from the .pt file. The path is relative to the repository
# root, like every other path in this notebook, instead of an absolute path
# that only exists on one developer's machine.
file_path = Path("output", "embedding.pt")

# map_location forces the tensor onto the CPU regardless of where it was saved.
tensor = torch.load(file_path, map_location=torch.device("cpu"))

# Print the contents of the tensor
print(tensor)


In [None]:
import torch

# Ask PyTorch whether a CUDA device is visible to this kernel.
cuda_available = torch.cuda.is_available()

# Report the outcome as a single human-readable line.
print("CUDA is available." if cuda_available else "CUDA is not available.")



In [2]:
from shared.io import convert_embedding_for_plp

# First argument: the trained checkpoint to read; second: the file to produce
# (argument roles presumed from the file names -- confirm in shared.io).
# NOTE(review): this uses the 10-dimensional checkpoint, whereas the
# configuration cell in this notebook trains with 3 dimensions.
convert_embedding_for_plp(
    "output/poincare_model_dim_10_epoch_500.pt",
    "output/embedding_501.pt",
)
