In [16]:
# boilerplate imports

import numpy as np
from pathlib import Path

from hroov.utils.retrievers import (
    TFIDFRetriever,
    BM25Retriever,
    SBERTRetriever,
    HiTRetriever,
    OnTRetriever
)
from hroov.utils.math_functools import (
    batch_cosine_similarity,
    batch_poincare_dist_with_adaptive_curv_k,
    entity_subsumption,
    concept_subsumption
)

embeddings_dir = '../embeddings'

In [None]:
# pre-computed embeddings

sbert_plm_embs = np.load(f"{embeddings_dir}/sbert-plm-embeddings.npy", mmap_mode="r")
hit_snomed_25_embs = np.load(f"{embeddings_dir}/hit-snomed-25-embeddings.npy", mmap_mode="r") # HiT FULL
ont_snomed_96_embs = np.load(f"{embeddings_dir}/ont-snomed-96-embeddings.npy", mmap_mode="r") # SNOMED FULL
ont_minified_128_embs = np.load(f"{embeddings_dir}/ont-snomed-minified-128-embeddings.npy", mmap_mode="r")

In [17]:
# dirs & fps

embeddings_dir = "../embeddings"
common_map = Path(f"{embeddings_dir}/entity_mappings.json")
common_verbalisations = Path(f"{embeddings_dir}/verbalisations.json")

In [18]:
# retrieval model: OnT-96 (OnT FULL)

ont_snomed_96_model_path = Path('../models/snomed_models/OnT-96')

ont_snomed_96_w_hyp_dist = OnTRetriever(
    embeddings_fp=Path(f"{embeddings_dir}/ont-snomed-96-embeddings.npy"),
    meta_map_fp=common_map,
    verbalisations_fp=common_verbalisations,
    model_fp=ont_snomed_96_model_path,
    score_fn=batch_poincare_dist_with_adaptive_curv_k
)

ont_snomed_96_w_con_sub = OnTRetriever(
    embeddings_fp=Path(f"{embeddings_dir}/ont-snomed-96-embeddings.npy"),
    meta_map_fp=common_map,
    verbalisations_fp=common_verbalisations,
    model_fp=ont_snomed_96_model_path,
    score_fn=concept_subsumption
)

In [19]:
# retrieval model: OnT Miniature (M-B128)

ont_snomed_minified_128_model_fp = Path('../models/snomed_models/OnTr-m-128')

ont_ret_snomed_minified_128_w_hyp_dist = OnTRetriever(
    embeddings_fp=Path(f"{embeddings_dir}/ont-snomed-minified-128-embeddings.npy"),
    meta_map_fp=common_map,
    verbalisations_fp=common_verbalisations,
    model_fp=ont_snomed_minified_128_model_fp,
    score_fn=batch_poincare_dist_with_adaptive_curv_k
)

ont_ret_snomed_minified_128_w_con_sub = OnTRetriever(
    embeddings_fp=Path(f"{embeddings_dir}/ont-snomed-minified-128-embeddings.npy"),
    meta_map_fp=common_map,
    verbalisations_fp=common_verbalisations,
    model_fp=ont_snomed_minified_128_model_fp,
    score_fn=concept_subsumption
)



In [20]:
# Retrieval Model: HiT (FULL-HN)

# Hierarchy Transformer-based Retriever (HiT Trained on Hard Negatives -> Ongoing Work/Future Work <-)

hit_hard_negatives = Path('../models/snomed_models/HiT_mixed_hard_negatives')

hit_ret_hard_w_hyp_dist = HiTRetriever(
  embeddings_fp=Path(f"{embeddings_dir}/hit-snomed-hard-embeddings.npy"),
  meta_map_fp=common_map,
  verbalisations_fp=common_verbalisations,
  model_fp=hit_hard_negatives,
  score_fn=batch_poincare_dist_with_adaptive_curv_k
)

hit_ret_hard_w_ent_sub = HiTRetriever(
  embeddings_fp=Path(f"{embeddings_dir}/hit-snomed-hard-embeddings.npy"),
  meta_map_fp=common_map,
  verbalisations_fp=common_verbalisations,
  model_fp=hit_hard_negatives,
  score_fn=entity_subsumption
)

In [21]:
_model = hit_ret_hard_w_ent_sub._model
MAX_K = 10 or len(hit_ret_hard_w_ent_sub._verbalisations)
centri_w = 0.1
query_string = "fracture of foot"

hit_ret_hard_w_ent_sub.retrieve("fracture of foot", top_k=MAX_K, reverse_candidate_scores=True, model=_model, weight=centri_w)

[(0,
  'http://snomed.info/id/15574005',
  np.float32(-9.34402e-06),
  'fracture of foot'),
 (1,
  'http://snomed.info/id/21351003',
  np.float32(-8.486809),
  'fracture of phalanx of foot'),
 (2,
  'http://snomed.info/id/125605004',
  np.float32(-9.047159),
  'fracture of bone'),
 (3, 'http://snomed.info/id/72704001', np.float32(-9.085127), 'fracture'),
 (4,
  'http://snomed.info/id/239279000',
  np.float32(-9.126209),
  'fixation of fracture'),
 (5,
  'http://snomed.info/id/46866001',
  np.float32(-9.21486),
  'fracture of lower limb'),
 (6,
  'http://snomed.info/id/263563003',
  np.float32(-9.230102),
  'fracture configuration'),
 (7,
  'http://snomed.info/id/281956006',
  np.float32(-9.238402),
  'fracture configurations'),
 (8,
  'http://snomed.info/id/239278008',
  np.float32(-9.244928),
  'operation on fracture'),
 (9,
  'http://snomed.info/id/263250005',
  np.float32(-9.407008),
  'fracture of bone of forefoot')]

*Note that: building an Automated Test Suite using these types of examples would be a great idea.*

# Queries (#1)

$T \equiv Threshold$

* Q1: Fracture of Foot $\rightarrow$ Fracture of Lower limb $<$ T
* Q2: Fracture of Toe $\rightarrow$ Fracture of Lower limb $<$ T
  * &&       Fracture of Toe $\rightarrow$ Fracture of Lower limb $>$ Q1
* Q3: Fracture of Foot $\rightarrow$ Fracture of Toe $>$ T

In [11]:
term_one = "fracture of foot"
term_two = "fracture of lower limb"

emb_one = _model.encode(term_one)
emb_two = _model.encode(term_two)

entity_subsumption(emb_one, emb_two, model=_model, weight=0.1)

array(-9.21486, dtype=float32)

In [13]:
term_one = "fracture of toe"
term_two = "fracture of lower limb"

emb_one = _model.encode(term_one)
emb_two = _model.encode(term_two)

entity_subsumption(emb_one, emb_two, model=_model, weight=0.1)

array(-10.337907, dtype=float32)

In [15]:
term_one = "fracture of foot"
term_two = "tracture of toe"

emb_one = _model.encode(term_one)
emb_two = _model.encode(term_two)

e1_sqsubseteq_e2_score = entity_subsumption(emb_one, emb_two, model=_model, weight=0.1)

e1_sqsubseteq_e2_score

array(-23.766647, dtype=float32)

# Queries (#2)

$T \equiv Threshold$

* Q1: Hand $\rightarrow$ Upper limb $<$ T
* Q2: Finger $\rightarrow$ Upper limb $<$ T
  * &&     Finger $\rightarrow$ Upper limb $>$ Q1
* Q3: Finger $\rightarrow$ Hand $<$ T      
  * &&      Finger $\rightarrow$ Hand $<$ Q2


In [22]:
term_one = "hand"
term_two = "upper limb"

emb_one = _model.encode(term_one)
emb_two = _model.encode(term_two)

entity_subsumption(emb_one, emb_two, model=_model, weight=0.1)

array(-11.5987015, dtype=float32)

In [23]:
term_one = "finger"
term_two = "upper limb"

emb_one = _model.encode(term_one)
emb_two = _model.encode(term_two)

entity_subsumption(emb_one, emb_two, model=_model, weight=0.1)

array(-15.316193, dtype=float32)

In [24]:
term_one = "finger"
term_two = "hand"

emb_one = _model.encode(term_one)
emb_two = _model.encode(term_two)

entity_subsumption(emb_one, emb_two, model=_model, weight=0.1)

array(-12.160582, dtype=float32)

# Queries (#3)

```
Fracture of Hand -> Fracture of Upper limb : -3.9750
Fracture of Finger -> Fracture of Upper limb : -4.8810
Fracture of Finger -> Fracture of Hand : -2.9938
```

$T \equiv Threshold$

* Q1: Fracture of Hand $\rightarrow$ Fracture of Upper limb $<$ T
* Q2: Fracture of Finger $\rightarrow$ Fracture of Upper limb $<$ T
  * &&     Fracture of Finger $\rightarrow$ Fracture of Upper limb $>$ Q1
* Q3: Fracture of Finger $\rightarrow$ Fracture of Hand $<$ T      
  * &&      Fracture of Finger $\rightarrow$ Fracture of Hand $<$ Q2

In [25]:
term_one = "fracture of hand"
term_two = "fracture of upper limb"

emb_one = _model.encode(term_one)
emb_two = _model.encode(term_two)

entity_subsumption(emb_one, emb_two, model=_model, weight=0.1)

array(-9.311131, dtype=float32)

In [26]:
term_one = "fracture of finger"
term_two = "fracture of upper limb"

emb_one = _model.encode(term_one)
emb_two = _model.encode(term_two)

entity_subsumption(emb_one, emb_two, model=_model, weight=0.1)

array(-11.035132, dtype=float32)

In [27]:
term_one = "fracture of finger"
term_two = "fracture of hand"

emb_one = _model.encode(term_one)
emb_two = _model.encode(term_two)

entity_subsumption(emb_one, emb_two, model=_model, weight=0.1)

array(-9.584875, dtype=float32)

In [None]:
# Foot -> Lower limb : -5.4734
# Toe -> Lower limb : -6.0397
# Toe -> Foot : -4.0015

# Queries (#4)

$T \equiv Threshold$

* Q1: Foot $\rightarrow$ Lower limb $<$ T
* Q2: Toe $\rightarrow$ Lower limb $<$ T
  * &&     Toe $\rightarrow$ Lower limb $>$ Q1
* Q3: Toe $\rightarrow$ Foot $<$ T      
  * &&       Toe $\rightarrow$ Foot $<$ Q2

In [28]:
term_one = "foot"
term_two = "lower limb"

emb_one = _model.encode(term_one)
emb_two = _model.encode(term_two)

entity_subsumption(emb_one, emb_two, model=_model, weight=0.1)

array(-12.477948, dtype=float32)

In [29]:
term_one = "toe"
term_two = "lower limb"

emb_one = _model.encode(term_one)
emb_two = _model.encode(term_two)

entity_subsumption(emb_one, emb_two, model=_model, weight=0.1)

array(-14.258653, dtype=float32)

In [30]:
term_one = "toe"
term_two = "foot"

emb_one = _model.encode(term_one)
emb_two = _model.encode(term_two)

entity_subsumption(emb_one, emb_two, model=_model, weight=0.1)

array(-12.266147, dtype=float32)

# Ontology Transformer Models

* Fracture of finger disorder -> Disorder that finding site is finger and morphlogy is fracture
* Fracture of finger disorder -> Fracture of hand disorder
* Fracture of finger disorder -> Disorder that finding site is hand and morphlogy is fracture
* Fracture of hand disorder -> Disorder that finding site is finger and morphlogy is fracture
* Disorder that finding site is finger and morphlogy is fracture -> Disorder that finding site is hand and morphlogy is fracture
* Fracture of hand disorder -> Disorder that finding site is hand and morphlogy is fracture

In [35]:
_ont_model = ont_snomed_96_w_con_sub._model
_mini_model = ont_ret_snomed_minified_128_w_con_sub._model

# OnT Query # 1

In [45]:
concept_expr_one = "fracture of finger disorder"
concept_expr_two = "disorder that finding site is finger and morphlogy is fracture"

emb_one = _ont_model.encode_concept(concept_expr_one)
emb_two = _ont_model.encode_concept(concept_expr_two)

concept_subsumption(emb_one, emb_two, model=_ont_model, weight=0.37)

array(-8.5588, dtype=float32)

In [None]:
concept_expr_one = "fracture of finger disorder"
concept_expr_two = "disorder that finding site is finger and morphlogy is fracture"

emb_one = _mini_model.encode_concept(concept_expr_one)
emb_two = _mini_model.encode_concept(concept_expr_two)

concept_subsumption(emb_one, emb_two, model=_mini_model, weight=0.1)

array(-7.320663, dtype=float32)

# HiT/OnT Query #2

In [48]:
concept_expr_one = "fracture of finger disorder"
concept_expr_two = "fracture of hand disorder"

emb_one = _ont_model.encode_concept(concept_expr_one)
emb_two = _ont_model.encode_concept(concept_expr_two)

concept_subsumption(emb_one, emb_two, model=_ont_model, weight=0.37)

array(-7.624261, dtype=float32)

In [49]:
concept_expr_one = "fracture of finger disorder"
concept_expr_two = "fracture of hand disorder"

emb_one = _mini_model.encode_concept(concept_expr_one)
emb_two = _mini_model.encode_concept(concept_expr_two)

concept_subsumption(emb_one, emb_two, model=_mini_model, weight=0.1)

array(-7.4878893, dtype=float32)

# HiT/OnT Query #3

In [50]:
concept_expr_one = "fracture of finger disorder"
concept_expr_two = "disorder that finding site is hand and morphlogy is fracture"

emb_one = _ont_model.encode_concept(concept_expr_one)
emb_two = _ont_model.encode_concept(concept_expr_two)

concept_subsumption(emb_one, emb_two, model=_ont_model, weight=0.37)

array(-11.179902, dtype=float32)

In [51]:
concept_expr_one = "fracture of finger disorder"
concept_expr_two = "disorder that finding site is hand and morphlogy is fracture"

emb_one = _mini_model.encode_concept(concept_expr_one)
emb_two = _mini_model.encode_concept(concept_expr_two)

concept_subsumption(emb_one, emb_two, model=_mini_model, weight=0.1)

array(-10.2517605, dtype=float32)

# HiT/OnT Query #4

In [52]:
concept_expr_one = "fracture of hand disorder"
concept_expr_two = "disorder that finding site is finger and morphlogy is fracture"

emb_one = _ont_model.encode_concept(concept_expr_one)
emb_two = _ont_model.encode_concept(concept_expr_two)

concept_subsumption(emb_one, emb_two, model=_ont_model, weight=0.37)

array(-11.472399, dtype=float32)

In [54]:
concept_expr_one = "fracture of hand disorder"
concept_expr_two = "disorder that finding site is finger and morphlogy is fracture"

emb_one = _mini_model.encode_concept(concept_expr_one)
emb_two = _mini_model.encode_concept(concept_expr_two)

concept_subsumption(emb_one, emb_two, model=_mini_model, weight=0.1)

array(-10.367128, dtype=float32)

# OnT Query #5

In [55]:
concept_expr_one = "disorder that finding site is finger and morphlogy is fracture"
concept_expr_two = "disorder that finding site is hand and morphlogy is fracture"

emb_one = _ont_model.encode_concept(concept_expr_one)
emb_two = _ont_model.encode_concept(concept_expr_two)

concept_subsumption(emb_one, emb_two, model=_ont_model, weight=0.37)

array(-7.121807, dtype=float32)

In [56]:
concept_expr_one = "disorder that finding site is finger and morphlogy is fracture"
concept_expr_two = "disorder that finding site is hand and morphlogy is fracture"

emb_one = _mini_model.encode_concept(concept_expr_one)
emb_two = _mini_model.encode_concept(concept_expr_two)

concept_subsumption(emb_one, emb_two, model=_mini_model, weight=0.1)

array(-7.0295777, dtype=float32)

# OnT Query #6

In [57]:
concept_expr_one = "fracture of hand disorder"
concept_expr_two = "disorder that finding site is hand and morphlogy is fracture"

emb_one = _ont_model.encode_concept(concept_expr_one)
emb_two = _ont_model.encode_concept(concept_expr_two)

concept_subsumption(emb_one, emb_two, model=_ont_model, weight=0.37)

array(-8.830052, dtype=float32)

In [58]:
concept_expr_one = "fracture of hand disorder"
concept_expr_two = "disorder that finding site is hand and morphlogy is fracture"

emb_one = _mini_model.encode_concept(concept_expr_one)
emb_two = _mini_model.encode_concept(concept_expr_two)

concept_subsumption(emb_one, emb_two, model=_mini_model, weight=0.1)

array(-7.8999004, dtype=float32)