In [1]:
import pandas as pd

# Load anatomical terms

In [2]:
# Read the coordinates table; the first CSV column is used as the row index.
coords = pd.read_csv(
    "../brain/coordinates.csv",
    index_col=0,
)

In [9]:
# Substrings deleted from the coordinate column labels when building the
# term list: the left/right prefixes and three cerebellum-related suffixes.
to_remove = [
    "left_",
    "right_",
    "_crus",
    "_lobules",
    "_vermis",
]

In [10]:
def _base_term(column_name):
    """Reduce one coordinate column label to its base structure name.

    Each fragment in `to_remove` is deleted in order, and after every
    deletion any "cerebellar" is rewritten to "cerebellum". The rewrite
    runs inside the loop, so it is skipped when `to_remove` is empty.
    """
    name = column_name
    for fragment in to_remove:
        name = name.replace(fragment, "").replace("cerebellar", "cerebellum")
    return name


# De-duplicate the reduced labels and keep them in sorted order.
terms = sorted({_base_term(column) for column in coords.columns})
len(terms)

57

In [11]:
# Echo each term to the cell output and save the lexicon, one term per line.
with open("../lexicon/lexicon_harvard-oxford.txt", "w+") as lexicon_file:
    for term in terms:
        print(term)
        lexicon_file.write("{}\n".format(term))

accumbens
amygdala
angular_gyrus
brainstem
caudate
central_opercular_cortex
cerebellum
cingulate_gyrus_anterior_division
cingulate_gyrus_posterior_division
cuneal_cortex
frontal_medial_cortex
frontal_operculum_cortex
frontal_orbital_cortex
frontal_pole
heschls_gyrus
hippocampus
inferior_frontal_gyrus_pars_opercularis
inferior_frontal_gyrus_pars_triangularis
inferior_temporal_gyrus_anterior_division
inferior_temporal_gyrus_posterior_division
inferior_temporal_gyrus_temporooccipital_part
insular_cortex
intracalcarine_cortex
lateral_occipital_cortex_inferior_division
lateral_occipital_cortex_superior_division
lingual_gyrus
middle_frontal_gyrus
middle_temporal_gyrus_anterior_division
middle_temporal_gyrus_posterior_division
middle_temporal_gyrus_temporooccipital_part
occipital_fusiform_gyrus
occipital_pole
pallidum
paracingulate_gyrus
parahippocampal_gyrus_anterior_division
parahippocampal_gyrus_posterior_division
parietal_operculum_cortex
planum_polare
planum_temporale
postcentral_gyrus
p

# Load word embeddings

In [7]:
# Select the embedding variant and read the space-delimited vector file:
# there is no header row and the first column (the token) becomes the index.
# NOTE(review): pandas' default NA and quote handling stay enabled here, so
# tokens such as "null" or ones containing a double quote may not survive
# parsing unchanged -- confirm against the vocabulary.
vsm_version = "bias"
vsm_path = "glove_gen_anat_n100_win15_min5_iter500_{}.txt".format(vsm_version)
vsm = pd.read_csv(vsm_path, sep=" ", header=None, index_col=0)
n_vocab, n_dims = vsm.shape
print("Vocab N={}, Embedding N={}".format(n_vocab, n_dims))

Vocab N=351530, Embedding N=100


In [20]:
# Keep only the lexicon terms that also appear in the embedding index.
shared_terms = vsm.index.intersection(terms)
terms = list(shared_terms)
len(terms)

52

# Identify candidate synonyms

In [13]:
from scipy.spatial.distance import cdist

In [41]:
# Generic anatomical words that are never accepted as synonym candidates.
exclude = [
    # lobe adjectives
    "frontal",
    "parietal",
    "temporal",
    "occipital",
    # positional qualifiers
    "superior",
    "middle",
    "inferior",
    "medial",
    "lateral",
    "anterior",
    "posterior",
]

In [46]:
def _looks_like_synonym(cand, term):
    """Return True if `cand` passes the lexical filters for `term`.

    A candidate qualifies when it is either a short token (at most three
    characters) that starts with the same character as `term`, or shares
    its first five characters with `term`. It must also be absent from
    both `terms` and `exclude`.
    """
    short_form = len(cand) <= 3 and cand[0] == term[0]
    if not (short_form or cand[:5] == term[:5]):
        return False
    return cand not in terms and cand not in exclude


# For every term, rank the whole vocabulary by cosine distance to the term's
# vector, look at the 20 closest tokens, and keep those that pass the filter.
# Terms with no surviving candidate get no entry in the mapping.
synonyms = {}
for term in terms:
    query = vsm.loc[term].values.reshape(1, vsm.shape[1])
    distances = cdist(query, vsm.values, metric="cosine")[0]
    ranked = pd.Series(distances, index=vsm.index).sort_values()
    nearest = list(ranked.index[:20])
    matches = [cand for cand in nearest if _looks_like_synonym(cand, term)]
    if matches:
        synonyms[term] = matches

In [47]:
# Display the term -> candidate-synonym mapping as the cell output.
synonyms

{'amygdala': ['amygdalar'],
 'hippocampus': ['hippocampal', 'hippocampal_formation'],
 'cerebellum': ['cerebellar'],
 'thalamus': ['thalamic'],
 'middle_frontal_gyrus': ['mfg'],
 'caudate': ['caudate_nucleus', 'caudate_head', 'caudate_nucleus_putamen'],
 'precentral_gyrus': ['precentral'],
 'superior_frontal_gyrus': ['sfg'],
 'postcentral_gyrus': ['postcentral'],
 'lingual_gyrus': ['lingual'],
 'superior_parietal_lobule': ['spl', 'superior_parietal_gyrus'],
 'brainstem': ['brain_stem'],
 'insular_cortex': ['insula', 'insular', 'insular_region'],
 'frontal_pole': ['frontopolar', 'frontal_operculum'],
 'heschls_gyrus': ['heschl', 'hg', 'heschls'],
 'planum_temporale': ['planum'],
 'supplementary_motor_cortex': ['supplementary_motor_area', 'sma'],
 'paracingulate_gyrus': ['paracingulate', 'paracentral'],
 'planum_polare': ['planum'],
 'subcallosal_cortex': ['subcallosal'],
 'frontal_medial_cortex': ['fmc'],
 'precuneous_cortex': ['precuneous'],
 'frontal_operculum_cortex': ['frontal_operc