# UMLS Shelve File 

A Python dictionary-like shelf that takes CUIs (UMLS Concept Unique Identifiers) as keys and provides, for each concept, its UMLS name, semantic types, definitions, and relationships.  
In the following sections, we provide examples of loading the data and of basic data exploration tasks such as printing UMLS names and tracing term relationships. 

## Load shelve file:

In [10]:
# Import UMLSUtils from the UMLS package and additionally register it in
# sys.modules under the top-level name 'UMLSUtils'.
# NOTE(review): presumably the objects stored in the shelf/pickle files were
# serialized with the module path 'UMLSUtils', so loading them needs this
# alias to resolve their classes -- confirm against how the files were built.
from UMLS import UMLSUtils
import sys
sys.modules['UMLSUtils'] = UMLSUtils  

In [102]:
import os
import pickle
import shelve
from pprint import pprint

In [11]:
# Directory holding the sample data. Keep the trailing slash: file paths are
# built from it by plain string concatenation.
dataDir = '../../../sample_shelf/'
# Open read-only. This notebook only reads the shelf, and the default flag
# ('c') would silently create a new, empty shelf if the path were wrong
# instead of reporting the missing file.
umls_shelve_dict = shelve.open(dataDir + "umls_shelve_dict.shlf", flag='r')

In [4]:
len(umls_shelve_dict)

4251

## Data exploration

In [68]:
key = 'C0004096'

### Meta information

In [75]:
print umls_shelve_dict[key]

CUI:	C0004096
NAME:	Asthma
TYPES:	[('Disease or Syndrome', 'B2.2.1.2.1')]
36	 NAMES 	42	 CODES 	14	 DEFINITIONS 	0	 RELATIONS 



### Check out details

#### Names:

In [72]:
# Print the concept's CUI, its single `name`, and `names`, the list of all
# name strings recorded for the concept.
print umls_shelve_dict[key].cui
print umls_shelve_dict[key].name
print umls_shelve_dict[key].names

C0004096
Asthma
['Asthma, NOS', 'Asthma', 'Asthma unspecified', 'Airway hyperreactivity', 'Bronchitic asthma', '-- Asthma', 'Bronchial asthma, NOS', '2-51 ASTHMA', 'Asthmatic', 'asthma disorders', 'Asthma bronchial', 'Bronchial Asthma', 'asthma', 'Cardio/pulm: Asthma', 'br asthma', 'Unspecified asthma', 'bronchitic asthma', 'Asthmas', 'Asthma unspecified (disorder)', 'ASTHMA BRONCHIAL', 'asthmatic', 'Bronchial asthma', 'asthmatics', 'BRONCHIAL ASTHMA', 'Asthma NOS (disorder)', 'Asthma, Bronchial', 'Asthma (disorder) [Ambiguous]', 'Asthma, unspecified', 'Asthma [Disease/Finding]', 'bronchial asthma', 'ASTHMA', 'Asthma NOS', 'asthma (diagnosis)', 'Asthma (disorder)', 'Br. asthma', 'ASTHMA, BRONCHIAL']


#### Definition:

In [73]:
# `definitions` is indexable; only its first entry is printed here.
print umls_shelve_dict[key].cui
print umls_shelve_dict[key].name
print umls_shelve_dict[key].definitions[0]

C0004096
Asthma
A form of bronchial disorder with three distinct components: airway hyper-responsiveness (RESPIRATORY HYPERSENSITIVITY), airway INFLAMMATION, and intermittent AIRWAY OBSTRUCTION. It is characterized by spasmodic contraction of airway smooth muscle, WHEEZING, and dyspnea (DYSPNEA, PAROXYSMAL).


#### Semantic types

In [74]:
# Each entry of `semantic_types` prints as a (type name, code) tuple; only the
# first entry is shown here.
print umls_shelve_dict[key].cui
print umls_shelve_dict[key].name
print umls_shelve_dict[key].semantic_types[0]

C0004096
Asthma
('Disease or Syndrome', 'B2.2.1.2.1')


#### Codes

In [106]:
# NOTE: the output recorded below is for C0001127 (Acidosis, Respiratory), not
# for the C0004096 key set earlier in this section, so this cell was evidently
# last run after `key` had been reassigned further down in the notebook.
print umls_shelve_dict[key].cui
print umls_shelve_dict[key].name
# ICD-9 code
print 'ICD-9: ' + str(umls_shelve_dict[key].codes['MTHICD9']) + '\n'
# Show the first ten (code system, list of codes) pairs. Slicing the result of
# .items() only works when it returns a list, as dict.items() does in Python 2.
pprint(umls_shelve_dict[key].codes.items()[0:10])

C0001127
Acidosis, Respiratory
ICD-9: ['276.2']

[('NCI_NICHD', ['C50728']),
 ('BI', ['BI00356']),
 ('MSH', ['D000142']),
 ('RCD', ['C3621']),
 ('DXP', ['NOCODE', 'U003359']),
 ('AOD', ['0000005822']),
 ('WHO', ['1465']),
 ('MDR', ['10000495', '10038660', '10038661']),
 ('MEDCIN', ['33074']),
 ('ICD10CM', ['E87.2'])]


### Relationships:

In [96]:
# Print the summary of concept `key`, then, for two CUIs that appear in its
# `relation_cuis` mapping, print the relation entries stored under that CUI
# followed by the related concept's name and first definition.
key = 'C0000833'
print umls_shelve_dict[key]

# Relationship 1
key2 = 'C0007557'
print 'Relationship example 1: ' + str(key) + ' <-- ' + str(key2) 
# relation_cuis[key2] prints as a list of (relation, description) tuples.
print umls_shelve_dict[key].relation_cuis[key2]
print str(key2) + ' name: ' + str(umls_shelve_dict[key2].name)
print str(key2) + ' definition: ' + str(umls_shelve_dict[key2].definitions[0])

# Relationship 2
key3 = 'C0002895'
print '\nRelationship example 2: '+ str(key) + ' <-- ' + str(key3) 
print umls_shelve_dict[key].relation_cuis[key3]
print str(key3) + ' name: ' + str(umls_shelve_dict[key3].name)
print str(key3) + ' definition: ' + str(umls_shelve_dict[key3].definitions[0])


CUI:	C0000833
NAME:	Abscess
TYPES:	[('Disease or Syndrome', 'B2.2.1.2.1')]
13	 NAMES 	27	 CODES 	12	 DEFINITIONS 	1327	 RELATIONS 

Relationship example 1: C0000833 <-- C0007557
[('RO', 'may_treat')]
C0007557 name: Cefoxitin
C0007557 definition: A semisynthetic cephamycin antibiotic resistant to beta-lactamase.

Relationship example 2: C0000833 <-- C0002895
[('RQ', 'clinically_associated_with')]
C0002895 name: Anemia, Sickle Cell
C0002895 definition: A disease characterized by chronic hemolytic anemia, episodic painful crises, and pathologic involvement of many organs. It is the clinical expression of homozygosity for hemoglobin S.


In [98]:
# Print the summary of concept `key`, then, for two CUIs that appear in its
# `relation_cuis` mapping, print the relation entries stored under that CUI
# followed by the related concept's name and first definition.
key = 'C0001127'
print umls_shelve_dict[key]

# Relationship 1
key2 = 'C0004096'
print 'Relationship example 1: ' + str(key) + ' <-- ' + str(key2) 
# relation_cuis[key2] prints as a list of (relation, description) tuples.
print umls_shelve_dict[key].relation_cuis[key2]
print str(key2) + ' name: ' + str(umls_shelve_dict[key2].name)
print str(key2) + ' definition: ' + str(umls_shelve_dict[key2].definitions[0])


# Relationship 2
key3 = 'C0002063'
print '\nRelationship example 2: '+ str(key) + ' <-- ' + str(key3) 
print umls_shelve_dict[key].relation_cuis[key3]
print str(key3) + ' name: ' + str(umls_shelve_dict[key3].name)
print str(key3) + ' definition: ' + str(umls_shelve_dict[key3].definitions[0])


CUI:	C0001127
NAME:	Acidosis, Respiratory
TYPES:	[('Disease or Syndrome', 'B2.2.1.2.1')]
20	 NAMES 	22	 CODES 	12	 DEFINITIONS 	249	 RELATIONS 

Relationship example 1: C0001127 <-- C0004096
[('RQ', 'clinically_associated_with')]
C0004096 name: Asthma
C0004096 definition: A form of bronchial disorder with three distinct components: airway hyper-responsiveness (RESPIRATORY HYPERSENSITIVITY), airway INFLAMMATION, and intermittent AIRWAY OBSTRUCTION. It is characterized by spasmodic contraction of airway smooth muscle, WHEEZING, and dyspnea (DYSPNEA, PAROXYSMAL).

Relationship example 2: C0001127 <-- C0002063
[('RQ', 'clinically_associated_with'), ('SIB', '')]
C0002063 name: Alkalosis
C0002063 definition: A pathological condition that removes acid or adds base to the body fluids.


# UMLS Index File

Python dictionaries that map different kinds of lookup keys (for example ICD-9 codes or name strings) to lists of CUIs.

## Load umls index:

In [None]:
# Load the pickled index. The file is opened in binary mode (pickle data is
# bytes, and text mode can corrupt it on some platforms) inside a context
# manager so the handle is closed as soon as loading finishes.
# NOTE: unpickling can execute arbitrary code; only load index files that come
# from a trusted source.
with open(dataDir + 'umls_index.pk', 'rb') as index_file:
    umls_index = pickle.load(index_file)

### Lookup query examples

#### Query by ICD-9 code

In [115]:
umls_index.mappings['MTHICD9']['276.2']

['C0001125', 'C0001127', 'C0001122']

In [116]:
def printUMLSBatch(lsKeys, umls_shelve_dict):
    """Print the entry stored under each key of *lsKeys*, in order.

    Args:
        lsKeys: iterable of lookup keys (CUIs).
        umls_shelve_dict: mapping from key to the object to print.

    Raises:
        KeyError: propagated from the mapping when a key is missing.
    """
    for key in lsKeys:
        # The parenthesised single-argument form prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print(umls_shelve_dict[key])

In [117]:
printUMLSBatch(umls_index.mappings['MTHICD9']['276.2'], umls_shelve_dict)

CUI:	C0001125
NAME:	Acidosis, Lactic
TYPES:	[('Disease or Syndrome', 'B2.2.1.2.1')]
13	 NAMES 	21	 CODES 	8	 DEFINITIONS 	295	 RELATIONS 

CUI:	C0001127
NAME:	Acidosis, Respiratory
TYPES:	[('Disease or Syndrome', 'B2.2.1.2.1')]
20	 NAMES 	22	 CODES 	12	 DEFINITIONS 	249	 RELATIONS 

CUI:	C0001122
NAME:	Acidosis
TYPES:	[('Pathologic Function', 'B2.2.1.2')]
12	 NAMES 	26	 CODES 	14	 DEFINITIONS 	315	 RELATIONS 



#### Lookup by string

In [118]:
umls_index.mappings['STRING']['anaemia']

['C0002871']

In [119]:
printUMLSBatch(umls_index.mappings['STRING']['anaemia'], umls_shelve_dict)

CUI:	C0002871
NAME:	Anemia
TYPES:	[('Disease or Syndrome', 'B2.2.1.2.1')]
30	 NAMES 	46	 CODES 	16	 DEFINITIONS 	0	 RELATIONS 



In [120]:
# All possible keys to use
#umls_index.mappings.keys()

## More UMLS shelve query examples

#### 1. Extract all CUIs whose semantic type is 'Disease or Syndrome'

In [39]:
def getDisease(umls_shelve_dict):
    """Return the keys whose first semantic type is 'Disease or Syndrome'.

    Only the first entry of each concept's ``semantic_types`` is inspected, and
    only its first element (the type name) is compared.

    Args:
        umls_shelve_dict: mapping from CUI to an object exposing a
            ``semantic_types`` sequence of tuples.

    Returns:
        List of matching keys, in the mapping's key order.
    """
    wanted = 'Disease or Syndrome'
    return [
        cui
        for cui in umls_shelve_dict.keys()
        if str(umls_shelve_dict[cui].semantic_types[0][0]) == wanted
    ]

In [41]:
lsDiseaseKey = getDisease(umls_shelve_dict)

#### 2. Given a set of CUIs, extract those with at least one relationship

In [86]:
def getCuiWithRelation(lsKey, shelve_dict=None):
    """Return (key, relation count) pairs for keys with at least one relation.

    Args:
        lsKey: iterable of keys (CUIs) to inspect.
        shelve_dict: optional mapping from key to an object exposing a sized
            ``relation_cuis`` attribute. When omitted, the notebook-level
            ``umls_shelve_dict`` is used, so existing one-argument calls
            behave as before.

    Returns:
        List of ``(key, count)`` tuples, in the order of *lsKey*, where
        ``count`` is ``len(relation_cuis)`` and is greater than zero.
    """
    if shelve_dict is None:
        # Fall back to the global shelf opened earlier in the notebook.
        shelve_dict = umls_shelve_dict
    lsCntRelation2 = []
    for key in lsKey:
        cntRelation = len(shelve_dict[key].relation_cuis)
        if cntRelation > 0:
            lsCntRelation2.append((key, cntRelation))
    return lsCntRelation2

In [87]:
lsCntRelation2 = getCuiWithRelation(lsDiseaseKey)

In [88]:
lsCntRelation2

[('C0000833', 1327),
 ('C0001083', 1),
 ('C0001080', 232),
 ('C0001169', 63),
 ('C0001163', 143),
 ('C0001125', 295),
 ('C0001127', 249),
 ('C0001126', 210),
 ('C0000809', 99),
 ('C0000814', 478),
 ('C0001139', 222),
 ('C0000880', 105),
 ('C0000889', 187),
 ('C0001142', 21),
 ('C0001145', 131),
 ('C0001144', 382),
 ('C0000744', 81),
 ('C0000774', 78),
 ('C0000823', 87)]

#### 3. Find cases where the relationship has a description and the related CUI exists in the current dictionary

In [89]:
# Collect (key, related key, relation entries) triples where the related CUI is
# itself present in the shelf, differs from `key`, and the FIRST relation entry
# has a non-empty description.
allKeys = umls_shelve_dict.keys()
# Set copy of the keys: membership tests run once per related CUI, and a set
# makes each test O(1) instead of a scan over all keys.
allKeySet = set(allKeys)
lsExample = []
for pair in lsCntRelation2:
    key = pair[0]
    # Fetch the entry's relation mapping once per key instead of re-reading the
    # shelf for every related CUI.
    relationCuis = umls_shelve_dict[key].relation_cuis
    for key2 in relationCuis.keys():
        if key2 in allKeySet and key2 != key:
            lsRelation = relationCuis[key2]
            # Only the description of the first relation entry is checked.
            if lsRelation[0][1] != '':
                example = (key, key2, lsRelation)
                lsExample.append(example)

In [90]:
lsExample

[('C0000833', 'C0001304', [('CHD', 'isa'), ('RN', '')]),
 ('C0000833', 'C0007557', [('RO', 'may_treat')]),
 ('C0000833', 'C0004623', [('RO', 'parent_is_cdrh'), ('SIB', '')]),
 ('C0000833',
  'C0007645',
  [('RB', 'mapped_from'), ('RL', 'mapped_from'), ('RQ', ''), ('RB', '')]),
 ('C0000833', 'C0004611', [('RO', 'causative_agent_of')]),
 ('C0000833', 'C0002895', [('RQ', 'clinically_associated_with')]),
 ('C0000833', 'C0007555', [('RO', 'may_treat')]),
 ('C0000833',
  'C0006105',
  [('RO', 'has_associated_morphology'),
   ('RO', 'associated_with'),
   ('CHD', ''),
   ('RN', '')]),
 ('C0001080',
  'C0008449',
  [('RQ', 'mapped_from'),
   ('RQ', 'primary_mapped_to'),
   ('SIB', ''),
   ('RQ', ''),
   ('RQ', 'classifies'),
   ('RL', 'mapped_from')]),
 ('C0001080', 'C0008073', [('RO', 'parent_is_nichd')]),
 ('C0001163', 'C0006111', [('PAR', 'inverse_isa')]),
 ('C0001125',
  'C0001122',
  [('RQ', 'mapped_from'),
   ('SIB', ''),
   ('PAR', ''),
   ('PAR', 'inverse_isa'),
   ('RQ', ''),
   ('RO'