# Extraction and manipulation of MeSH tree/terms

Author: **Pablo Iriarte, University of Geneva - pablo.iriarte@unige.ch**

## Example: extract all the possible entry terms for one particular pharmacological action

### Notebook 2: merge entry terms and pharmacological action and export the result for one pharmacological action in particular

**Input**: This notebook uses two files generated by the first notebook "[1_mesh_extraction_pharamacological_actions_and_entry_terms.ipynb](1_mesh_extraction_pharamacological_actions_and_entry_terms.ipynb)":

1. mesh2018_pharmacological_actions.tsv
1. mesh2018_entry_terms.tsv

**Output**: a tab-delimited file with all the possible entry terms for one particular pharmacological action

In [1]:
# Pharmacological action to extract. Later cells compare this value against
# the `pharmacological_action_name` column and build the output file names
# from it, so it must match the name exactly.
my_pa = "Antihypertensive Agents"

### Merge of pharmacological actions with Entry Terms

In [2]:
import pandas as pd

# Load the pharmacological actions table produced by the first notebook.
# The file is tab-separated and its first line holds the column names.
pa = pd.read_csv('mesh2018_pharmacological_actions.tsv', sep='\t')
pa

Unnamed: 0,name,ui,pharmacological_action_name,pharmacological_action_ui
0,Calcimycin,D000001,Anti-Bacterial Agents,D000900
1,Calcimycin,D000001,Calcium Ionophores,D061207
2,Temefos,D000002,Insecticides,D007306
3,Abscisic Acid,D000040,Plant Growth Regulators,D010937
4,Aripiprazole,D000068180,Antipsychotic Agents,D014150
5,Albumin-Bound Paclitaxel,D000068196,Antineoplastic Agents,D000970
6,Lubiprostone,D000068238,Chloride Channel Agonists,D065101
7,Darbepoetin alfa,D000068256,Hematinics,D006397
8,"Efavirenz, Emtricitabine, Tenofovir Disoproxil...",D000068257,Reverse Transcriptase Inhibitors,D018894
9,"Efavirenz, Emtricitabine, Tenofovir Disoproxil...",D000068257,Anti-HIV Agents,D019380


In [3]:
# Check the rows of `pa` that belong to the pharmacological action defined above
pa_action_mask = pa['pharmacological_action_name'] == my_pa
pa.loc[pa_action_mask]

Unnamed: 0,name,ui,pharmacological_action_name,pharmacological_action_ui
30,Brimonidine Tartrate,D000068438,Antihypertensive Agents,D000959
36,Olmesartan Medoxomil,D000068557,Antihypertensive Agents,D000959
38,"Amlodipine Besylate, Olmesartan Medoxomil Drug...",D000068558,Antihypertensive Agents,D000959
42,Nebivolol,D000068577,Antihypertensive Agents,D000959
82,Valsartan,D000068756,Antihypertensive Agents,D000959
100,"Amlodipine, Valsartan Drug Combination",D000068838,Antihypertensive Agents,D000959
197,Travoprost,D000069557,Antihypertensive Agents,D000959
200,Bimatoprost,D000069580,Antihypertensive Agents,D000959
217,Acebutolol,D000070,Antihypertensive Agents,D000959
329,Alprenolol,D000526,Antihypertensive Agents,D000959


In [4]:
# Load the entry terms table produced by the first notebook.
# The file is tab-separated and its first line holds the column names.
et = pd.read_csv('mesh2018_entry_terms.tsv', sep='\t')
et

Unnamed: 0,name,ui,entry_term_name,entry_term_ui
0,Calcimycin,D000001,Calcimycin,T000002
1,Calcimycin,D000001,A-23187,T000001
2,Calcimycin,D000001,A 23187,T000001
3,Calcimycin,D000001,A23187,T000004
4,Calcimycin,D000001,Antibiotic A23187,T000003
5,Calcimycin,D000001,"A23187, Antibiotic",T000003
6,Temefos,D000002,Temefos,T000008
7,Temefos,D000002,Temephos,T000007
8,Temefos,D000002,Abate,T000005
9,Temefos,D000002,Difos,T000006


In [5]:
# Left join on the descriptor identifier `ui`: every pharmacological action
# row is kept and repeated once per matching entry term. Both inputs carry a
# `name` column, so the result holds it twice, as `name_x` and `name_y`.
pa_et = pa.merge(et, on='ui', how='left')
pa_et

Unnamed: 0,name_x,ui,pharmacological_action_name,pharmacological_action_ui,name_y,entry_term_name,entry_term_ui
0,Calcimycin,D000001,Anti-Bacterial Agents,D000900,Calcimycin,Calcimycin,T000002
1,Calcimycin,D000001,Anti-Bacterial Agents,D000900,Calcimycin,A-23187,T000001
2,Calcimycin,D000001,Anti-Bacterial Agents,D000900,Calcimycin,A 23187,T000001
3,Calcimycin,D000001,Anti-Bacterial Agents,D000900,Calcimycin,A23187,T000004
4,Calcimycin,D000001,Anti-Bacterial Agents,D000900,Calcimycin,Antibiotic A23187,T000003
5,Calcimycin,D000001,Anti-Bacterial Agents,D000900,Calcimycin,"A23187, Antibiotic",T000003
6,Calcimycin,D000001,Calcium Ionophores,D061207,Calcimycin,Calcimycin,T000002
7,Calcimycin,D000001,Calcium Ionophores,D061207,Calcimycin,A-23187,T000001
8,Calcimycin,D000001,Calcium Ionophores,D061207,Calcimycin,A 23187,T000001
9,Calcimycin,D000001,Calcium Ionophores,D061207,Calcimycin,A23187,T000004


In [6]:
# Remove the duplicate descriptor name brought in from the entry terms table.
# Rebinding with drop(..., errors='ignore') instead of `del` keeps this cell
# safe to re-run: a second execution is a no-op rather than a KeyError.
pa_et = pa_et.drop(columns='name_y', errors='ignore')

In [7]:
# Give the remaining descriptor name column its plain header back
pa_et = pa_et.rename({'name_x': 'name'}, axis='columns')
pa_et

Unnamed: 0,name,ui,pharmacological_action_name,pharmacological_action_ui,entry_term_name,entry_term_ui
0,Calcimycin,D000001,Anti-Bacterial Agents,D000900,Calcimycin,T000002
1,Calcimycin,D000001,Anti-Bacterial Agents,D000900,A-23187,T000001
2,Calcimycin,D000001,Anti-Bacterial Agents,D000900,A 23187,T000001
3,Calcimycin,D000001,Anti-Bacterial Agents,D000900,A23187,T000004
4,Calcimycin,D000001,Anti-Bacterial Agents,D000900,Antibiotic A23187,T000003
5,Calcimycin,D000001,Anti-Bacterial Agents,D000900,"A23187, Antibiotic",T000003
6,Calcimycin,D000001,Calcium Ionophores,D061207,Calcimycin,T000002
7,Calcimycin,D000001,Calcium Ionophores,D061207,A-23187,T000001
8,Calcimycin,D000001,Calcium Ionophores,D061207,A 23187,T000001
9,Calcimycin,D000001,Calcium Ionophores,D061207,A23187,T000004


In [8]:
# Check the merged rows (one per entry term) for the pharmacological action defined above
pa_et[pa_et['pharmacological_action_name'].eq(my_pa)]

Unnamed: 0,name,ui,pharmacological_action_name,pharmacological_action_ui,entry_term_name,entry_term_ui
308,Brimonidine Tartrate,D000068438,Antihypertensive Agents,D000959,Brimonidine Tartrate,T000875824
309,Brimonidine Tartrate,D000068438,Antihypertensive Agents,D000959,"Tartrate, Brimonidine",T000875824
310,Brimonidine Tartrate,D000068438,Antihypertensive Agents,D000959,5-Bromo-6-(2-imidazolin-2-ylamino)quinoxaline ...,T000876771
311,Brimonidine Tartrate,D000068438,Antihypertensive Agents,D000959,"UK 14,304",T095045
312,Brimonidine Tartrate,D000068438,Antihypertensive Agents,D000959,UK-14304,T095051
313,Brimonidine Tartrate,D000068438,Antihypertensive Agents,D000959,UK14304,T095051
314,Brimonidine Tartrate,D000068438,Antihypertensive Agents,D000959,UK 14304,T095047
315,Brimonidine Tartrate,D000068438,Antihypertensive Agents,D000959,Ratio-Brimonidine,T000875829
316,Brimonidine Tartrate,D000068438,Antihypertensive Agents,D000959,Ratio Brimonidine,T000875829
317,Brimonidine Tartrate,D000068438,Antihypertensive Agents,D000959,"UK 14,304-18",T095046


In [9]:
# Export the entry terms of the pharmacological action defined above.
# The output file is named after the action, with spaces replaced by underscores.
myfileout = 'mesh2018_' + my_pa.replace(' ', '_') + '.tsv'

# Column order of the exported file: action first, then descriptor, then entry term
action_export_columns = [
    'pharmacological_action_name',
    'pharmacological_action_ui',
    'name',
    'ui',
    'entry_term_name',
    'entry_term_ui',
]
action_rows = pa_et.loc[pa_et['pharmacological_action_name'] == my_pa]
action_rows_sorted = action_rows[action_export_columns].sort_values(by='entry_term_name')
action_rows_sorted.to_csv(myfileout, sep='\t', index=False)

In [12]:
# Export only the entry term names for the pharmacological action defined above.
# The rows are filtered with `my_pa` rather than a hard-coded action name, so the
# file contents always match the action encoded in the file name.
myfileout = 'mesh2018_' + my_pa.replace(' ', '_') + '_entry_terms.txt'
pa_et.loc[pa_et['pharmacological_action_name'] == my_pa]['entry_term_name'].to_csv(myfileout, sep='\t', index=False)

In [13]:
# Export the complete table (every combination of pharmacological action and
# entry term) to a single TSV file, ordered by pharmacological action name.
complete_export_columns = [
    'pharmacological_action_name',
    'pharmacological_action_ui',
    'name',
    'ui',
    'entry_term_name',
    'entry_term_ui',
]
complete_table = pa_et[complete_export_columns].sort_values(by='pharmacological_action_name')
complete_table.to_csv('mesh2018_pharmacological_actions_entry_terms.tsv', sep='\t', index=False)