In [1]:
import pandas as pd
pd.set_option('display.max_colwidth', -1)
pd.set_option('display.max_rows', 500)

In [2]:
from SPARQLWrapper import SPARQLWrapper, CSV
from io import StringIO

In [3]:
sparql = SPARQLWrapper("http://localhost:8890/sparql")

Two SPARQL optimizations:
 - Internal query generates a list of RIDs which we are interested on:
 ```SPARQL
 SELECT ?rid WHERE {
    <http://www.radlex.org/RID/#RID1302> radlex:Has_Part{1,2} ?rid
 }
 ```
 The `radlex:Has_Part{1,2}` syntax allows us to fetch children and grand children of the ID of interest. In this case, the sub-parts and sub-sub-parts of RID1302 (right lung). See [sparql documentation](https://www.w3.org/TR/sparql11-property-paths/) for more details.
 - Then external query finds the `preferred_name` and `synonym` of the IDs of interest.

In [4]:
qry = """
PREFIX radlex: <http://www.radlex.org/RID/#>

SELECT ?rid ?name ?synonym 
WHERE {
  {
    SELECT ?rid WHERE {
      <http://www.radlex.org/RID/#RID1302> radlex:Has_Part{1,2} ?rid
    }
  } .
  ?rid radlex:Preferred_name ?name .
  OPTIONAL{?rid radlex:Synonym ?synonym  }
}
"""

In [5]:
sparql.setQuery(qry)
sparql.setReturnFormat(CSV)
res = sparql.queryAndConvert()
resAsStr = res.decode('utf-8')

In [9]:
df = pd.read_csv(StringIO(resAsStr))
df = df.fillna('')
df.head(200)

Unnamed: 0,rid,name,synonym
0,http://www.radlex.org/RID/#RID1303,upper lobe of right lung,RUL
1,http://www.radlex.org/RID/#RID1303,upper lobe of right lung,right upper lobe
2,http://www.radlex.org/RID/#RID1304,apical segment of upper lobe of right lung,S1 segment of upper lobe of right lung
3,http://www.radlex.org/RID/#RID1306,posterior segment of upper lobe of right lung,S2 segment of upper lobe of right lung
4,http://www.radlex.org/RID/#RID1308,anterior segment of upper lobe of right lung,S3 segment of upper lobe of right lung
5,http://www.radlex.org/RID/#RID1315,lower lobe of right lung,RLL
6,http://www.radlex.org/RID/#RID1315,lower lobe of right lung,right lower lobe
7,http://www.radlex.org/RID/#RID1318,medial basal segment of lower lobe of right lung,S7 segment of lower lobe of right lung
8,http://www.radlex.org/RID/#RID1310,middle lobe of lung,RML
9,http://www.radlex.org/RID/#RID1310,middle lobe of lung,middle lobe of right lung


In [10]:
Lst = list(set(df['name'].tolist() + df['synonym'].tolist()))
Lst = [ x for x in Lst if x!='' ]

In [12]:
terms = pd.DataFrame(Lst, columns=['meanings'])
terms['keys'] = 'RIGHT_LUNG'
terms

Unnamed: 0,meanings,keys
0,anterior basal segment of lower lobe of right lung,RIGHT_LUNG
1,lower lobe of right lung,RIGHT_LUNG
2,right middle lobe,RIGHT_LUNG
3,lateral segment of middle lobe of right lung,RIGHT_LUNG
4,right mid lung zone,RIGHT_LUNG
5,S9 segment of lower lobe of right lung,RIGHT_LUNG
6,S10 segment of lower lobe of right lung,RIGHT_LUNG
7,anterior segment of upper lobe of right lung,RIGHT_LUNG
8,medial basal segment of lower lobe of right lung,RIGHT_LUNG
9,S4 segment of middle lobe of right lung,RIGHT_LUNG
