In [2]:
import pandas as pd
pd.set_option('display.max_colwidth', -1)
pd.set_option('display.max_rows', 500)

In [3]:
from SPARQLWrapper import SPARQLWrapper, CSV
from io import StringIO

In [4]:
sparql = SPARQLWrapper("http://localhost:8890/sparql")

Three SPARQL optimizations:
 - Internal query generates a list of RIDs which we are interested on:
 ```SPARQL
 SELECT ?rid ?seed_name WHERE {
   VALUES ?seed { <http://www.radlex.org/RID/#RID1302> <http://www.radlex.org/RID/#RID1326> }
   ?seed radlex:Preferred_name ?seed_name .
   ?seed radlex:Has_Part{1,2} ?rid
 }
 ```
 The `radlex:Has_Part{1,2}` syntax allows us to fetch children and grand children of the ID of interest. In this case, the sub-parts and sub-sub-parts of RID1302 (right lung). See [sparql documentation](https://www.w3.org/TR/sparql11-property-paths/) for more details.
 - The `VALUES` clause allows us to use several values as our starting points (in this case `http://www.radlex.org/RID/#RID1302>` and `<http://www.radlex.org/RID/#RID1326` -- the URIs for `left lung` and `right lung`
 - Then external query finds the `preferred_name` and `synonym` of the IDs of interest.

In [1]:
qry = """
PREFIX radlex: <http://www.radlex.org/RID/#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT ?rid ?seed_name ?name ?synonym 
WHERE {
  {
    SELECT ?rid ?seed_name WHERE {
      VALUES ?seed { <http://www.radlex.org/RID/#RID1302> <http://www.radlex.org/RID/#RID1326> }
      ?seed radlex:Preferred_name ?seed_name .
      ?seed radlex:Has_Part{1,2} ?rid
    }
  } .
  ?rid radlex:Preferred_name ?name .
  OPTIONAL{?rid radlex:Synonym ?synonym  }
} ORDER BY ?seed_name
"""

In [5]:
sparql.setQuery(qry)
sparql.setReturnFormat(CSV)
res = sparql.queryAndConvert()
resAsStr = res.decode('utf-8')

In [6]:
df = pd.read_csv(StringIO(resAsStr))
df = df.fillna('')
df.head(200)

Unnamed: 0,rid,seed_name,name,synonym
0,http://www.radlex.org/RID/#RID1338,left lung,lower lobe of left lung,LLL
1,http://www.radlex.org/RID/#RID1338,left lung,lower lobe of left lung,left lower lobe
2,http://www.radlex.org/RID/#RID1339,left lung,superior segment of lower lobe of left lung,S6 segment of lower lobe of left lung
3,http://www.radlex.org/RID/#RID1341,left lung,anteromedial basal segment of lower lobe of left lung,S7+8 segment of lower lobe of left lung
4,http://www.radlex.org/RID/#RID1327,left lung,upper lobe of left lung,LUL
5,http://www.radlex.org/RID/#RID1327,left lung,upper lobe of left lung,left upper lobe
6,http://www.radlex.org/RID/#RID1343,left lung,lateral basal segment of lower lobe of left lung,S9 segment of lower lobe of left lung
7,http://www.radlex.org/RID/#RID1345,left lung,posterior basal segment of lower lobe of left lung,S10 segment of lower lobe of left lung
8,http://www.radlex.org/RID/#RID1328,left lung,superior division of upper lobe of left lung,
9,http://www.radlex.org/RID/#RID1349,left lung,left upper lung zone,


In [12]:
term_frames = []
for key, df_key in df.groupby('seed_name'):
    Lst = list(set(df_key['name'].tolist() + df_key['synonym'].tolist()))
    Lst = [ x for x in Lst if x!='' ]
    
    terms = pd.DataFrame(Lst, columns=['meanings'])
    terms['keys'] = key
    term_frames.append(terms)

all_terms = pd.concat(term_frames, ignore_index=True)

In [13]:
all_terms

Unnamed: 0,meanings,keys
0,lower lobe of left lung,left lung
1,left lower lobe,left lung
2,S6 segment of lower lobe of left lung,left lung
3,S7+8 segment of lower lobe of left lung,left lung
4,axillary region of left lung,left lung
5,superior segment of lower lobe of left lung,left lung
6,anteromedial basal segment of lower lobe of left lung,left lung
7,left lower lung zone,left lung
8,left upper lung zone,left lung
9,lateral basal segment of lower lobe of left lung,left lung
