In [1]:
import pandas as pd

import sys
sys.path.append('../') # Root of the repo

from helpers import sparqlToDataframe, sparqlToTermDict, wordsToSingleToken

In [2]:
# SPARQL query text yielding (?ancestor, ?word) rows, where ?word is the
# Preferred_name of a part and ?ancestor is the Preferred_name of the entity
# that directly contains it via radlex:Has_Part. Two sub-selects are UNIONed:
#   1. direct parts of the two root entities (RID1302, RID1326), paired with
#      the root's name;
#   2. parts of those parts (two Has_Part hops from a root), paired with the
#      intermediate part's name.
# Rows are ordered by ?word.
# The in-query comment identifies the two roots as right lung and left lung;
# "decedents" there presumably means "descendants" (left untouched because it
# is part of the string sent to the endpoint).
# NOTE(review): the rdfs prefix is declared but never used in this query.
parent_qry = """
# This query queries all decedents (children and grandchildren) of right lung 
# and left lung and maps them to their direct ancestor
PREFIX radlex: <http://www.radlex.org/RID/#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT ?ancestor ?word
WHERE {
  {
    SELECT (?root_name as ?ancestor) (?child_name as ?word) 
    WHERE {
      VALUES ?root { <http://www.radlex.org/RID/#RID1302> <http://www.radlex.org/RID/#RID1326> }

      ?root radlex:Has_Part ?child .

      ?root radlex:Preferred_name ?root_name .
      ?child radlex:Preferred_name ?child_name
    }
  } UNION {
    SELECT (?child_name as ?ancestor) (?grandchild_name as ?word) 
    WHERE {
      VALUES ?root { <http://www.radlex.org/RID/#RID1302> <http://www.radlex.org/RID/#RID1326> }

      ?root  radlex:Has_Part ?child .
      ?child radlex:Has_Part ?grandchild .

      ?child radlex:Preferred_name ?child_name .
      ?grandchild radlex:Preferred_name ?grandchild_name
    }
  }
} ORDER BY ?word
"""

In [3]:
# SPARQL query text yielding (?ancestor, ?word) rows for entities that are
# exactly two radlex:Has_Part hops below one of the two root entities
# (RID1302, RID1326). ?word is the grandchild's Preferred_name and ?ancestor
# is the Preferred_name of the root itself, so the intermediate part is
# skipped. Rows are ordered by ?word.
# The in-query comment identifies the two roots as right lung and left lung;
# "decedents" there presumably means "descendants" (left untouched because it
# is part of the string sent to the endpoint).
# NOTE(review): the rdfs prefix is declared but never used in this query.
grandparent_qry = """
# This query queries secondary decedents (grandchildren) of right lung and 
# left lung and maps them to RIGHT_LUNG or LEFT_LUNG 
PREFIX radlex: <http://www.radlex.org/RID/#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT (?root_name as ?ancestor) (?grandchild_name as ?word) 
WHERE {
  VALUES ?root { <http://www.radlex.org/RID/#RID1302> <http://www.radlex.org/RID/#RID1326> }

  ?root  radlex:Has_Part ?child .
  ?child radlex:Has_Part ?grandchild .

  ?root  radlex:Preferred_name ?root_name .
  ?grandchild radlex:Preferred_name ?grandchild_name
} ORDER BY ?word
"""

In [14]:
# Execute the direct-ancestor query through the project helper, keyed on the
# 'ancestor' column with 'word' as the only name column.
parent_df = sparqlToTermDict(
    parent_qry,
    keyColumn='ancestor',
    nameCols=['word'],
)
# Rewrite every entry of the 'meanings' column with wordsToSingleToken
# (presumably joins multi-word terms into one token -- confirm in helpers).
parent_df['meanings'] = parent_df['meanings'].apply(wordsToSingleToken)

In [15]:
# Execute the root-level (grandparent) query through the project helper, keyed
# on the 'ancestor' column with 'word' as the only name column.
grandparent_df = sparqlToTermDict(
    grandparent_qry,
    keyColumn='ancestor',
    nameCols=['word'],
)
# Rewrite every entry of the 'meanings' column with wordsToSingleToken
# (presumably joins multi-word terms into one token -- confirm in helpers).
grandparent_df['meanings'] = grandparent_df['meanings'].apply(wordsToSingleToken)

In [16]:
# Both term tables are written with the same layout: pipe-separated, the index
# emitted under the header 'id', and only the 'meanings' and 'keys' columns,
# in that order.
csv_options = dict(sep='|', index_label='id', columns=['meanings', 'keys'])

parent_df.to_csv('../Data/clever_ext_parent.txt', **csv_options)
grandparent_df.to_csv('../Data/clever_ext_grandparent.txt', **csv_options)