In [1]:
import pandas as pd
pd.set_option('display.max_colwidth', -1)
pd.set_option('display.max_rows', 500)

In [2]:
from SPARQLWrapper import SPARQLWrapper, CSV
from io import StringIO

In [3]:
sparql = SPARQLWrapper("http://localhost:8890/sparql")

In [4]:
qry = """
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX radlex: <http://www.radlex.org/RID/#>

SELECT DISTINCT ?RID ?name ?sub_name1 ?sub_name2 ?sub_name3 ?synonym1 ?synonym2 ?synonym3
WHERE
{
 ?term radlex:Preferred_name ?name .
 FILTER regex(?name,"lung cancer",'i')
 ?term rdfs:label ?RID .
 OPTIONAL{?term radlex:Synonym ?synonym  .}
 
 OPTIONAL{?term radlex:Is_A ?Sub_type1  .
 ?Sub_type1 radlex:Preferred_name ?sub_name1 .
 ?Sub_type1 rdfs:label ?sub1_RID .
 
 OPTIONAL{?Sub_type1 radlex:Synonym ?synonym1  .}
 
 OPTIONAL{?Sub_type1 radlex:Has_Subtype ?Sub_type2  .
 ?Sub_type2 radlex:Preferred_name ?sub_name2 .
 ?Sub_type2 rdfs:label ?sub2_RID .
 
 OPTIONAL{?Sub_type2 radlex:Synonym ?synonym2  .}
 
 OPTIONAL{?Sub_type2 radlex:Has_Subtype ?Sub_type3  .
 ?Sub_type3 radlex:Preferred_name ?sub_name3 .
 ?Sub_type3 rdfs:label ?sub3_RID .
 
 OPTIONAL{?Sub_type3 radlex:Synonym ?synonym3  .}
 

 }}}}
 """

In [5]:
sparql.setQuery(qry)
sparql.setReturnFormat(CSV)
res = sparql.queryAndConvert()
resAsStr = res.decode('utf-8')

In [6]:
df = pd.read_csv(StringIO(resAsStr))
df.head(200)

Unnamed: 0,RID,name,sub_name1,sub_name2,sub_name3,synonym1,synonym2,synonym3
0,RID45686,lung cancer,malignant neoplastic disease,prostate cancer,clinically significant prostate cancer,cancer,carcinoma of prostate,intermediate high risk prostate cancer
1,RID45686,lung cancer,malignant neoplastic disease,lymphoma,T-cell lymphoma,cancer,lymphosarcoma,malignant histiocytosis syndrome
2,RID45686,lung cancer,malignant neoplastic disease,prostate cancer,clinically significant prostate cancer,cancer,cancer of prostate,intermediate high risk prostate cancer
3,RID45686,lung cancer,malignant neoplastic disease,prostate cancer,clinically significant prostate cancer,cancer,"prostate cancer, NOS",intermediate high risk prostate cancer
4,RID45686,lung cancer,malignant neoplastic disease,lymphoma,non-Hodgkin lymphoma,cancer,lymphosarcoma,
5,RID45686,lung cancer,malignant neoplastic disease,lymphoma,Hodgkin lymphoma,cancer,lymphosarcoma,
6,RID45686,lung cancer,malignant neoplastic disease,malignant neoplasm of pelvis,malignant neoplasm of genitourinary system,cancer,malignant tumor of pelvis,
7,RID45686,lung cancer,malignant neoplastic disease,lymphoma,large cell lymphoma,cancer,lymphosarcoma,
8,RID45686,lung cancer,malignant neoplastic disease,metastatic disease,,cancer,metastasis,
9,RID45686,lung cancer,malignant neoplastic disease,gastric cancer,,cancer,stomach cancer,


In [7]:
Lst = [df['name'].tolist(), df['sub_name1'].tolist(), df['sub_name2'].tolist(), df['sub_name3'].tolist(), df['synonym1'].tolist(), df['synonym2'].tolist(), df['synonym3'].tolist()]

In [8]:
print(Lst)

[['lung cancer', 'lung cancer', 'lung cancer', 'lung cancer', 'lung cancer', 'lung cancer', 'lung cancer', 'lung cancer', 'lung cancer', 'lung cancer', 'lung cancer', 'lung cancer', 'lung cancer', 'lung cancer', 'lung cancer', 'lung cancer', 'lung cancer', 'lung cancer', 'lung cancer', 'lung cancer', 'lung cancer', 'lung cancer', 'lung cancer'], ['malignant neoplastic disease', 'malignant neoplastic disease', 'malignant neoplastic disease', 'malignant neoplastic disease', 'malignant neoplastic disease', 'malignant neoplastic disease', 'malignant neoplastic disease', 'malignant neoplastic disease', 'malignant neoplastic disease', 'malignant neoplastic disease', 'malignant neoplastic disease', 'malignant neoplastic disease', 'malignant neoplastic disease', 'malignant neoplastic disease', 'malignant neoplastic disease', 'malignant neoplastic disease', 'malignant neoplastic disease', 'malignant neoplastic disease', 'malignant neoplastic disease', 'malignant neoplastic disease', 'malignant 

In [9]:
Cancer_values = []
for lists in Lst:
    for string in lists:
        if str(string) == 'nan':
            pass
        else:
            Cancer_values.append(string)

Cancer_values = list(set(Cancer_values))
print(Cancer_values)

['cancer of prostate', 'malignant tumor of pelvis', 'ovarian cancer', 'breast cancer', 'stomach cancer', 'cancer of unknown primary', 'malignant histiocytosis syndrome', 'testicular cancer', 'lymphoma', 'gastric cancer', 'T-cell lymphoma', 'clinically significant prostate cancer', 'carcinoma of prostate', 'cancer', 'metastasis', 'lung cancer', 'cervical cancer', 'malignant neoplasm of pelvis', 'head and neck cancer', 'melanoma', 'non-Hodgkin lymphoma', 'pancreatic cancer', 'metastatic disease', 'malignant neoplastic disease', 'prostate cancer', 'Hodgkin lymphoma', 'esophageal cancer', 'lymphosarcoma', 'endometrial cancer', 'prostate cancer, NOS', 'malignant neoplasm of genitourinary system', 'large cell lymphoma', 'colorectal cancer', 'thyroid cancer', 'intermediate high risk prostate cancer']
