In [1]:
import pixiedust

if sc.version.startswith('1.6.'):  # Spark 1.6
    pixiedust.installPackage("graphframes:graphframes:0.5.0-spark1.6-s_2.11")
elif sc.version.startswith('2.'):  # Spark 2.1, 2.0
    pixiedust.installPackage("graphframes:graphframes:0.5.0-spark2.1-s_2.11")


pixiedust.installPackage("com.typesafe.scala-logging:scala-logging-api_2.11:2.1.2")
pixiedust.installPackage("com.typesafe.scala-logging:scala-logging-slf4j_2.11:2.1.2")

print("done")


Pixiedust database opened successfully


Package already installed: graphframes:graphframes:0.5.0-spark2.1-s_2.11
Package already installed: com.typesafe.scala-logging:scala-logging-api_2.11:2.1.2
Package already installed: com.typesafe.scala-logging:scala-logging-slf4j_2.11:2.1.2
done


In [2]:
# import findspark
# findspark.init()

from pyspark.sql import SparkSession
from pyspark.sql import SQLContext

# import os

# os.environ['PYSPARK_SUBMIT_ARGS'] = '--packages graphframes:graphframes:0.5.0-spark2.1-s_2.11 pyspark-shell'


# Build the SparkSession
spark = SparkSession.builder \
   .master("local") \
   .appName("sparkPlot") \
   .config("spark.executor.memory", "2gb") \
   .getOrCreate()

# spark.conf.set("spark.jars.packages", "graphframes:graphframes:0.5.0-spark2.1-s_2.11")
sc = spark.sparkContext

sqlContext=SQLContext(sc)

In [3]:
from pyspark.sql.types import StructType, StructField
from pyspark.sql.types import DoubleType, IntegerType, StringType

schema = StructType([
    StructField("ONT_NAME", StringType()),
    StructField("CLASS_IRI", StringType()),
    StructField("PARENT_CLASS_IRI", StringType())
])

df_class_hier= spark.read \
    .schema(schema) \
    .option("header", "true") \
    .option("mode", "DROPMALFORMED") \
    .csv("hdfs://localhost:9000/ONT_CLASS_HIERARCHY.csv")
    
    
print(df_class_hier.count())
df_class_hier.printSchema()
df_class_hier=df_class_hier.distinct()
# df_class_hier.createGlobalTempView("class_hier")
df_class_hier.createOrReplaceTempView("class_hier")

# iri = "http://purl.obolibrary.org/obo/iao_0000030"
# print(spark.sql("SELECT * from class_hier where CLASS_IRI = '" + iri +"'").collect())
# print(df_class_hier.count())


809619
root
 |-- ONT_NAME: string (nullable = true)
 |-- CLASS_IRI: string (nullable = true)
 |-- PARENT_CLASS_IRI: string (nullable = true)



In [30]:
from pyspark.sql.types import StructType, StructField
from pyspark.sql.types import DoubleType, IntegerType, StringType

schema = StructType([
    StructField("ONT_NAME", StringType()),
    StructField("CLASS_IRI", StringType()),
    StructField("CLASS_LABEL", StringType())
])

df_class_labels= spark.read \
    .schema(schema) \
    .option("header", "true") \
    .option("mode", "DROPMALFORMED") \
    .csv("hdfs://localhost:9000/ONT_CLASS_LABELS.csv")

    
print(df_class_labels.count())
df_class_labels.printSchema()
df_class_labels=df_class_labels.distinct()
# df_class_labels.createGlobalTempView("class_labels")
df_class_labels.createOrReplaceTempView("class_labels")
    

711444
root
 |-- ONT_NAME: string (nullable = true)
 |-- CLASS_IRI: string (nullable = true)
 |-- CLASS_LABEL: string (nullable = true)



In [None]:
from pyspark.sql.types import StructType, StructField
from pyspark.sql.types import DoubleType, IntegerType, StringType

schema = StructType([
    StructField("ONT_NAME", StringType()),
    StructField("TAX_TYPE", StringType()),
    StructField("AREA_ID", StringType()),
    StructField("AREA_NAME", StringType()),
    StructField("AREA_LEVEL", IntegerType())
])

df_tax_areas= spark.read \
    .schema(schema) \
    .option("header", "true") \
    .option("mode", "DROPMALFORMED") \
    .csv("hdfs://localhost:9000/TAX_AREAS.csv")

print(df_tax_areas.count())
df_tax_areas.printSchema()
df_tax_areas=df_tax_areas.distinct()
# df_tax_areas.createGlobalTempView("tax_areas")
df_tax_areas.createOrReplaceTempView("tax_areas")

In [None]:
from pyspark.sql.types import StructType, StructField
from pyspark.sql.types import DoubleType, IntegerType, StringType

schema = StructType([
    StructField("ONT_NAME", StringType()),
    StructField("TAX_TYPE", StringType()),
    StructField("AREA_ID", StringType()),
    StructField("CLASS_IRI", StringType())
])

df_tax_areas_concepts= spark.read \
    .schema(schema) \
    .option("header", "true") \
    .option("mode", "DROPMALFORMED") \
    .csv("hdfs://localhost:9000/TAX_AREAS_CONCEPTS.csv")

print(df_tax_areas_concepts.count())
df_tax_areas_concepts.printSchema()
df_tax_areas_concepts=df_tax_areas_concepts.distinct()
# df_tax_areas_concepts.createGlobalTempView("tax_areas_concepts")
df_tax_areas_concepts.createOrReplaceTempView("tax_areas_concepts")

In [None]:
from pyspark.sql.types import StructType, StructField
from pyspark.sql.types import DoubleType, IntegerType, StringType

schema = StructType([
    StructField("ONT_NAME", StringType()),
    StructField("TAX_TYPE", StringType()),
    StructField("PAREA_ROOT_IRI", StringType()),
    StructField("CLASS_IRI", StringType())
])

df_tax_areas_pareas= spark.read \
    .schema(schema) \
    .option("header", "true") \
    .option("mode", "DROPMALFORMED") \
    .csv("hdfs://localhost:9000/TAX_AREAS_PAREAS.csv")

print(df_tax_areas_pareas.count())
df_tax_areas_pareas.printSchema()
df_tax_areas_pareas=df_tax_areas_pareas.distinct()
# df_tax_areas_pareas.createGlobalTempView("tax_areas_pareas")
df_tax_areas_pareas.createOrReplaceTempView("tax_areas_pareas")


In [None]:
schema = StructType([])
iri_class_labels = sqlContext.createDataFrame(sc.emptyRDD(), schema)
iri_class_labels = spark.sql("SELECT * from class_labels where ONT_NAME = 'chembio.owl'")
# iri_class_labels.describe()

sql = "SELECT distinct ONT_NAME from class_labels where CLASS_IRI = 'http://chem2bio2rdf.org/chem2bio2rdf.owl#ubiquitination'"
df = spark.sql(sql)
df.show()
df.createOrReplaceTempView("people")

df_tax_areas_pareas.join(df, 'ONT_NAME').show()

sql2 = "SELECT * FROM class_labels INNER JOIN people ON class_labels.ONT_NAME=people.ONT_NAME"
dff = spark.sql(sql2)
dff.show()


In [None]:
spark.stop()

In [None]:
df1=df_tax_areas_pareas.select('ONT_NAME').limit(5)
df1.show()
df2=df_tax_areas_pareas.select('ONT_NAME').limit(3)
df2.show()
df2.subtract(df1).show()
print( df2.subtract(df1).count()==0)

a = set()
if not a:
    print('empty set')
else:
    print('not empty')

In [None]:
#init 5 tables
schema = StructType([])
iri_class_labels = sqlContext.createDataFrame(sc.emptyRDD(), schema)
iri_class_hier = sqlContext.createDataFrame(sc.emptyRDD(), schema)
iri_tax_areas = sqlContext.createDataFrame(sc.emptyRDD(), schema)
iri_tax_areas_concepts = sqlContext.createDataFrame(sc.emptyRDD(), schema)
iri_tax_areas_pareas = sqlContext.createDataFrame(sc.emptyRDD(), schema)

#init ontologies list
global_ontList=sqlContext.createDataFrame(sc.emptyRDD(), schema)


class IRIRelatedTables:
    
    
    def __init__(self, iri):
        self.iri = iri
        self.local_ontList = self.getOntNames()
        self.initializeTables()
        
    def initializeTables(self):
        global iri_class_labels,\
        iri_class_hier,\
        iri_tax_areas,\
        iri_tax_areas_concepts,\
        iri_tax_areas_pareas,\
        global_ontList
        
        if global_ontList.count()==0:
            #initialize 5 tables
            print('init')
            iri_class_labels = self.initializeTable(df_class_labels)
            iri_class_hier = self.initializeTable(df_class_hier)
            iri_tax_areas = self.initializeTable(df_tax_areas)
            iri_tax_areas_concepts = self.initializeTable(df_tax_areas_concepts)
            iri_tax_areas_pareas = self.initializeTable(df_tax_areas_pareas)
            global_ontList = self.getOntNames()
        else:
            print('update')
            ontNames = self.local_ontList.subtract(global_ontList)
            if ontNames.count()!=0:
                print('inside update')
                #update 5 tables
                iri_class_labels = self.updateTable(iri_class_labels, df_class_labels, ontNames) 
                iri_class_hier = self.updateTable(iri_class_hier, df_class_hier, ontNames)
                iri_tax_areas = self.updateTable(iri_tax_areas, df_tax_areas, ontNames)
                iri_tax_areas_concepts = self.updateTable(iri_tax_areas_concepts, df_tax_areas_concepts, ontNames)
                iri_tax_areas_pareas = self.updateTable(iri_tax_areas_pareas, df_tax_areas_pareas, ontNames)
            global_ontList = global_ontList.union(self.local_ontList)
        
    def initializeTable(self, df):
        try:
            ontNames = self.getOntNames() 
            return df.join(ontNames, 'ONT_NAME').distinct()
        except:
            print('iri has no corresponding ontology found!')
            raise

    def updateTable(self, df, dff, ontNames):
        try:
            dff= dff.join(ontNames, 'ONT_NAME').distinct()
            return df.union(dff)
        except:
            print('iri has no corresponding ontology found!')
            raise
        
    def getOntNames(self):
        sql = "SELECT distinct ONT_NAME from class_labels where CLASS_IRI = '"+ self.iri +"'"
        df = spark.sql(sql)
        return df

iri= 'http://purl.obolibrary.org/obo/iao_0000030'
test = IRIRelatedTables(iri)
# print(iri_class_labels.count())
# print(iri_class_hier.count())

iri2 = 'http://chem2bio2rdf.org/chem2bio2rdf.owl#bioassay'
test2 = IRIRelatedTables(iri2)

def getChildrenFast(iri):
    result = set()
    children = iri_class_hier.filter(iri_class_hier.PARENT_CLASS_IRI==iri).distinct().collect()
    for row in children:
        result.add(row['CLASS_IRI'])
    print(result)
    return result

print(getChildrenFast(iri))
print(getChildrenFast(iri2))

In [38]:
# iri = 'http://chem2bio2rdf.org/chem2bio2rdf.owl#bioassay'
iri= 'http://purl.obolibrary.org/obo/iao_0000030'
df_hier = df_class_hier.rdd.map(lambda x : (x[1], x[2])).distinct().toDF(['src', 'dst'])

df_label = df_class_labels.rdd.map(lambda x: (x[1], x[2])).distinct().toDF(['id', 'label'])
# df_hier.reduceByKey(lambda x, y: x +y ).take(2)

In [35]:

from graphframes import *
g = GraphFrame(df_label, df_hier)
g.inDegrees.show()
# g = GraphFrame(vertex, edges)

+--------------------+--------+
|                  id|inDegree|
+--------------------+--------+
|http://purl.oboli...|       6|
|http://purl.oboli...|       5|
|http://purl.oboli...|       3|
|http://purl.oboli...|       1|
|http://purl.oboli...|       6|
|http://ncicb.nci....|      10|
|http://www.orpha....|       3|
|http://purl.oboli...|       7|
|http://ncicb.nci....|      23|
|http://purl.oboli...|      16|
|http://purl.oboli...|       2|
|http://purl.oboli...|       1|
|http://purl.oboli...|       1|
|http://purl.oboli...|       8|
|http://purl.oboli...|      28|
|http://purl.oboli...|       3|
|http://purl.oboli...|       1|
|http://purl.oboli...|      56|
|http://purl.oboli...|       1|
|http://www.ebi.ac...|       4|
+--------------------+--------+
only showing top 20 rows



In [51]:
# iri= 'http://purl.obolibrary.org/obo/iao_0000030'
# http://chem2bio2rdf.org/chem2bio2rdf.owl#bioassay
# http://purl.obolibrary.org/obo/chebi_62943
# http://purl.obolibrary.org/obo/chebi_133771
# http://www.w3.org/2002/07/owl#thing
paths = g.bfs("id = 'http://purl.obolibrary.org/obo/chebi_133771'", "id = 'http://purl.obolibrary.org/obo/chebi_62943'")
paths.show()

# # Specify edge filters or max path lengths.
# g.bfs("name = 'Esther'", "age < 32",\
#   edgeFilter="relationship != 'friend'", maxPathLength=3)

+--------------------+--------------------+--------------------+
|                from|                  e0|                  to|
+--------------------+--------------------+--------------------+
|[http://purl.obol...|[http://purl.obol...|[http://purl.obol...|
+--------------------+--------------------+--------------------+



In [36]:
from pyspark.sql.functions import *
degrees = g.degrees.sort(desc("degree"))
degrees.show()

+--------------------+------+
|                  id|degree|
+--------------------+------+
|http://purl.oboli...|353299|
|http://www.w3.org...| 37470|
|http://purl.oboli...| 14253|
|http://purl.oboli...|  7331|
|http://purl.oboli...|  7230|
|http://purl.oboli...|  4805|
|http://purl.oboli...|  4107|
|http://purl.oboli...|  3939|
|http://purl.oboli...|  2824|
|http://purl.oboli...|  2468|
|http://purl.oboli...|  2030|
|http://purl.oboli...|  1941|
|http://purl.oboli...|  1930|
|http://purl.oboli...|  1798|
|http://purl.oboli...|  1684|
|http://purl.oboli...|  1471|
|http://www.cogpo....|  1468|
|http://www.ebi.ac...|  1429|
|http://www.geneon...|  1345|
|http://purl.oboli...|  1182|
+--------------------+------+
only showing top 20 rows



In [37]:
# Display the vertex and edge DataFrames
g.vertices.show()
# +--+-------+---+
# |id|   name|age|
# +--+-------+---+
# | a|  Alice| 34|
# | b|    Bob| 36|
# | c|Charlie| 30|
# | d|  David| 29|
# | e| Esther| 32|
# | f|  Fanny| 36|
# | g|  Gabby| 60|
# +--+-------+---+

g.edges.show()
# +---+---+------------+
# |src|dst|relationship|
# +---+---+------------+
# |  a|  b|      friend|
# |  b|  c|      follow|
# |  c|  b|      follow|
# |  f|  c|      follow|
# |  e|  f|      follow|
# |  e|  d|      friend|
# |  d|  a|      friend|
# |  a|  e|      friend|
# +---+---+------------+

# Get a DataFrame with columns "id" and "inDegree" (in-degree)
vertexInDegrees = g.inDegrees

# Find the youngest user's age in the graph.
# This queries the vertex DataFrame.
# g.vertices.groupBy().min("age").show()

# Count the number of "follows" in the graph.
# This queries the edge DataFrame.
# numFollows = g.edges.filter("relationship = 'follow'").count()

+--------------------+--------------------+
|                  id|               label|
+--------------------+--------------------+
|http://purl.oboli...|       dron 00060820|
|http://purl.oboli...|       dron 00273407|
|http://purl.oboli...|       dron 00116305|
|http://purl.oboli...|       dron 00636386|
|http://purl.oboli...|       dron 00641546|
|http://purl.oboli...|       dron 00056018|
|http://ncicb.nci....|benign sertoli ce...|
|http://purl.oboli...|       dron 00176142|
|http://www.orpha....|     orphanet 295024|
|http://purl.oboli...|       dron 00067248|
|http://purl.oboli...|       dron 00581219|
|http://purl.oboli...|       dron 00678487|
|http://purl.oboli...|       dron 00040995|
|http://purl.oboli...|   desmosine residue|
|http://purl.oboli...|       dron 00604278|
|http://purl.oboli...|       dron 00043974|
|http://ncicb.nci....|      barth syndrome|
|http://purl.oboli...|       dron 00518678|
|http://purl.oboli...|         chebi 18729|
|http://www.bioass...|         b

In [29]:
iri2 = 'http://chem2bio2rdf.org/chem2bio2rdf.owl#bioassay'
df_hier.filter(lambda x: x[0] == iri2).take(2)
df_hier.filter(lambda x: x[0] == iri).take(2)
# def findChildrenDF(iri):
#     return df_hier.filter(lambda x: x[0] == iri)

# df = sc.parallelize(iri)

TypeError: condition should be string or Column

In [None]:
def getAllChildren(iri, visited=None):
    if visited is None:
        visited = set()
    visited.add(iri)
    result = []
    pair = []
    for child_iri in getChildrenFast(iri) - visited:
        print("get child: ", child_iri)
        if child_iri:
            result.append(child_iri)
            pair.append((child_iri, iri))
            result1, pair1 = getAllChildren(child_iri, visited)
            result += result1
            pair += pair1
            visited.add(child_iri)
            
    return result, pair

iri= 'http://purl.obolibrary.org/obo/chebi_62943'
test = IRIRelatedTables(iri)
c_vertices, c_edges = getAllChildren(iri)


In [None]:
df_class_labels.createOrReplaceTempView("class_labels")

iri = "http://purl.obolibrary.org/obo/iao_0000030"
print(spark.sql("SELECT * from class_labels where CLASS_IRI = '" + iri +"'").collect())


In [None]:
df_IRI_LABEL=df_class_hier.join(df_class_labels, 'CLASS_IRI').select(df_class_hier.CLASS_IRI, df_class_labels.CLASS_LABEL)

In [None]:
df_IRI_LABEL.count()
df_IRI_LABEL.printSchema()

In [None]:
df_IRI_LABEL.groupBy("CLASS_IRI").count().sort("count", ascending=False).limit(20).toPandas()

In [None]:
df_IRI_LABEL.show(10,truncate= True)
result = df_IRI_LABEL.where(df_IRI_LABEL.CLASS_IRI == 'http://purl.obolibrary.org/obo/iao_0000030').distinct()

In [None]:
print(result.count())
result.collect()[1]['CLASS_LABEL']

In [None]:
for row in result.collect():
    print(row['CLASS_IRI'], row['CLASS_LABEL'])

In [None]:
import re
def isIRIEqualLabel(iri, label):
    iri = iri.split('/')[-1].replace('_',' ')
    return label ==iri 

In [None]:
for row in result.collect():
    if not isIRIEqualLabel(row[0], row[1]):
        print(row[0], row[1])

In [None]:
def getIRI(label):
    result = []
    iris = df_class_labels.filter(df_class_labels.CLASS_LABEL==label).select('CLASS_IRI','CLASS_LABEL').distinct().collect()
    for row in iris:
        if not isIRIEqualLabel(row["CLASS_IRI"], row['CLASS_LABEL']):
            result.append(row["CLASS_IRI"])
    return result

def getLabel(iri):
    result =[]
    labels = df_class_labels.filter(df_class_labels.CLASS_IRI==iri).select('CLASS_IRI','CLASS_LABEL').distinct().collect()
    for row in labels:
        if not isIRIEqualLabel(row["CLASS_IRI"], row['CLASS_LABEL']):
            result.append(row["CLASS_LABEL"])
    return result

def getOnts(iri):
    result =[]
    ont_names = df_class_labels.filter(df_class_labels.CLASS_IRI==iri).select('ONT_NAME').distinct().collect()
    for row in ont_names:
        result.append(row["ONT_NAME"])
    return result
            

def getParents(iri):
    result =set()
    if iri == 'http://www.w3.org/2002/07/owl#thing':
        return result
    print("get parent for: ", iri) 
    parents = df_class_hier.filter(df_class_hier.CLASS_IRI==iri).distinct().collect()
    for row in parents:
        result.add(row['PARENT_CLASS_IRI'])
#     print(result)
    return result

def getChildren(iri):
    result =set()
    children = df_class_hier.filter(df_class_hier.PARENT_CLASS_IRI==iri).distinct().collect()
    for row in children:
        result.add(row['CLASS_IRI'])
#     print(result)
    return result

def getArea(iri, tax_type = 'op_restriction'):
    result = df_tax_areas_concepts.filter((df_tax_areas_concepts.TAX_TYPE==tax_type)&\
                                          (df_tax_areas_concepts.CLASS_IRI==iri) & \
                                          (df_tax_areas_concepts.AREA_ID!='[empty set]'))\
    .join(df_tax_areas,'AREA_ID').drop(df_tax_areas.TAX_TYPE)
    area = result.select('TAX_TYPE','AREA_ID', 'AREA_NAME', 'AREA_LEVEL').collect()
#     area = result.collect()
    return area

def getOntName(iri):
    return ont

def getPArea(iri, tax_type = 'op_restriction'):
    result = df_tax_areas_pareas.filter((df_tax_areas_pareas.TAX_TYPE==tax_type)&\
                                        (df_tax_areas_pareas.CLASS_IRI==iri) & \
                                          (df_tax_areas_pareas.PAREA_ROOT_IRI!='[empty set]'))
    parea = result.drop('ONT_NAME').distinct().collect()
    return parea

def getAreaLevel(iri, tax_type = 'op_restriction'):
    df_tax_areas_concepts2=df_tax_areas_concepts.filter((df_tax_areas_concepts.TAX_TYPE==tax_type)&\
                                                        (df_tax_areas_concepts.CLASS_IRI == iri)& \
                                                          (df_tax_areas_concepts.AREA_ID!='[empty set]'))
    
    result = df_tax_areas.join(df_tax_areas_concepts2, 'AREA_ID').drop('ONT_NAME').distinct().first()
    
    if not result:
        return 0
    else:
        return result['AREA_LEVEL']


In [None]:
getAreaLevel("http://purl.obolibrary.org/obo/apollo_sv_00000144")

In [None]:
print(getIRI("information content entity"))
print(getLabel("http://purl.obolibrary.org/obo/iao_0000030"))

In [None]:
getArea('http://chem2bio2rdf.org/chem2bio2rdf.owl#bioassay')
getChildren('http://chem2bio2rdf.org/chem2bio2rdf.owl#bioassay')
# result = df_tax_areas_concepts.filter(df_tax_areas_concepts.CLASS_IRI=='http://chem2bio2rdf.org/chem2bio2rdf.owl#bioassay')

# result.filter(df_tax_areas_concepts.AREA_ID!='[empty set]').show()
getParents('http://chem2bio2rdf.org/chem2bio2rdf.owl#bioassay')

In [None]:
getPArea('http://chem2bio2rdf.org/chem2bio2rdf.owl#bioassay')

In [None]:
df_class_hier
df_class_labels
df_tax_areas
df_tax_areas_concepts
df_tax_areas_pareas

In [None]:
getPArea('http://www.w3.org/2002/07/owl#thing')
getParents('http://www.w3.org/2002/07/owl#thing')
getParents('http://www.ifomis.org/bfo/1.1#entity')

In [None]:
def getPAreaParent(iri, depth=1):
    result = []
    for i in range(depth):
        for row in getPArea(iri):
            result.append((iri, row['PAREA_ROOT_IRI']))
            result + getPAreaParent(getParents(row['PAREA_ROOT_IRI']))
            
def getAreaParent(iri, depth):
    parents= []

    
    
def getPAreaChildren(iri, depth=1):
    result = []
    for i in range(depth):
        for row in getPArea(iri):
            result.append((row['PAREA_ROOT_IRI'], iri))
            result + getPAreaChilren(getChildren(row['PAREA_ROOT_IRI']))
    

In [None]:
def getAllChildren(iri, visited=None):
    if visited is None:
        visited = set()
    visited.add(iri)
    result = []
    pair = []
    for child_iri in getChildren(iri) - visited:
        print("get child: ",child_iri)
        if child_iri:
            result.append(child_iri)
            pair.append((child_iri, iri))
            result1, pair1 = getAllChildren(child_iri, visited)
            result += result1
            pair += pair1
            visited.add(child_iri)
            
    return result, pair
c_vertices, c_edges = getAllChildren('http://purl.obolibrary.org/obo/chebi_62943')


In [None]:
def getAllParents(iri, visited = None):
    if visited is None:
        visited = set()
    visited.add(iri)

    result = []
    pair = []
    if iri != 'http://www.w3.org/2002/07/owl#thing':
        for parent_iri in getParents(iri) - visited:
            print("get parent: ", parent_iri)
            if parent_iri:
                result.append(parent_iri)
                pair.append((iri, parent_iri))
                result1, pair1 = getAllParents(parent_iri, visited)
                result += result1
                pair += pair1
                visited.add(parent_iri)
    return result, pair

p_vertices, p_edges = getAllParents('http://purl.obolibrary.org/obo/chebi_62943')


In [None]:
import igraph as ig
g = ig.Graph()


In [None]:
g.add_vertex(name = 'http://purl.obolibrary.org/obo/chebi_62943')
g.add_vertices(p_vertices)

# for vertex in vertices:
#     g.add_vertex(name=vertex)

N=g.vcount()
print('total number of vertices imported: ' , N)
print(p_edges)

g.add_edges(p_edges)

L= g.ecount()
print('added # of edges: ', L)

In [None]:
# g.add_vertex(name = 'http://purl.obolibrary.org/obo/chebi_62943')
g.add_vertices(c_vertices)

# for vertex in vertices:
#     g.add_vertex(name=vertex)

N=g.vcount()
print('total number of vertices imported: ' , N)
print(c_edges)

g.add_edges(c_edges)

L= g.ecount()
print('added # of edges: ', L)

In [None]:
labels=[]
group=[]
for node in g.vs:
    labels.append(getLabel(node['name']))
    group.append(getAreaLevel(node['name']))

In [None]:
for i in g.vs:
    print(i)

In [None]:
layt=g.layout_auto(dim=3)
layt[5]

In [None]:
Xn=[layt[k][0] for k in range(N)]# x-coordinates of nodes
Yn=[layt[k][1] for k in range(N)]# y-coordinates
Zn=[layt[k][2] for k in range(N)]# z-coordinates
Xe=[]
Ye=[]
Ze=[]
for e in g.es:
    e=e.tuple
    Xe+=[layt[e[0]][0],layt[e[1]][0], None]# x-coordinates of edge ends
    Ye+=[layt[e[0]][1],layt[e[1]][1], None]
    Ze+=[layt[e[0]][2],layt[e[1]][2], None]

In [None]:
import plotly as py
from plotly.graph_objs import *

In [None]:
trace1=Scatter3d(x=Xe,
               y=Ye,
               z=Ze,
               mode='lines',
               line=Line(color='rgb(125,125,125)', width=1),
               hoverinfo='none'
               )
trace2=Scatter3d(x=Xn,
               y=Yn,
               z=Zn,
               mode='markers',
               name='actors',
               marker=Marker(symbol='dot',
                             size=6,
                             color=group,
                             colorscale='Viridis',
                             line=Line(color='rgb(50,50,50)', width=0.5)
                             ),
               text=labels,
               hoverinfo='text'
               )

In [None]:
axis=dict(showbackground=False,
          showline=False,
          zeroline=False,
          showgrid=False,
          showticklabels=False,
          title=''
          )

In [None]:
layout = Layout(
         title="Network of coappearances of characters in Victor Hugo's novel<br> Les Miserables (3D visualization)",
         width=1000,
         height=1000,
         showlegend=False,
         scene=Scene(
         xaxis=XAxis(axis),
         yaxis=YAxis(axis),
         zaxis=ZAxis(axis),
        ),
     margin=Margin(
        t=100
    ),
    hovermode='closest',
    annotations=Annotations([
           Annotation(
           showarrow=False,
            text="Data source: <a href='http://bost.ocks.org/mike/miserables/miserables.json'>[1] miserables.json</a>",
            xref='paper',
            yref='paper',
            x=0,
            y=0.1,
            xanchor='left',
            yanchor='bottom',
            font=Font(
            size=14
            )
            )
        ]),    )

In [None]:
data=Data([trace1, trace2])
fig=Figure(data=data, layout=layout)

py.offline.init_notebook_mode(connected=True)

py.offline.iplot(fig, filename='Les-Miserables')