In [None]:
import pixiedust

# Install the GraphFrames build that matches the running Spark version.
# NOTE(review): `sc` is only assigned in the following cell (sc = spark.sparkContext),
# so on a fresh kernel this cell fails with NameError unless the environment
# pre-defines `sc` — confirm the intended execution order.
if sc.version.startswith('1.6.'):  # Spark 1.6
    pixiedust.installPackage("graphframes:graphframes:0.5.0-spark1.6-s_2.11")
elif sc.version.startswith('2.'):  # Spark 2.1, 2.0
    pixiedust.installPackage("graphframes:graphframes:0.5.0-spark2.1-s_2.11")


# scala-logging artifacts installed alongside GraphFrames (presumably its runtime dependencies — verify).
pixiedust.installPackage("com.typesafe.scala-logging:scala-logging-api_2.11:2.1.2")
pixiedust.installPackage("com.typesafe.scala-logging:scala-logging-slf4j_2.11:2.1.2")

print("done")


In [1]:
import findspark
findspark.init()

from pyspark.sql import SparkSession
from pyspark.sql import SQLContext

# import os

# os.environ['PYSPARK_SUBMIT_ARGS'] = '--packages graphframes:graphframes:0.5.0-spark2.1-s_2.11 pyspark-shell'


# Create (or reuse) a local SparkSession named "sparkPlot".
spark = (
    SparkSession.builder
    .master("local")
    .appName("sparkPlot")
    .config("spark.executor.memory", "2gb")
    .getOrCreate()
)

# spark.conf.set("spark.jars.packages", "graphframes:graphframes:0.5.0-spark2.1-s_2.11")

# Handles reused by later cells: the session's SparkContext and a SQLContext wrapping it.
sc = spark.sparkContext
sqlContext = SQLContext(sc)

In [177]:
from pyspark.sql.types import StructType, StructField
from pyspark.sql.types import DoubleType, IntegerType, StringType

# Every column of the class-hierarchy CSV is read as a string.
schema = StructType([
    StructField(column_name, StringType())
    for column_name in ("ONT_NAME", "CLASS_IRI", "PARENT_CLASS_IRI")
])

# The first line is treated as a header; the reader runs in DROPMALFORMED mode.
df_class_hier = (
    spark.read
    .schema(schema)
    .option("header", "true")
    .option("mode", "DROPMALFORMED")
    .csv("hdfs://localhost:9000/ONT_CLASS_HIERARCHY.csv")
)

# Row count as loaded, then again after dropping ONT_NAME and de-duplicating.
print(df_class_hier.count())
df_class_hier = df_class_hier.drop('ONT_NAME').distinct()
print(df_class_hier.count())

809619
800060


In [3]:
# df.show()
# Print the column names and types of the class-hierarchy frame.
df_class_hier.printSchema()

root
 |-- ONT_NAME: string (nullable = true)
 |-- CLASS_IRI: string (nullable = true)
 |-- PARENT_CLASS_IRI: string (nullable = true)



In [4]:
# Count hierarchy rows per CLASS_IRI and show the most frequent IRIs first.
df_class_hier.groupBy("CLASS_IRI").count().sort("count", ascending=False).show()

+--------------------+-----+
|           CLASS_IRI|count|
+--------------------+-----+
|http://www.biopax...|   77|
|http://chem2bio2r...|   76|
|http://www.owl-on...|   33|
|http://www.owl-on...|   33|
|http://www.owl-on...|   33|
|http://www.owl-on...|   33|
|http://www.owl-on...|   33|
|http://www.owl-on...|   33|
|http://www.owl-on...|   33|
|http://www.owl-on...|   33|
|http://www.owl-on...|   33|
|http://www.owl-on...|   33|
|http://www.owl-on...|   33|
|http://www.owl-on...|   33|
|http://www.owl-on...|   33|
|http://www.owl-on...|   33|
|http://www.owl-on...|   33|
|http://www.owl-on...|   33|
|http://www.owl-on...|   33|
|http://www.owl-on...|   33|
+--------------------+-----+
only showing top 20 rows



In [179]:
from pyspark.sql.types import StructType, StructField
from pyspark.sql.types import DoubleType, IntegerType, StringType

# Every column of the class-labels CSV is read as a string.
schema = StructType([
    StructField(column_name, StringType())
    for column_name in ("ONT_NAME", "CLASS_IRI", "CLASS_LABEL")
])

# The first line is treated as a header; the reader runs in DROPMALFORMED mode.
df_class_labels = (
    spark.read
    .schema(schema)
    .option("header", "true")
    .option("mode", "DROPMALFORMED")
    .csv("hdfs://localhost:9000/ONT_CLASS_LABELS.csv")
)

df_class_labels.printSchema()

# Row count as loaded, then after dropping ONT_NAME and de-duplicating.
print(df_class_labels.count())
df_class_labels = df_class_labels.drop("ONT_NAME").distinct()
print(df_class_labels.count())
df_class_labels.distinct().show(10)

# Cross-check: distinct() and dropDuplicates() on the (IRI, label) pairs should agree.
iri_label_pairs = df_class_labels.select('CLASS_IRI', 'CLASS_LABEL')
print(iri_label_pairs.distinct().count())
print(iri_label_pairs.dropDuplicates().count())

root
 |-- ONT_NAME: string (nullable = true)
 |-- CLASS_IRI: string (nullable = true)
 |-- CLASS_LABEL: string (nullable = true)

711444
709148
+--------------------+--------------------+
|           CLASS_IRI|         CLASS_LABEL|
+--------------------+--------------------+
|http://purl.oboli...|    prezonal element|
|http://purl.oboli...|        metatarsal v|
|http://purl.oboli...|         aao 0010459|
|http://purl.oboli...|endolymphatic system|
|http://purl.oboli...|  anatomical cluster|
|http://purl.oboli...|lamina nariochoan...|
|http://purl.oboli...|            carpal 2|
|http://purl.oboli...|              a8.27*|
|http://purl.org/i...|  integrate-and-fire|
|http://purl.oboli...|speciintealized e...|
+--------------------+--------------------+
only showing top 10 rows

709148
709148
709148


In [184]:
from pyspark.sql.types import StructType, StructField
from pyspark.sql.types import DoubleType, IntegerType, StringType

# String columns first, then the single integer column AREA_LEVEL.
schema = StructType(
    [
        StructField(column_name, StringType())
        for column_name in ("ONT_NAME", "TAX_TYPE", "AREA_ID", "AREA_NAME")
    ]
    + [StructField("AREA_LEVEL", IntegerType())]
)

# The first line is treated as a header; the reader runs in DROPMALFORMED mode.
df_tax_areas = (
    spark.read
    .schema(schema)
    .option("header", "true")
    .option("mode", "DROPMALFORMED")
    .csv("hdfs://localhost:9000/TAX_AREAS.csv")
)

# Raw row count and schema, then the count after dropping ONT_NAME and de-duplicating.
print(df_tax_areas.count())
df_tax_areas.printSchema()
df_tax_areas = df_tax_areas.drop('ONT_NAME').distinct()
print(df_tax_areas.count())

1258
root
 |-- ONT_NAME: string (nullable = true)
 |-- TAX_TYPE: string (nullable = true)
 |-- AREA_ID: string (nullable = true)
 |-- AREA_NAME: string (nullable = true)
 |-- AREA_LEVEL: integer (nullable = true)

1114


In [186]:
from pyspark.sql.types import StructType, StructField
from pyspark.sql.types import DoubleType, IntegerType, StringType

# Every column of the area-to-concept CSV is read as a string.
schema = StructType([
    StructField(column_name, StringType())
    for column_name in ("ONT_NAME", "TAX_TYPE", "AREA_ID", "CLASS_IRI")
])

# The first line is treated as a header; the reader runs in DROPMALFORMED mode.
df_tax_areas_concepts = (
    spark.read
    .schema(schema)
    .option("header", "true")
    .option("mode", "DROPMALFORMED")
    .csv("hdfs://localhost:9000/TAX_AREAS_CONCEPTS.csv")
)

# Raw row count and schema, then the count after dropping ONT_NAME and de-duplicating.
print(df_tax_areas_concepts.count())
df_tax_areas_concepts.printSchema()
df_tax_areas_concepts = df_tax_areas_concepts.drop('ONT_NAME').distinct()
print(df_tax_areas_concepts.count())

343737
root
 |-- ONT_NAME: string (nullable = true)
 |-- TAX_TYPE: string (nullable = true)
 |-- AREA_ID: string (nullable = true)
 |-- CLASS_IRI: string (nullable = true)

339472


In [188]:
from pyspark.sql.types import StructType, StructField
from pyspark.sql.types import DoubleType, IntegerType, StringType

# Every column of the partial-area CSV is read as a string.
schema = StructType([
    StructField(column_name, StringType())
    for column_name in ("ONT_NAME", "TAX_TYPE", "PAREA_ROOT_IRI", "CLASS_IRI")
])

# The first line is treated as a header; the reader runs in DROPMALFORMED mode.
df_tax_areas_pareas = (
    spark.read
    .schema(schema)
    .option("header", "true")
    .option("mode", "DROPMALFORMED")
    .csv("hdfs://localhost:9000/TAX_AREAS_PAREAS.csv")
)

# Raw row count and schema, then the count after dropping ONT_NAME and de-duplicating.
print(df_tax_areas_pareas.count())
df_tax_areas_pareas.printSchema()
df_tax_areas_pareas = df_tax_areas_pareas.drop('ONT_NAME').distinct()
print(df_tax_areas_pareas.count())

381471
root
 |-- ONT_NAME: string (nullable = true)
 |-- TAX_TYPE: string (nullable = true)
 |-- PAREA_ROOT_IRI: string (nullable = true)
 |-- CLASS_IRI: string (nullable = true)

377019


In [None]:
# Pair every hierarchy row with the labels recorded for the same CLASS_IRI.
df_IRI_LABEL = (
    df_class_hier
    .join(df_class_labels, 'CLASS_IRI')
    .select(df_class_hier.CLASS_IRI, df_class_labels.CLASS_LABEL)
)

In [None]:
# Row count of the IRI/label join. It is printed explicitly because a notebook
# only echoes the value of a cell's last expression; without print() the count
# would be computed and then thrown away.
print(df_IRI_LABEL.count())
df_IRI_LABEL.printSchema()

In [None]:
# The 20 IRIs with the most joined rows, pulled into a pandas frame for display.
(
    df_IRI_LABEL
    .groupBy("CLASS_IRI")
    .count()
    .sort("count", ascending=False)
    .limit(20)
    .toPandas()
)

In [None]:
# Preview ten joined rows with truncation enabled.
df_IRI_LABEL.show(10, truncate=True)

# Keep only the distinct rows recorded for the iao_0000030 class IRI.
result = (
    df_IRI_LABEL
    .where(df_IRI_LABEL.CLASS_IRI == 'http://purl.obolibrary.org/obo/iao_0000030')
    .distinct()
)

In [None]:
# Number of distinct rows found for the selected IRI.
print(result.count())
# Label of the second collected row.
# NOTE(review): index 1 raises IndexError when fewer than two rows come back.
result.collect()[1]['CLASS_LABEL']

In [None]:
# Dump every (IRI, label) pair collected for the selected class.
for label_row in result.collect():
    print(label_row['CLASS_IRI'], label_row['CLASS_LABEL'])

In [9]:
import re
def isIRIEqualLabel(iri, label):
    """Tell whether ``label`` is just the IRI's last path segment.

    The text after the final '/' of ``iri`` is taken, its underscores are
    replaced by spaces, and the result is compared with ``label``.

    Returns:
        True when the two strings are equal, False otherwise.
    """
    last_segment = iri.rsplit('/', 1)[-1]
    return label == last_segment.replace('_', ' ')

In [None]:
# List only the rows whose label is not simply the IRI's last path segment.
for iri_value, label_value in result.collect():
    if not isIRIEqualLabel(iri_value, label_value):
        print(iri_value, label_value)

In [307]:
def getIRI(label):
    """Find the class IRI for an exact CLASS_LABEL match in df_class_labels.

    Rows whose label is merely the IRI's last path segment (as judged by
    isIRIEqualLabel) are skipped.

    Returns:
        The CLASS_IRI of the first remaining row, or None if no row qualifies.
    """
    matches = (
        df_class_labels
        .filter(df_class_labels.CLASS_LABEL == label)
        .select('CLASS_IRI', 'CLASS_LABEL')
        .distinct()
        .collect()
    )
    return next(
        (
            match["CLASS_IRI"]
            for match in matches
            if not isIRIEqualLabel(match["CLASS_IRI"], match['CLASS_LABEL'])
        ),
        None,
    )

def getLabel(iri):
    """Find a label for the class ``iri`` in df_class_labels.

    Rows whose label is merely the IRI's last path segment (as judged by
    isIRIEqualLabel) are skipped.

    Returns:
        The CLASS_LABEL of the first remaining row, or None if no row qualifies.
    """
    matches = (
        df_class_labels
        .filter(df_class_labels.CLASS_IRI == iri)
        .select('CLASS_IRI', 'CLASS_LABEL')
        .distinct()
        .collect()
    )
    return next(
        (
            match["CLASS_LABEL"]
            for match in matches
            if not isIRIEqualLabel(match["CLASS_IRI"], match['CLASS_LABEL'])
        ),
        None,
    )

def getParents(iri):
    """Return the set of PARENT_CLASS_IRI values recorded for ``iri``.

    The owl#thing IRI is treated as the top of the hierarchy: it yields an
    empty set without querying df_class_hier. For any other IRI a progress
    line is printed before the lookup.
    """
    if iri == 'http://www.w3.org/2002/07/owl#thing':
        return set()
    print("get parent for: ", iri)
    hierarchy_rows = df_class_hier.filter(df_class_hier.CLASS_IRI == iri).distinct().collect()
    return {hierarchy_row['PARENT_CLASS_IRI'] for hierarchy_row in hierarchy_rows}

def getChildren(iri):
    """Return the set of CLASS_IRI values whose PARENT_CLASS_IRI equals ``iri``."""
    hierarchy_rows = (
        df_class_hier
        .filter(df_class_hier.PARENT_CLASS_IRI == iri)
        .distinct()
        .collect()
    )
    return {hierarchy_row['CLASS_IRI'] for hierarchy_row in hierarchy_rows}

def getArea(iri, tax_type = 'op_restriction'):
    """Collect the taxonomy areas that contain the class ``iri``.

    df_tax_areas_concepts is filtered to the given ``tax_type`` and ``iri``,
    ignoring rows whose AREA_ID is the literal '[empty set]'. The survivors
    are joined to df_tax_areas on AREA_ID only; the TAX_TYPE column coming
    from df_tax_areas is dropped so the concept-side value is the one kept.

    Returns:
        A list of Rows with TAX_TYPE, AREA_ID, AREA_NAME and AREA_LEVEL.
    """
    concepts = df_tax_areas_concepts
    matching_concepts = concepts.filter(
        (concepts.TAX_TYPE == tax_type)
        & (concepts.CLASS_IRI == iri)
        & (concepts.AREA_ID != '[empty set]')
    )
    with_area_info = matching_concepts.join(df_tax_areas, 'AREA_ID').drop(df_tax_areas.TAX_TYPE)
    return with_area_info.select('TAX_TYPE', 'AREA_ID', 'AREA_NAME', 'AREA_LEVEL').collect()

def getOntName(iri):
    """Return the value of the global name ``ont``; ``iri`` is not used.

    NOTE(review): ``ont`` is not assigned in any visible cell, so unless it is
    defined elsewhere this call raises NameError. Judging by the function
    name, an ontology-name lookup for ``iri`` was presumably intended — confirm
    and implement, or remove.
    """
    return ont

def getPArea(iri, tax_type = 'op_restriction'):
    """Collect the partial-area rows recorded for the class ``iri``.

    df_tax_areas_pareas is filtered to the given ``tax_type`` and ``iri``,
    ignoring rows whose PAREA_ROOT_IRI is the literal '[empty set]'.

    Returns:
        A list of distinct Rows (with any ONT_NAME column removed).
    """
    pareas = df_tax_areas_pareas
    matching_pareas = pareas.filter(
        (pareas.TAX_TYPE == tax_type)
        & (pareas.CLASS_IRI == iri)
        & (pareas.PAREA_ROOT_IRI != '[empty set]')
    )
    return matching_pareas.drop('ONT_NAME').distinct().collect()

def getAreaLevel(iri, tax_type = 'op_restriction'):
    """Return the AREA_LEVEL of an area containing the class ``iri``.

    df_tax_areas_concepts is filtered to the given ``tax_type`` and ``iri``
    (ignoring the literal '[empty set]' AREA_ID) and joined to df_tax_areas
    on AREA_ID. The first distinct joined row supplies the level.

    Returns:
        The AREA_LEVEL of that first row, or 0 when the join is empty.
    """
    concepts = df_tax_areas_concepts
    matching_concepts = concepts.filter(
        (concepts.TAX_TYPE == tax_type)
        & (concepts.CLASS_IRI == iri)
        & (concepts.AREA_ID != '[empty set]')
    )
    first_area = (
        df_tax_areas
        .join(matching_concepts, 'AREA_ID')
        .drop('ONT_NAME')
        .distinct()
        .first()
    )
    return first_area['AREA_LEVEL'] if first_area else 0


In [291]:
# Spot check: area level of one Apollo-SV class under the default tax_type.
getAreaLevel("http://purl.obolibrary.org/obo/apollo_sv_00000144")

2

In [190]:
# Round-trip check: label -> IRI, then the same IRI -> label.
print(getIRI("information content entity"))
print(getLabel("http://purl.obolibrary.org/obo/iao_0000030"))

http://purl.obolibrary.org/obo/iao_0000030
information content entity


In [109]:
# Look up the area, children and parents of the chem2bio2rdf bioassay class.
# Only the value of the last expression (getParents) is echoed by the notebook;
# the getArea and getChildren results are computed and discarded.
getArea('http://chem2bio2rdf.org/chem2bio2rdf.owl#bioassay')
getChildren('http://chem2bio2rdf.org/chem2bio2rdf.owl#bioassay')
# result = df_tax_areas_concepts.filter(df_tax_areas_concepts.CLASS_IRI=='http://chem2bio2rdf.org/chem2bio2rdf.owl#bioassay')

# result.filter(df_tax_areas_concepts.AREA_ID!='[empty set]').show()
getParents('http://chem2bio2rdf.org/chem2bio2rdf.owl#bioassay')

['http://www.biopax.org/release/biopax-level3.owl#evidence']

In [101]:
# Partial-area rows for the bioassay class under the default tax_type.
getPArea('http://chem2bio2rdf.org/chem2bio2rdf.owl#bioassay')

[Row(TAX_TYPE='op_restriction', PAREA_ROOT_IRI='http://www.w3.org/2002/07/owl#thing', CLASS_IRI='http://chem2bio2rdf.org/chem2bio2rdf.owl#bioassay')]

In [None]:
# Bare references to the five DataFrames loaded above.
# Only the last one (df_tax_areas_pareas) would be echoed; the other lines have no effect.
df_class_hier
df_class_labels
df_tax_areas
df_tax_areas_concepts
df_tax_areas_pareas

In [191]:
# Probe the top of the hierarchy: owl#thing itself and the BFO entity class.
# Only the value of the last expression is echoed by the notebook.
getPArea('http://www.w3.org/2002/07/owl#thing')
getParents('http://www.w3.org/2002/07/owl#thing')
getParents('http://www.ifomis.org/bfo/1.1#entity')

['http://www.w3.org/2002/07/owl#thing']

In [102]:
def getPAreaParent(iri, depth=1):
    """Collect (class IRI, partial-area root IRI) pairs, climbing upwards.

    For every partial-area row of ``iri`` the pair (iri, PAREA_ROOT_IRI) is
    recorded. While ``depth`` allows, the same is done for each parent of that
    root, with ``depth`` reduced by one per step so the walk always terminates.

    Args:
        iri: Class IRI to start from (a single string).
        depth: Number of levels to expand; values below 1 yield an empty list.

    Returns:
        A list of (iri, parea_root_iri) tuples. The earlier version discarded
        its accumulated pairs and implicitly returned None.
    """
    result = []
    if depth < 1:
        return result
    for row in getPArea(iri):
        root_iri = row['PAREA_ROOT_IRI']
        result.append((iri, root_iri))
        # getParents returns a set of IRIs; recurse on each member rather than
        # passing the whole set where a single IRI string is expected.
        for parent_iri in getParents(root_iri):
            if parent_iri:
                # list + list builds a new list, so the concatenation must be
                # assigned back for the recursive results to be kept.
                result += getPAreaParent(parent_iri, depth - 1)
    return result
            
def getAreaParent(iri, depth):
    """Unfinished stub: creates an empty list and implicitly returns None.

    Neither ``iri`` nor ``depth`` is used yet.
    NOTE(review): presumably meant to mirror getPAreaParent for areas — confirm or remove.
    """
    parents= []

    
    
def getPAreaChildren(iri, depth=1):
    """Collect (partial-area root IRI, class IRI) pairs, walking downwards.

    For every partial-area row of ``iri`` the pair (PAREA_ROOT_IRI, iri) is
    recorded. While ``depth`` allows, the same is done for each child of that
    root, with ``depth`` reduced by one per step so the walk always terminates.

    Args:
        iri: Class IRI to start from (a single string).
        depth: Number of levels to expand; values below 1 yield an empty list.

    Returns:
        A list of (parea_root_iri, iri) tuples. The earlier version called a
        misspelled function name (NameError), discarded its accumulated pairs
        and implicitly returned None.
    """
    result = []
    if depth < 1:
        return result
    for row in getPArea(iri):
        root_iri = row['PAREA_ROOT_IRI']
        result.append((root_iri, iri))
        # getChildren returns a set of IRIs; recurse on each member rather than
        # passing the whole set where a single IRI string is expected.
        for child_iri in getChildren(root_iri):
            if child_iri:
                # list + list builds a new list, so the concatenation must be
                # assigned back for the recursive results to be kept.
                result += getPAreaChildren(child_iri, depth - 1)
    return result
    

In [302]:
def getAllChildren(iri, visited=None):
    """Collect every descendant of ``iri`` together with the child->parent edges.

    Args:
        iri: Class IRI whose descendants are wanted.
        visited: Set of IRIs already expanded; shared across the recursion and
            created on the first call when omitted.

    Returns:
        (result, pair): ``result`` lists each newly discovered descendant IRI
        exactly once; ``pair`` lists one (child_iri, parent_iri) tuple for every
        child relation of each expanded class, including relations that lead to
        a class already discovered through another path.
    """
    if visited is None:
        visited = set()
    visited.add(iri)
    result = []
    pair = []
    for child_iri in getChildren(iri):
        # Skip null children and self-references.
        if not child_iri or child_iri == iri:
            continue
        pair.append((child_iri, iri))
        # Membership is tested here, at processing time. Computing
        # `getChildren(iri) - visited` once before the loop let a class that
        # was reached through a sibling's subtree be appended a second time.
        if child_iri in visited:
            continue
        print("get child: ",child_iri)
        result.append(child_iri)
        # The recursive call marks child_iri as visited on entry.
        result1, pair1 = getAllChildren(child_iri, visited)
        result += result1
        pair += pair1

    return result, pair
# Descendants of chebi_62943 and the (child, parent) edges that connect them.
c_vertices, c_edges = getAllChildren('http://purl.obolibrary.org/obo/chebi_62943')


get child:  http://purl.obolibrary.org/obo/chebi_62945
get child:  http://purl.obolibrary.org/obo/chebi_137988
get child:  http://purl.obolibrary.org/obo/chebi_90696
get child:  http://purl.obolibrary.org/obo/chebi_57445
get child:  http://purl.obolibrary.org/obo/chebi_137989
get child:  http://purl.obolibrary.org/obo/chebi_136539
get child:  http://purl.obolibrary.org/obo/chebi_133769
get child:  http://purl.obolibrary.org/obo/chebi_133768
get child:  http://purl.obolibrary.org/obo/chebi_59326
get child:  http://purl.obolibrary.org/obo/chebi_133132
get child:  http://purl.obolibrary.org/obo/chebi_134577
get child:  http://purl.obolibrary.org/obo/chebi_133374
get child:  http://purl.obolibrary.org/obo/chebi_90827
get child:  http://purl.obolibrary.org/obo/chebi_133422
get child:  http://purl.obolibrary.org/obo/chebi_133134
get child:  http://purl.obolibrary.org/obo/chebi_77768
get child:  http://purl.obolibrary.org/obo/chebi_133392
get child:  http://purl.obolibrary.org/obo/chebi_13340

In [301]:
def getAllParents(iri, visited = None):
    """Collect every ancestor of ``iri`` together with the child->parent edges.

    The walk stops at owl#thing, which is never expanded.

    Args:
        iri: Class IRI whose ancestors are wanted.
        visited: Set of IRIs already expanded; shared across the recursion and
            created on the first call when omitted.

    Returns:
        (result, pair): ``result`` lists each newly discovered ancestor IRI
        exactly once; ``pair`` lists one (child_iri, parent_iri) tuple for every
        parent relation of each expanded class, including relations that lead
        to a class already discovered through another path.
    """
    if visited is None:
        visited = set()
    visited.add(iri)

    result = []
    pair = []
    if iri == 'http://www.w3.org/2002/07/owl#thing':
        return result, pair
    for parent_iri in getParents(iri):
        # Skip null parents and self-references.
        if not parent_iri or parent_iri == iri:
            continue
        pair.append((iri, parent_iri))
        # Membership is tested here, at processing time. Computing
        # `getParents(iri) - visited` once before the loop let a class that
        # was reached through another parent's ancestry be appended twice.
        if parent_iri in visited:
            continue
        print("get parent: ", parent_iri)
        result.append(parent_iri)
        # The recursive call marks parent_iri as visited on entry.
        result1, pair1 = getAllParents(parent_iri, visited)
        result += result1
        pair += pair1
    return result, pair

# Ancestors of chebi_62943 and the (child, parent) edges that connect them.
p_vertices, p_edges = getAllParents('http://purl.obolibrary.org/obo/chebi_62943')


get parent for:  http://purl.obolibrary.org/obo/chebi_62943
get parent:  http://purl.obolibrary.org/obo/chebi_2580
get parent for:  http://purl.obolibrary.org/obo/chebi_2580
get parent:  http://purl.obolibrary.org/obo/chebi_28868
get parent for:  http://purl.obolibrary.org/obo/chebi_28868
get parent:  http://purl.obolibrary.org/obo/chebi_35757
get parent for:  http://purl.obolibrary.org/obo/chebi_35757
get parent:  http://purl.obolibrary.org/obo/chebi_29067
get parent for:  http://purl.obolibrary.org/obo/chebi_29067
get parent:  http://purl.obolibrary.org/obo/chebi_25696
get parent for:  http://purl.obolibrary.org/obo/chebi_25696
get parent:  http://purl.obolibrary.org/obo/chebi_22563
get parent for:  http://purl.obolibrary.org/obo/chebi_22563
get parent:  http://purl.obolibrary.org/obo/chebi_24870
get parent for:  http://purl.obolibrary.org/obo/chebi_24870
get parent:  http://purl.obolibrary.org/obo/chebi_23367
get parent for:  http://purl.obolibrary.org/obo/chebi_23367
get parent:  h

In [303]:
import igraph as ig
# Empty igraph graph that the following cells fill with vertices and edges.
# NOTE(review): Graph() with no arguments is presumably undirected, so the
# child->parent direction of the edge tuples is not kept — confirm this is intended.
g = ig.Graph()


In [304]:
# Seed the graph with the start class, then add every ancestor from p_vertices.
# NOTE(review): `g` is created in a separate cell, so re-running this cell
# presumably adds the same vertex names a second time — confirm before re-executing.
g.add_vertex(name = 'http://purl.obolibrary.org/obo/chebi_62943')
g.add_vertices(p_vertices)

# for vertex in vertices:
#     g.add_vertex(name=vertex)

# N is the vertex count after the ancestors were added.
N=g.vcount()
print('total number of vertices imported: ' , N)
print(p_edges)

# Edge endpoints are given as vertex names (the IRIs in p_edges).
g.add_edges(p_edges)

L= g.ecount()
print('added # of edges: ', L)

total number of vertices imported:  41
[('http://purl.obolibrary.org/obo/chebi_62943', 'http://purl.obolibrary.org/obo/chebi_2580'), ('http://purl.obolibrary.org/obo/chebi_2580', 'http://purl.obolibrary.org/obo/chebi_28868'), ('http://purl.obolibrary.org/obo/chebi_28868', 'http://purl.obolibrary.org/obo/chebi_35757'), ('http://purl.obolibrary.org/obo/chebi_35757', 'http://purl.obolibrary.org/obo/chebi_29067'), ('http://purl.obolibrary.org/obo/chebi_29067', 'http://purl.obolibrary.org/obo/chebi_25696'), ('http://purl.obolibrary.org/obo/chebi_25696', 'http://purl.obolibrary.org/obo/chebi_22563'), ('http://purl.obolibrary.org/obo/chebi_22563', 'http://purl.obolibrary.org/obo/chebi_24870'), ('http://purl.obolibrary.org/obo/chebi_24870', 'http://purl.obolibrary.org/obo/chebi_23367'), ('http://purl.obolibrary.org/obo/chebi_23367', 'http://purl.obolibrary.org/obo/chebi_24431'), ('http://purl.obolibrary.org/obo/chebi_24431', 'http://www.ifomis.org/bfo/1.1/snap#materialentity'), ('http://www.if

In [305]:
# Add the descendants from c_vertices; the start class is assumed to be in the
# graph already, which is why the add_vertex line below is commented out.
# g.add_vertex(name = 'http://purl.obolibrary.org/obo/chebi_62943')
g.add_vertices(c_vertices)

# for vertex in vertices:
#     g.add_vertex(name=vertex)

# N is reassigned to the new total; the coordinate cell further down iterates range(N).
N=g.vcount()
print('total number of vertices imported: ' , N)
print(c_edges)

# Edge endpoints are given as vertex names (the IRIs in c_edges).
g.add_edges(c_edges)

# L is the total edge count of the graph after this addition.
L= g.ecount()
print('added # of edges: ', L)

total number of vertices imported:  101
[('http://purl.obolibrary.org/obo/chebi_62945', 'http://purl.obolibrary.org/obo/chebi_62943'), ('http://purl.obolibrary.org/obo/chebi_137988', 'http://purl.obolibrary.org/obo/chebi_62945'), ('http://purl.obolibrary.org/obo/chebi_90696', 'http://purl.obolibrary.org/obo/chebi_62945'), ('http://purl.obolibrary.org/obo/chebi_57445', 'http://purl.obolibrary.org/obo/chebi_62945'), ('http://purl.obolibrary.org/obo/chebi_137989', 'http://purl.obolibrary.org/obo/chebi_57445'), ('http://purl.obolibrary.org/obo/chebi_136539', 'http://purl.obolibrary.org/obo/chebi_62945'), ('http://purl.obolibrary.org/obo/chebi_133769', 'http://purl.obolibrary.org/obo/chebi_62943'), ('http://purl.obolibrary.org/obo/chebi_133768', 'http://purl.obolibrary.org/obo/chebi_62943'), ('http://purl.obolibrary.org/obo/chebi_59326', 'http://purl.obolibrary.org/obo/chebi_62943'), ('http://purl.obolibrary.org/obo/chebi_133132', 'http://purl.obolibrary.org/obo/chebi_59326'), ('http://purl

In [308]:
# Per-vertex hover text (class label) and colour value (area level), in g.vs order.
# Each getLabel / getAreaLevel call runs its own Spark query.
labels = [getLabel(vertex['name']) for vertex in g.vs]
group = [getAreaLevel(vertex['name']) for vertex in g.vs]

In [229]:
# Debug dump: print every vertex object of the graph (index and attributes).
for i in g.vs:
    print(i)

igraph.Vertex(<igraph.Graph object at 0x7f3e6ce7fd68>, 0, {'name': 'http://purl.obolibrary.org/obo/chebi_62943'})
igraph.Vertex(<igraph.Graph object at 0x7f3e6ce7fd68>, 1, {'name': 'http://purl.obolibrary.org/obo/chebi_2580'})
igraph.Vertex(<igraph.Graph object at 0x7f3e6ce7fd68>, 2, {'name': 'http://purl.obolibrary.org/obo/chebi_28868'})
igraph.Vertex(<igraph.Graph object at 0x7f3e6ce7fd68>, 3, {'name': 'http://purl.obolibrary.org/obo/chebi_35757'})
igraph.Vertex(<igraph.Graph object at 0x7f3e6ce7fd68>, 4, {'name': 'http://purl.obolibrary.org/obo/chebi_29067'})
igraph.Vertex(<igraph.Graph object at 0x7f3e6ce7fd68>, 5, {'name': 'http://purl.obolibrary.org/obo/chebi_25696'})
igraph.Vertex(<igraph.Graph object at 0x7f3e6ce7fd68>, 6, {'name': 'http://purl.obolibrary.org/obo/chebi_22563'})
igraph.Vertex(<igraph.Graph object at 0x7f3e6ce7fd68>, 7, {'name': 'http://purl.obolibrary.org/obo/chebi_24870'})
igraph.Vertex(<igraph.Graph object at 0x7f3e6ce7fd68>, 8, {'name': 'http://purl.obolibrar

In [309]:
# Compute a 3-D layout for the graph, then peek at the coordinates of vertex 5.
# NOTE(review): no random seed is set in the visible cells, so the coordinates
# presumably differ between runs — confirm if reproducible figures matter.
layt=g.layout_auto(dim=3)
layt[5]

[123.57665174734694, -77.70050841918419, -250.0532535132881]

In [310]:
# Node coordinates: one list per dimension (x, y, z), indexed by vertex id.
Xn, Yn, Zn = ([layt[k][dim_index] for k in range(N)] for dim_index in range(3))

# Edge coordinates: for each edge append (start, end, None) per dimension.
# The None entries break the line so every edge is drawn as its own segment.
Xe, Ye, Ze = [], [], []
for edge in g.es:
    src, dst = edge.tuple
    for coords, dim in ((Xe, 0), (Ye, 1), (Ze, 2)):
        coords += [layt[src][dim], layt[dst][dim], None]

In [311]:
import plotly as py
from plotly.graph_objs import *

In [312]:
# Edge trace: grey line segments between connected vertices, no hover text.
# Plain dicts replace the deprecated graph_objs wrapper classes (Line, Marker);
# Scatter3d accepts them directly.
trace1=Scatter3d(x=Xe,
               y=Ye,
               z=Ze,
               mode='lines',
               line=dict(color='rgb(125,125,125)', width=1),
               hoverinfo='none'
               )
# Node trace: one marker per class, coloured by area level (`group`), with the
# class label (`labels`) as hover text.
# 'circle' replaces 'dot', which is not one of the marker symbols scatter3d accepts.
trace2=Scatter3d(x=Xn,
               y=Yn,
               z=Zn,
               mode='markers',
               name='actors',
               marker=dict(symbol='circle',
                           size=6,
                           color=group,
                           colorscale='Viridis',
                           line=dict(color='rgb(50,50,50)', width=0.5)
                           ),
               text=labels,
               hoverinfo='text'
               )

In [313]:
# Shared settings for the three scene axes: hide every decoration so that only
# the graph itself is drawn.
axis = {
    'showbackground': False,
    'showline': False,
    'zeroline': False,
    'showgrid': False,
    'showticklabels': False,
    'title': '',
}

In [314]:
# Figure layout. The title and the data-source note now describe this notebook's
# data; the previous strings were left over from the Les Miserables example.
# Plain dicts/lists replace the deprecated graph_objs wrapper classes
# (Scene, XAxis, YAxis, ZAxis, Margin, Annotations, Annotation, Font).
layout = Layout(
    title="Ontology class hierarchy around chebi_62943<br>(ancestors and descendants, 3D visualization)",
    width=1000,
    height=1000,
    showlegend=False,
    # All three axes share the decoration-free settings defined in `axis`.
    scene=dict(
        xaxis=dict(axis),
        yaxis=dict(axis),
        zaxis=dict(axis),
    ),
    margin=dict(t=100),
    hovermode='closest',
    annotations=[
        dict(
            showarrow=False,
            text="Data source: ONT_CLASS_HIERARCHY.csv, ONT_CLASS_LABELS.csv, TAX_AREAS.csv",
            xref='paper',
            yref='paper',
            x=0,
            y=0.1,
            xanchor='left',
            yanchor='bottom',
            font=dict(size=14),
        )
    ],
)

In [315]:
# Assemble the figure from the edge and node traces. A plain list replaces the
# deprecated Data wrapper class.
data=[trace1, trace2]
fig=Figure(data=data, layout=layout)

# Render inline without a plotly account.
py.offline.init_notebook_mode(connected=True)

# The filename now describes this figure; the previous value was left over
# from the Les Miserables example.
py.offline.iplot(fig, filename='ontology-hierarchy-3d')