# SciPi Spark Implementation - Part 2

## Load CSV in Neo4j

#### Joseph Azzopardi & Andrew Cachia

In [1]:
from py2neo import Graph
from py2neo import Node
from py2neo import Relationship

In [2]:
import time
# Wall-clock timestamp taken before any database work; the last cell of the
# notebook subtracts it from a second time.time() reading to print the total
# run time in seconds.
start = time.time()

# Connection Configuration

In [3]:
#connect to Neo4j API

def neo4jConnect(IP, boltPort, username, pwd, httpPort):
    """Open a py2neo ``Graph`` connection to a Neo4j server over Bolt.

    Parameters
    ----------
    IP : str
        Host name or IP address of the Neo4j server.
    boltPort : str or int
        Bolt port. Either type is accepted because the URL is built with
        ``str.format`` instead of string concatenation.
    username, pwd : str
        Credentials forwarded to ``Graph`` as ``user`` / ``password``.
    httpPort : int
        Forwarded to ``Graph`` as ``http_port``.

    Returns
    -------
    The connected ``Graph`` object (its repr is also printed).

    Notes
    -----
    The connection is opened with ``secure=False``, i.e. without TLS.
    """
    # Formatting (rather than "+") keeps this working when the port is an int.
    bolturl = "bolt://{}:{}".format(IP, boltPort)
    mygraph = Graph(bolturl, user=username, password=pwd, bolt=True, secure=False, http_port=httpPort)
    print(mygraph)

    return mygraph

In [4]:
#Connect to Neo4j Enterprise on Azure
# Every setting can be overridden through an environment variable so that real
# credentials never have to be written into (and committed with) the notebook.
# The fallbacks are the values that used to be hard-coded here, so the cell
# behaves as before when none of the variables is set.
import os

IP = os.environ.get("NEO4J_HOST", "10.99.99.10") # IP Address of Neo4j Container
boltPort = os.environ.get("NEO4J_BOLT_PORT", "7687")
httpPort = int(os.environ.get("NEO4J_HTTP_PORT", "7474"))
user = os.environ.get("NEO4J_USER", "neo4j")
pwd  = os.environ.get("NEO4J_PASSWORD", "test")

mygraph = neo4jConnect(IP, boltPort, user, pwd, httpPort)

<Graph database=<Database uri='bolt://10.99.99.10:7687' secure=False user_agent='py2neo/4.3.0 neobolt/1.7.13 Python/3.5.3-final-0 (linux)'> name='data'>


# Create Constraints

In [5]:
# One uniqueness constraint per node label, each on the property that the
# import queries further down use to identify nodes of that label.
cquery1 = """
CREATE 
CONSTRAINT ON (n:Author) 
ASSERT n.authorid IS UNIQUE
"""

cquery2 = """
CREATE 
CONSTRAINT ON (n:Paper)
ASSERT n.paperid IS UNIQUE
"""

cquery3 = """
CREATE 
CONSTRAINT ON (n:Publishers)
ASSERT n.publisherid IS UNIQUE
"""

cquery4 = """
CREATE 
CONSTRAINT ON (n:ConfInstance) 
ASSERT n.confid IS UNIQUE
"""

cquery5 = """
CREATE 
CONSTRAINT ON (n:Journal)
ASSERT n.journalid IS UNIQUE
"""

cquery6 = """
CREATE 
CONSTRAINT ON (n:Institution)
ASSERT n.institutionid IS UNIQUE
"""

cquery7 = """
CREATE 
CONSTRAINT ON (n:Keyword) 
ASSERT n.name IS UNIQUE
"""

# Send all seven statements, in order, inside a single explicit transaction.
tx = mygraph.begin()
for constraint_query in (cquery1, cquery2, cquery3, cquery4, cquery5, cquery6, cquery7):
    tx.run(constraint_query)
tx.commit()

## Node Queries

#### Author Nodes 

In [6]:
# sample: 2797,Elena Frantova,3,28

# Cypher for the authors CSV (columns: authorid, name, ...).
# MERGE is keyed on authorid only - the property covered by the uniqueness
# constraint - and the name is applied with SET. Merging on the full property
# map would try to create a second node (and fail on the constraint) whenever
# an authorid re-appears with a different name, and would raise on rows whose
# name column is missing (MERGE rejects null property values).
# `csvfile` is supplied as a query parameter by importCsv.
query_author_nodes = """
    USING PERIODIC COMMIT 1000
    LOAD CSV FROM {csvfile}  AS line
    MERGE (a:Author { authorid: line[0] })
    SET a.name = line[1]
    """
#CREATE (:author { authorid: line[0], name: line[1], PaperCount: toInteger(line[2]), CiteCount: toInteger(line[3]) })

#### Paper Nodes 

In [7]:
# sample: 1979425243,Journal,Kinesin superfamily protein member 4 (KIF4) is localized to midzone and midbody in dividing cells,2004

# Cypher for the papers CSV (columns: paperid, doc_type, title, year).
# MERGE is keyed on paperid only (the constrained property); the remaining
# columns are applied with SET. This also lets the query fill in a bare Paper
# node that the keyword import created first, instead of colliding with it on
# the uniqueness constraint, and it tolerates rows with missing columns
# (MERGE itself rejects null property values).
# All values, including year, are stored as the strings LOAD CSV yields.
query_paper_nodes = """
        USING PERIODIC COMMIT 1000
        LOAD CSV FROM {csvfile}  AS line
        MERGE (p:Paper { paperid: line[0] })
        SET p.doc_type = line[1], p.title = line[2], p.year = line[3]
        """

##### Publisher Nodes

In [8]:
# sample: 8589934592,Western Economic Association International

# Cypher for the publishers CSV (columns: publisherid, name).
# MERGE is keyed on publisherid only (the constrained property) and the name is
# applied with SET, so a repeated id with a different name updates the node
# instead of violating the uniqueness constraint, and a missing name column
# does not make MERGE fail on a null property value.
query_publisher_nodes = """
        USING PERIODIC COMMIT 1000
        LOAD CSV FROM {csvfile}  AS line
        MERGE (pub:Publishers { publisherid: line[0] })
        SET pub.name = line[1]
        """

##### ConferenceInstance Nodes

In [9]:
# sample: 31227610,eurocon 2011,"Lisbon, Portugal"

# Cypher for the conference-instance CSV (columns: confid, name, location).
# MERGE is keyed on confid only (the constrained property); name and location
# are applied with SET. A row without a location column therefore still loads
# (MERGE on the full property map would raise on the null value), and a
# repeated confid updates the node instead of violating the constraint.
query_conferenceinstance_nodes = """
        USING PERIODIC COMMIT 1000
        LOAD CSV FROM {csvfile}  AS line
        MERGE (c:ConfInstance { confid: line[0] })
        SET c.name = line[1], c.location = line[2]
        """

#### Journal Nodes

In [10]:
# sample: 18204665,international journal of multiphase flow

# Cypher for the journals CSV (columns: journalid, name).
# MERGE is keyed on journalid only (the constrained property) and the name is
# applied with SET, so a repeated id with a different name updates the node
# instead of violating the uniqueness constraint, and a missing name column
# does not make MERGE fail on a null property value.
query_journal_nodes = """
        USING PERIODIC COMMIT 1000
        LOAD CSV FROM {csvfile}  AS line
        MERGE (j:Journal { journalid: line[0] })
        SET j.name = line[1]
        """

##### Institutions Nodes

In [11]:
# sample: 4,USSR Academy of Medical Sciences

# Cypher for the institutions CSV (columns: institutionid, name).
# MERGE is keyed on institutionid only (the constrained property) and the name
# is applied with SET, so a repeated id with a different name updates the node
# instead of violating the uniqueness constraint, and a missing name column
# does not make MERGE fail on a null property value.
query_institution_nodes = """
        USING PERIODIC COMMIT 1000
        LOAD CSV FROM {csvfile}  AS line
        MERGE (i:Institution { institutionid: line[0] })
        SET i.name = line[1]
        """

##### Keywords

In [12]:
# Cypher for the paper-keyword CSV: column 0 is a paper id, column 1 a keyword.
# Each MERGE is keyed on a single property (Paper.paperid, Keyword.name), so
# re-running the import does not duplicate nodes or :Contains relationships.
# A paper id that is not in the graph yet gets a new Paper node carrying only
# its paperid. Commits every 5000 rows (the node queries use 1000).
query_keywords = """
        USING PERIODIC COMMIT 5000
        LOAD CSV FROM {csvfile}  AS line
        MERGE (paper:Paper{paperid:line[0]})
        MERGE (keyword:Keyword{name:line[1]})
        MERGE (paper)-[:Contains]->(keyword)
        """

## Relationships Queries

##### Author-Author Relationships - Author Collaborations

In [13]:
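# sample: (none recorded) - columns presumably authorId, authorId, collaborations; TODO confirm against the CSV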

#query_rel_auth_auth = """
#        USING PERIODIC COMMIT 1000
#        LOAD CSV FROM {csvfile}  AS line
#        MATCH (a:Author { authorid: line[0] })
#        MATCH (b:Author { authorid: line[1] })
#        CREATE (a)-[r:co_author { collaborations: line[2] }]->(b);
#        """

# Cypher for the author-author CSV: columns 0 and 1 are author ids.
# Both endpoints are looked up with MATCH, so a row that names an author not
# present in the graph creates nothing. The relationship is directed from the
# first author to the second and carries no properties (any third column is
# ignored). CREATE adds one relationship per row, so loading the same file
# twice duplicates the relationships.
query_rel_auth_auth = """
        USING PERIODIC COMMIT 1000
        LOAD CSV FROM {csvfile}  AS line
        MATCH (a:Author { authorid: line[0] })
        MATCH (b:Author { authorid: line[1] })
        CREATE (a)-[r:co_author]->(b);
        """

##### Paper to Publisher Relationship

In [14]:
# sample:  1968760085,1709396983808   (paperId, publisherId)

# Cypher for the paper-publisher CSV: column 0 is a paper id, column 1 a
# publisher id. Both nodes must already exist (MATCH); rows with an unknown id
# create nothing. Direction is (Paper)-[:published_by]->(Publishers).
# CREATE adds one relationship per row, so re-loading a file duplicates them.
query_rel_pub_paper = """
        USING PERIODIC COMMIT 1000
        LOAD CSV FROM {csvfile}  AS line
        MATCH (p:Paper {paperid: line[0]} )
        MATCH (publisher:Publishers {publisherid: line[1]} )
        CREATE (p)-[:published_by ]->(publisher);
        """

##### Paper to ConfInstance Relationship

In [15]:
# sample: 2140101510,2624888355   (paperId, conferenceInstanceId)

# Cypher for the paper-conference-instance CSV: column 0 is a paper id,
# column 1 a conference-instance id. Both nodes must already exist (MATCH);
# rows with an unknown id create nothing.
# Direction is (Paper)-[:conf_part_of]->(ConfInstance).
# CREATE adds one relationship per row, so re-loading a file duplicates them.
query_rel_conf_paper = """
        USING PERIODIC COMMIT 1000
        LOAD CSV FROM {csvfile}  AS line
        MATCH (p:Paper {paperid: line[0]} )
        MATCH (c:ConfInstance {confid: line[1]} )
        CREATE (p)-[:conf_part_of]->(c);
        """

##### Paper to Journal Relationship

In [16]:
# sample: 2374592160,2764507941 (paperId, journalId)

# Cypher for the paper-journal CSV: column 0 is a paper id, column 1 a
# journal id. Both nodes must already exist (MATCH); rows with an unknown id
# create nothing. Direction is (Paper)-[:journal_part_of]->(Journal).
# CREATE adds one relationship per row, so re-loading a file duplicates them.
query_rel_journal_paper = """
        USING PERIODIC COMMIT 1000
        LOAD CSV FROM {csvfile}  AS line
        MATCH (p:Paper {paperid: line[0]} )
        MATCH (j:Journal {journalid: line[1]} )
        CREATE (p)-[:journal_part_of]->(j);
        """

##### Paper-Author Relationships

In [17]:
# sample author:    15,199142497,A      (paperid, authorid, relationship_type)
# sample co-author: 15,680395887,CO_A   (paperid, authorid, relationship_type) 


# Cypher for the paper-author CSV: column 0 is a paper id, column 1 an author
# id, column 2 the relationship type code (e.g. "A" or "CO_A" in the samples
# above), stored as the `Type` property of the relationship.
# Both nodes must already exist (MATCH); rows with an unknown id create
# nothing. Direction is (Paper)-[:Authored]->(Author).
# CREATE adds one relationship per row, so re-loading a file duplicates them.
query_rel_paper_author = """
        USING PERIODIC COMMIT 1000
        LOAD CSV FROM {csvfile}  AS line
        MATCH (p:Paper { paperid: line[0]})
        MATCH (a:Author { authorid: line[1] })
        CREATE (p)-[ :Authored{Type:line[2]} ]->(a);
        """


##### Author-Institution Relationships

In [18]:
# sample: 2430849057,93   (distData.authorId, distData.institutionId)

# Cypher for the author-institution CSV: column 0 is an author id, column 1
# an institution id. Both nodes must already exist (MATCH); rows with an
# unknown id create nothing. Direction is (Author)-[:member_of]->(Institution).
# CREATE adds one relationship per row, so re-loading a file duplicates them.
query_rel_author_inst = """
        USING PERIODIC COMMIT 1000
        LOAD CSV FROM {csvfile}  AS line
        MATCH (a:Author {authorid: line[0]} )
        MATCH (i:Institution {institutionid: line[1]} )
        CREATE (a)-[:member_of]->(i);
        """



## Import CSV to Neo4j Function

In [19]:
def importCsv(filename, query):
    """Run one LOAD CSV Cypher statement against a single CSV file.

    The file name is echoed to stdout, prefixed with ``file:///`` and handed
    to ``query`` as the ``csvfile`` parameter. The statement is executed
    through the module-level ``mygraph`` connection.

    NOTE(review): the queries in this notebook reference the parameter with
    the legacy ``{csvfile}`` placeholder; Neo4j 4.0 and later only accept
    ``$csvfile``.
    """
    print(filename)
    # Alternative base when reading from Azure Blob storage instead of local files:
    #   "https://ics5114mag.blob.core.windows.net/parsed-csv-files/"
    source_url = "file:///" + filename
    mygraph.run(query, parameters={"csvfile": source_url})

# Loading CSV Files

In [20]:
### From Azure
# The storage account key must never be written into the notebook: read it
# from the environment. The key that used to be embedded in this comment has
# been published with the notebook and should be treated as compromised and
# rotated.
#from azure.storage.blob import BlockBlobService
#blob_service = BlockBlobService(account_name="ics5114mag", account_key=os.environ["AZURE_STORAGE_KEY"])
#generator = blob_service.list_blobs("parsed-csv-files")

### Locally
import os
root_dir = "/home/data"

# `generator` holds the path of every file below root_dir, expressed relative
# to root_dir (e.g. "results/authors/part-00000-....csv"). Being a set, its
# iteration order is arbitrary.
generator = set()

for dir_, _, files in os.walk(root_dir):
    # The relative directory is the same for every file in it: compute it once.
    rel_dir = os.path.relpath(dir_, root_dir)
    generator.update(os.path.join(rel_dir, file_name) for file_name in files)

In [21]:
def filterCsv(directory, blob, query):
    """Import ``blob`` with ``query`` when it is a CSV file of ``directory``.

    ``blob`` is a path string. It is passed to ``importCsv`` only if it ends
    with ``.csv`` and starts with ``"results/" + directory``; any other path
    is ignored silently.
    """
    # For Azure blob objects the path would come from blob.name instead.
    if blob.endswith(".csv") and blob.startswith("results/" + directory):
        importCsv(blob, query)

In [22]:
# Load Author nodes from every CSV under results/authors/.
for csv_path in generator:
    filterCsv("authors/", csv_path, query_author_nodes)

results/authors/part-00001-e0c7c581-b0b8-418d-b02d-8a45c1d9ca12-c000.csv
results/authors/part-00000-e0c7c581-b0b8-418d-b02d-8a45c1d9ca12-c000.csv


In [23]:
# Load Paper nodes from every CSV under results/papers/.
for csv_path in generator:
    filterCsv("papers/", csv_path, query_paper_nodes)

In [24]:
# Load Publishers nodes from every CSV under results/publishers/.
for csv_path in generator:
    filterCsv("publishers/", csv_path, query_publisher_nodes)

In [25]:
# Load ConfInstance nodes from every CSV under results/conferenceinstance/.
for csv_path in generator:
    filterCsv("conferenceinstance/", csv_path, query_conferenceinstance_nodes)

results/conferenceinstance/part-00001-2a0ff3e7-4ba6-4f73-9440-e33e89d68770-c000.csv
results/conferenceinstance/part-00000-2a0ff3e7-4ba6-4f73-9440-e33e89d68770-c000.csv


In [26]:
# Load Journal nodes from every CSV under results/journals/.
for csv_path in generator:
    filterCsv("journals/", csv_path, query_journal_nodes)

results/journals/part-00000-d290666a-55a6-4ab6-9207-415d5dac4d6e-c000.csv
results/journals/part-00001-d290666a-55a6-4ab6-9207-415d5dac4d6e-c000.csv


In [27]:
# Load Institution nodes from every CSV under results/institutions/.
for csv_path in generator:
    filterCsv("institutions/", csv_path, query_institution_nodes)

results/institutions/part-00066-5c86c038-4309-4bc4-9a26-c50b419ff5dd-c000.csv
results/institutions/part-00072-5c86c038-4309-4bc4-9a26-c50b419ff5dd-c000.csv
results/institutions/part-00190-5c86c038-4309-4bc4-9a26-c50b419ff5dd-c000.csv
results/institutions/part-00104-5c86c038-4309-4bc4-9a26-c50b419ff5dd-c000.csv
results/institutions/part-00159-5c86c038-4309-4bc4-9a26-c50b419ff5dd-c000.csv
results/institutions/part-00130-5c86c038-4309-4bc4-9a26-c50b419ff5dd-c000.csv
results/institutions/part-00019-5c86c038-4309-4bc4-9a26-c50b419ff5dd-c000.csv
results/institutions/part-00161-5c86c038-4309-4bc4-9a26-c50b419ff5dd-c000.csv
results/institutions/part-00143-5c86c038-4309-4bc4-9a26-c50b419ff5dd-c000.csv
results/institutions/part-00088-5c86c038-4309-4bc4-9a26-c50b419ff5dd-c000.csv
results/institutions/part-00158-5c86c038-4309-4bc4-9a26-c50b419ff5dd-c000.csv
results/institutions/part-00102-5c86c038-4309-4bc4-9a26-c50b419ff5dd-c000.csv
results/institutions/part-00179-5c86c038-4309-4bc4-9a26-c50b419f

results/institutions/part-00096-5c86c038-4309-4bc4-9a26-c50b419ff5dd-c000.csv
results/institutions/part-00097-5c86c038-4309-4bc4-9a26-c50b419ff5dd-c000.csv
results/institutions/part-00139-5c86c038-4309-4bc4-9a26-c50b419ff5dd-c000.csv
results/institutions/part-00051-5c86c038-4309-4bc4-9a26-c50b419ff5dd-c000.csv
results/institutions/part-00178-5c86c038-4309-4bc4-9a26-c50b419ff5dd-c000.csv
results/institutions/part-00068-5c86c038-4309-4bc4-9a26-c50b419ff5dd-c000.csv
results/institutions/part-00049-5c86c038-4309-4bc4-9a26-c50b419ff5dd-c000.csv
results/institutions/part-00189-5c86c038-4309-4bc4-9a26-c50b419ff5dd-c000.csv
results/institutions/part-00141-5c86c038-4309-4bc4-9a26-c50b419ff5dd-c000.csv
results/institutions/part-00124-5c86c038-4309-4bc4-9a26-c50b419ff5dd-c000.csv
results/institutions/part-00173-5c86c038-4309-4bc4-9a26-c50b419ff5dd-c000.csv
results/institutions/part-00170-5c86c038-4309-4bc4-9a26-c50b419ff5dd-c000.csv
results/institutions/part-00166-5c86c038-4309-4bc4-9a26-c50b419f

In [28]:
# Load Keyword nodes and their Paper-[:Contains]->Keyword links from every
# CSV under results/keywords/.
for csv_path in generator:
    filterCsv("keywords/", csv_path, query_keywords)

results/keywords/part-00042-f296c383-fcc2-4d94-80b7-e6994d35af79-c000.csv
results/keywords/part-00130-f296c383-fcc2-4d94-80b7-e6994d35af79-c000.csv
results/keywords/part-00011-f296c383-fcc2-4d94-80b7-e6994d35af79-c000.csv
results/keywords/part-00091-f296c383-fcc2-4d94-80b7-e6994d35af79-c000.csv
results/keywords/part-00099-f296c383-fcc2-4d94-80b7-e6994d35af79-c000.csv
results/keywords/part-00185-f296c383-fcc2-4d94-80b7-e6994d35af79-c000.csv
results/keywords/part-00159-f296c383-fcc2-4d94-80b7-e6994d35af79-c000.csv
results/keywords/part-00076-f296c383-fcc2-4d94-80b7-e6994d35af79-c000.csv
results/keywords/part-00008-f296c383-fcc2-4d94-80b7-e6994d35af79-c000.csv
results/keywords/part-00187-f296c383-fcc2-4d94-80b7-e6994d35af79-c000.csv
results/keywords/part-00132-f296c383-fcc2-4d94-80b7-e6994d35af79-c000.csv
results/keywords/part-00139-f296c383-fcc2-4d94-80b7-e6994d35af79-c000.csv
results/keywords/part-00019-f296c383-fcc2-4d94-80b7-e6994d35af79-c000.csv
results/keywords/part-00134-f296c383-f

results/keywords/part-00165-f296c383-fcc2-4d94-80b7-e6994d35af79-c000.csv
results/keywords/part-00003-f296c383-fcc2-4d94-80b7-e6994d35af79-c000.csv
results/keywords/part-00136-f296c383-fcc2-4d94-80b7-e6994d35af79-c000.csv
results/keywords/part-00129-f296c383-fcc2-4d94-80b7-e6994d35af79-c000.csv
results/keywords/part-00001-f296c383-fcc2-4d94-80b7-e6994d35af79-c000.csv
results/keywords/part-00128-f296c383-fcc2-4d94-80b7-e6994d35af79-c000.csv
results/keywords/part-00104-f296c383-fcc2-4d94-80b7-e6994d35af79-c000.csv
results/keywords/part-00127-f296c383-fcc2-4d94-80b7-e6994d35af79-c000.csv
results/keywords/part-00182-f296c383-fcc2-4d94-80b7-e6994d35af79-c000.csv
results/keywords/part-00032-f296c383-fcc2-4d94-80b7-e6994d35af79-c000.csv
results/keywords/part-00169-f296c383-fcc2-4d94-80b7-e6994d35af79-c000.csv
results/keywords/part-00192-f296c383-fcc2-4d94-80b7-e6994d35af79-c000.csv
results/keywords/part-00152-f296c383-fcc2-4d94-80b7-e6994d35af79-c000.csv
results/keywords/part-00183-f296c383-f

In [29]:
# Link papers to publishers. The query MATCHes existing nodes, so this has to
# run after the Paper and Publishers node loads.
for csv_path in generator:
    filterCsv("paper-publisher-rel/", csv_path, query_rel_pub_paper)

results/paper-publisher-rel/part-00142-b09f7468-57ab-4fe7-93b0-498f3b46b0dd-c000.csv
results/paper-publisher-rel/part-00019-b09f7468-57ab-4fe7-93b0-498f3b46b0dd-c000.csv
results/paper-publisher-rel/part-00163-b09f7468-57ab-4fe7-93b0-498f3b46b0dd-c000.csv
results/paper-publisher-rel/part-00037-b09f7468-57ab-4fe7-93b0-498f3b46b0dd-c000.csv
results/paper-publisher-rel/part-00063-b09f7468-57ab-4fe7-93b0-498f3b46b0dd-c000.csv
results/paper-publisher-rel/part-00025-b09f7468-57ab-4fe7-93b0-498f3b46b0dd-c000.csv
results/paper-publisher-rel/part-00043-b09f7468-57ab-4fe7-93b0-498f3b46b0dd-c000.csv
results/paper-publisher-rel/part-00049-b09f7468-57ab-4fe7-93b0-498f3b46b0dd-c000.csv
results/paper-publisher-rel/part-00154-b09f7468-57ab-4fe7-93b0-498f3b46b0dd-c000.csv
results/paper-publisher-rel/part-00034-b09f7468-57ab-4fe7-93b0-498f3b46b0dd-c000.csv
results/paper-publisher-rel/part-00135-b09f7468-57ab-4fe7-93b0-498f3b46b0dd-c000.csv
results/paper-publisher-rel/part-00197-b09f7468-57ab-4fe7-93b0-49

results/paper-publisher-rel/part-00170-b09f7468-57ab-4fe7-93b0-498f3b46b0dd-c000.csv
results/paper-publisher-rel/part-00147-b09f7468-57ab-4fe7-93b0-498f3b46b0dd-c000.csv
results/paper-publisher-rel/part-00077-b09f7468-57ab-4fe7-93b0-498f3b46b0dd-c000.csv
results/paper-publisher-rel/part-00137-b09f7468-57ab-4fe7-93b0-498f3b46b0dd-c000.csv
results/paper-publisher-rel/part-00006-b09f7468-57ab-4fe7-93b0-498f3b46b0dd-c000.csv
results/paper-publisher-rel/part-00138-b09f7468-57ab-4fe7-93b0-498f3b46b0dd-c000.csv
results/paper-publisher-rel/part-00009-b09f7468-57ab-4fe7-93b0-498f3b46b0dd-c000.csv
results/paper-publisher-rel/part-00045-b09f7468-57ab-4fe7-93b0-498f3b46b0dd-c000.csv
results/paper-publisher-rel/part-00109-b09f7468-57ab-4fe7-93b0-498f3b46b0dd-c000.csv
results/paper-publisher-rel/part-00111-b09f7468-57ab-4fe7-93b0-498f3b46b0dd-c000.csv
results/paper-publisher-rel/part-00069-b09f7468-57ab-4fe7-93b0-498f3b46b0dd-c000.csv
results/paper-publisher-rel/part-00005-b09f7468-57ab-4fe7-93b0-49

In [30]:
# Link papers to conference instances. The query MATCHes existing nodes, so
# this has to run after the Paper and ConfInstance node loads.
for csv_path in generator:
    filterCsv("paper-confinstance-rel/", csv_path, query_rel_conf_paper)

results/paper-confinstance-rel/part-00001-b49addfe-0ebe-45db-b866-87857762eef6-c000.csv
results/paper-confinstance-rel/part-00000-b49addfe-0ebe-45db-b866-87857762eef6-c000.csv


In [31]:
# Link papers to journals. The query MATCHes existing nodes, so this has to
# run after the Paper and Journal node loads.
for csv_path in generator:
    filterCsv("paper-journal-rel/", csv_path, query_rel_journal_paper)

results/paper-journal-rel/part-00001-dab04474-5556-4182-bcf5-5ff1723bbf90-c000.csv
results/paper-journal-rel/part-00000-dab04474-5556-4182-bcf5-5ff1723bbf90-c000.csv


In [32]:
# Link papers to their authors. The query MATCHes existing nodes, so this has
# to run after the Paper and Author node loads.
for csv_path in generator:
    filterCsv("papers-author-rel/", csv_path, query_rel_paper_author)

results/papers-author-rel/part-00000-91071011-a86f-4d3c-aabb-7268db5e90ef-c000.csv
results/papers-author-rel/part-00001-91071011-a86f-4d3c-aabb-7268db5e90ef-c000.csv


In [33]:
# Link authors to institutions. The query MATCHes existing nodes, so this has
# to run after the Author and Institution node loads.
for csv_path in generator:
    filterCsv("author-institution-rel/", csv_path, query_rel_author_inst)

results/author-institution-rel/part-00103-41f01c89-dd28-4322-914d-39b5d6057b48-c000.csv
results/author-institution-rel/part-00076-41f01c89-dd28-4322-914d-39b5d6057b48-c000.csv
results/author-institution-rel/part-00053-41f01c89-dd28-4322-914d-39b5d6057b48-c000.csv
results/author-institution-rel/part-00177-41f01c89-dd28-4322-914d-39b5d6057b48-c000.csv
results/author-institution-rel/part-00124-41f01c89-dd28-4322-914d-39b5d6057b48-c000.csv
results/author-institution-rel/part-00159-41f01c89-dd28-4322-914d-39b5d6057b48-c000.csv
results/author-institution-rel/part-00127-41f01c89-dd28-4322-914d-39b5d6057b48-c000.csv
results/author-institution-rel/part-00036-41f01c89-dd28-4322-914d-39b5d6057b48-c000.csv
results/author-institution-rel/part-00107-41f01c89-dd28-4322-914d-39b5d6057b48-c000.csv
results/author-institution-rel/part-00037-41f01c89-dd28-4322-914d-39b5d6057b48-c000.csv
results/author-institution-rel/part-00083-41f01c89-dd28-4322-914d-39b5d6057b48-c000.csv
results/author-institution-rel/p

results/author-institution-rel/part-00135-41f01c89-dd28-4322-914d-39b5d6057b48-c000.csv
results/author-institution-rel/part-00061-41f01c89-dd28-4322-914d-39b5d6057b48-c000.csv
results/author-institution-rel/part-00198-41f01c89-dd28-4322-914d-39b5d6057b48-c000.csv
results/author-institution-rel/part-00154-41f01c89-dd28-4322-914d-39b5d6057b48-c000.csv
results/author-institution-rel/part-00167-41f01c89-dd28-4322-914d-39b5d6057b48-c000.csv
results/author-institution-rel/part-00001-41f01c89-dd28-4322-914d-39b5d6057b48-c000.csv
results/author-institution-rel/part-00187-41f01c89-dd28-4322-914d-39b5d6057b48-c000.csv
results/author-institution-rel/part-00019-41f01c89-dd28-4322-914d-39b5d6057b48-c000.csv
results/author-institution-rel/part-00097-41f01c89-dd28-4322-914d-39b5d6057b48-c000.csv
results/author-institution-rel/part-00196-41f01c89-dd28-4322-914d-39b5d6057b48-c000.csv
results/author-institution-rel/part-00021-41f01c89-dd28-4322-914d-39b5d6057b48-c000.csv
results/author-institution-rel/p

results/author-institution-rel/part-00060-41f01c89-dd28-4322-914d-39b5d6057b48-c000.csv
results/author-institution-rel/part-00180-41f01c89-dd28-4322-914d-39b5d6057b48-c000.csv
results/author-institution-rel/part-00116-41f01c89-dd28-4322-914d-39b5d6057b48-c000.csv
results/author-institution-rel/part-00164-41f01c89-dd28-4322-914d-39b5d6057b48-c000.csv
results/author-institution-rel/part-00045-41f01c89-dd28-4322-914d-39b5d6057b48-c000.csv
results/author-institution-rel/part-00033-41f01c89-dd28-4322-914d-39b5d6057b48-c000.csv
results/author-institution-rel/part-00007-41f01c89-dd28-4322-914d-39b5d6057b48-c000.csv
results/author-institution-rel/part-00194-41f01c89-dd28-4322-914d-39b5d6057b48-c000.csv
results/author-institution-rel/part-00050-41f01c89-dd28-4322-914d-39b5d6057b48-c000.csv
results/author-institution-rel/part-00119-41f01c89-dd28-4322-914d-39b5d6057b48-c000.csv
results/author-institution-rel/part-00125-41f01c89-dd28-4322-914d-39b5d6057b48-c000.csv
results/author-institution-rel/p

In [34]:
# Link co-authors. The query MATCHes existing Author nodes on both ends, so
# this has to run after the Author node load.
for csv_path in generator:
    filterCsv("author-author-rel/", csv_path, query_rel_auth_auth)

results/author-author-rel/part-00039-2897fccc-9139-4150-929f-f563c6bb0760-c000.csv
results/author-author-rel/part-00023-2897fccc-9139-4150-929f-f563c6bb0760-c000.csv
results/author-author-rel/part-00014-2897fccc-9139-4150-929f-f563c6bb0760-c000.csv
results/author-author-rel/part-00031-2897fccc-9139-4150-929f-f563c6bb0760-c000.csv
results/author-author-rel/part-00142-2897fccc-9139-4150-929f-f563c6bb0760-c000.csv
results/author-author-rel/part-00161-2897fccc-9139-4150-929f-f563c6bb0760-c000.csv
results/author-author-rel/part-00036-2897fccc-9139-4150-929f-f563c6bb0760-c000.csv
results/author-author-rel/part-00051-2897fccc-9139-4150-929f-f563c6bb0760-c000.csv
results/author-author-rel/part-00098-2897fccc-9139-4150-929f-f563c6bb0760-c000.csv
results/author-author-rel/part-00191-2897fccc-9139-4150-929f-f563c6bb0760-c000.csv
results/author-author-rel/part-00121-2897fccc-9139-4150-929f-f563c6bb0760-c000.csv
results/author-author-rel/part-00067-2897fccc-9139-4150-929f-f563c6bb0760-c000.csv
resu

results/author-author-rel/part-00065-2897fccc-9139-4150-929f-f563c6bb0760-c000.csv
results/author-author-rel/part-00144-2897fccc-9139-4150-929f-f563c6bb0760-c000.csv
results/author-author-rel/part-00106-2897fccc-9139-4150-929f-f563c6bb0760-c000.csv
results/author-author-rel/part-00192-2897fccc-9139-4150-929f-f563c6bb0760-c000.csv
results/author-author-rel/part-00104-2897fccc-9139-4150-929f-f563c6bb0760-c000.csv
results/author-author-rel/part-00052-2897fccc-9139-4150-929f-f563c6bb0760-c000.csv
results/author-author-rel/part-00119-2897fccc-9139-4150-929f-f563c6bb0760-c000.csv
results/author-author-rel/part-00062-2897fccc-9139-4150-929f-f563c6bb0760-c000.csv
results/author-author-rel/part-00139-2897fccc-9139-4150-929f-f563c6bb0760-c000.csv
results/author-author-rel/part-00130-2897fccc-9139-4150-929f-f563c6bb0760-c000.csv
results/author-author-rel/part-00019-2897fccc-9139-4150-929f-f563c6bb0760-c000.csv
results/author-author-rel/part-00155-2897fccc-9139-4150-929f-f563c6bb0760-c000.csv
resu

In [35]:
# Total wall-clock seconds elapsed since `start` was recorded in the second
# cell, i.e. the run time of constraint creation plus all CSV imports.
end = time.time()
print(end - start)

106.38267493247986
