# Introduction

In [3]:
# Load the `cypher` IPython extension; the %cypher / %%cypher magics used in later cells come from it.
%load_ext cypher 

The cypher extension is already loaded. To reload it, use:
  %reload_ext cypher


In [4]:
import getpass
import os
import re
import sys
from urllib.parse import quote

import matplotlib as mp
import pandas as pd
from py2neo import Graph, Node, Relationship

import graphAGDT.objects as gagdt
import neo4j_scripts
import scripts.visTreebank
from scripts.visTreebank import drawSent, vis_network

In [4]:
from IPython.display import IFrame, HTML

In [5]:
# Never store the database password in the notebook: take it from the
# NEO4J_PASSWORD environment variable, or prompt for it when that is unset.
neo4j_password = os.environ.get("NEO4J_PASSWORD") or getpass.getpass("Neo4j password: ")

# Graph handle used by the py2neo-based cells (g.run, drawSent).
g = Graph(password=neo4j_password)

# Connection URI handed to the %cypher / %%cypher magics. The password is
# percent-encoded so that characters such as "@" or "/" cannot break the URI.
conn = "http://neo4j:{}@localhost:7474/db/data".format(quote(neo4j_password, safe=""))

Some basics:

https://github.com/nicolewhite/neo4j-jupyter/blob/master/hello-world.ipynb

# Run Cypher Queries

You can run a Cypher query simply like this:

In [6]:
%%cypher {conn}
MATCH (n:Token) RETURN count(n) as `N. of Sentences`

1 rows affected.


N. of Sentences
281065


Or you can load the query results into a pandas DataFrame:

In [7]:
# Run the query through the line magic: one row per Sentence node, holding only its `work` property.
res = %cypher {conn} MATCH (s:Sentence) \
              RETURN s.work as Work
# Turn the result set into a DataFrame, then count how many rows each work has.
df = res.get_dataframe()
df.Work.value_counts()

19140 rows affected.


Iliad               8415
Odyssey             6723
Oedipus Tyrannus     931
Electra              929
Ajax                 785
Antigone             687
Trachiniae           669
Prova                  1
dtype: int64

By the way, the same data can be obtained with a single Cypher query

In [8]:
%%cypher {conn}
MATCH (s:Sentence)
RETURN DISTINCT s.author as Author, s.work AS Title, count(*) AS `Nr. of Sent`

8 rows affected.


Author,Title,Nr. of Sent
Homer,Iliad,8415
Sophocles,Electra,929
Homer,Odyssey,6723
Neo4j,Prova,1
Sophocles,Ajax,785
Sophocles,Trachiniae,669
Sophocles,Antigone,687
Sophocles,Oedipus Tyrannus,931


# Visualize sentences

The quickest (yet rather unsatisfactory) way to visualize trees is via the visTreebank script, which is located in the "scripts" folder.

In [13]:
# Node label -> name of the node property to pick for that label
# (presumably the text drawSent shows on each node; confirm in scripts/visTreebank.py).
opt = dict(Sentence="address", Token="form", Artificial="form")

In [17]:
# Address of the sentence to visualize.
sentence_address = "tlg0012.tlg001.perseus-grc1#2277078#0"
# Call drawSent on graph `g` for that address, passing the per-label property map `opt` and enabling physics.
drawSent(g,sentence_address, opt, physics=True)

# Scrapbook

In [56]:
import importlib

# Re-execute scripts.visTreebank so that edits to the file take effect without
# restarting the kernel.
importlib.reload(scripts.visTreebank)

# reload() only refreshes the module object. Names previously bound with
# `from scripts.visTreebank import ...` still point at the old functions, so
# they are imported again here.
from scripts.visTreebank import drawSent, vis_network

# Leave the module as the cell's value so its repr is still displayed.
scripts.visTreebank

<module 'scripts.visTreebank' from '/Users/fmambrini/Documents/lavoro/Notebooks/Treebanks/scripts/visTreebank.py'>

In [None]:
# Cypher: shuffle all nodes with rand(), keep the first {limit} of them, and return
# each kept node with its outgoing relationships. OPTIONAL MATCH keeps nodes that
# have no outgoing relationship, in which case r and m are null.
query = """
    MATCH (n)
    WITH n, rand() AS random
    ORDER BY random
    LIMIT {limit}
    OPTIONAL MATCH (n)-[r]->(m)
    RETURN n AS source_node,
           id(n) AS source_id,
           r,
           m AS target_node,
           id(m) AS target_id
    """

# `limit=4` is passed as a keyword argument, presumably filling the {limit} placeholder as a query parameter.
data = g.run(query, limit=4)

In [None]:
# Cypher: start from the Sentence node with the given address, follow a path of any
# length to a node n, and return every relationship r from n to a node m.
query = """
    MATCH (s:Sentence)-[*]->(n)-[r]->(m)
    WHERE s.address = "tlg0011.tlg001.perseus-grc2#2898476#0"
    RETURN n AS source_node,
           id(n) AS source_id,
           r,
           m AS target_node,
           id(m) AS target_id
    """
# Overwrites the `data` name used by the other query cells.
data = g.run(query)

Hopefully, the next query returns results identical to the previous one!

In [17]:
# Cypher: select source nodes by matching their `address` property against a regular
# expression (fixed prefix, then a numeric suffix) and return each outgoing relationship.
# NOTE(review): the "." characters in the pattern are unescaped, so they match any
# character rather than only a literal dot — confirm whether that is acceptable.
query = """
    MATCH (n)-[r]->(m)
    WHERE n.address =~ "^tlg0011.tlg001.perseus-grc2#2898476#[0-9]+$"
    RETURN n AS source_node,
           id(n) AS source_id,
           r,
           m AS target_node,
           id(m) AS target_id
    """
# Overwrites the `data` name used by the other query cells.
data = g.run(query)

In [None]:
# Print one "source-->target" line per result row, using each node's `form`
# property. Columns follow the RETURN clause of the query cells:
# 0 = source node, 2 = relationship, 3 = target node.
for row in data:
    source_node = row[0]
    rel = row[2]
    target_node = row[3]
    source_form = source_node.get("form")
    # The OPTIONAL MATCH query can return rows without a target node; print
    # None for those instead of failing on the missing node.
    target_form = target_node.get("form") if target_node is not None else None
    print("{}-->{}".format(source_form, target_form))

In [18]:
# Materialize every row of the query result into a list so rows can be indexed.
rows = list(data)

In [29]:
# Pick a single row (index 4) to inspect in the following cells.
row = rows[4]

In [30]:
# Column 0 of the row is the source node, column 1 its id (see the RETURN
# clause of the query cells).
node = row[0]
# Stored as `node_id` so the builtin id() is not shadowed for the rest of the kernel session.
node_id = row[1]

In [38]:
# Take the first label reported by node.labels() (converted to a list so it can be indexed).
node_label = list(node.labels())[0]

In [41]:
# Look up which property name `opt` associates with this node's label (None when the label is not a key).
prop_key = opt.get(node_label)
prop_key

'form'

In [43]:
# Show all properties of the node as a plain dict. dict(node) is used instead
# of the deprecated `node.properties` accessor, which emits a warning.
repr(dict(node))

  if __name__ == '__main__':


"{'animacy': '', 'cid': '36335578', 'pos': 'noun', 'voice': '-', 'ne_type': '', 'isMemberOfApos': 0, 'mood': '-', 'person': '-', 'case': 'nominative', 'original_label': 'SBJ', 'tense': '-', 'head': '3', 'lemma': 'λόγος', 'gender': 'masculine', 'cite': 'urn:cts:greekLit:tlg0011.tlg001:1', 'isMemberOfCoord': 0, 'degree': '-', 'postag': 'n-s---mn-', 'number': 'singular', 'form': 'λόγος', 'address': 'tlg0011.tlg001.perseus-grc2#2898476#1', 'rank': '1'}"

In [None]:
# NOTE(review): `e` is not assigned in any visible cell, so this raises NameError on a
# fresh kernel — confirm which object was meant (perhaps `node`).
e.get("form")

In [None]:
# Build a regular expression matching every address that shares the prefix of
# `sentence_address` (everything before the last "#") and ends in a numeric
# suffix.
sentence_address = "tlg0011.tlg001.perseus-grc2#2898496#0"
a_components = sentence_address.split("#")
# Escape the prefix so its "." characters match literally instead of matching
# any character.
sentence_prefix = re.escape("#".join(a_components[:-1]))
reg_address = "^{}#[0-9]+$".format(sentence_prefix)
# NOTE(review): the pattern now contains backslashes; if it is used in a Cypher
# query, pass it as a query parameter rather than pasting it into the query text.
reg_address  # matches e.g. "tlg0011.tlg001.perseus-grc2#2898496#12"

In [None]:
# Read the whole saved graph page into `h` as text.
figure_path = "figure/graph-27ebccc0-9884-4a84-b204-0cc41f332bab.html"
with open(figure_path) as figure_file:
    h = figure_file.read()

In [None]:
# Wrap the text held in `h` in IPython's HTML display object so the notebook renders it.
HTML(h)

In [14]:
# Call drawSent for the sentence address "prova#1#0", with the per-label property map `opt` and physics enabled.
drawSent(g,"prova#1#0", opt, physics=True)

In [81]:
%%cypher {conn}
MATCH (n:Sentence) 
WHERE n.address STARTS WITH "tlg0012.tlg001" 
SET n.chronology = "8th BCE" 
SET n.meter = "dactylic hexameter"

2 properties set.
