# _Hands-on_ utilizando a biblioteca **py2neo**

In [941]:
## Preparing Neo4j
#%run ../neo4j-setup/1_neo4j_setup.ipynb
#%run installs.ipynb
# Restore the stored variable holding the IP of the container running Neo4j
%store -r neo4j_pod_ip 

In [942]:
# Display the restored Neo4j container IP
neo4j_pod_ip

'10.42.0.108'

## Por que py2neo ?
Py2neo é uma biblioteca cliente e toolkit para Neo4j. 
- API de alto nível 
- OGM 
- Console para executar **consultas cypher**

## Conectando com Neo4j

In [943]:
import pandas as pd
from py2neo import Graph
# Open the py2neo handle to the database over Bolt (port 7687) on the restored container IP
graph = Graph("bolt://{}:7687".format(neo4j_pod_ip))

In [None]:
# Make sure the database is empty before loading the example data
graph.delete_all()

## Populando banco de dados (Movies database)

In [944]:
# Read the file with the node/relationship creation queries, flatten it to a
# single line (newlines become spaces) and run it against the database
with open('data/cypher_create_movie_db.txt') as cypher_file:
    query = ' '.join(cypher_file.read().split('\n'))
graph.run(query)

(No data)

![](img/graph-examples-movies-example.png)

## Esquema de banco de dados

In [945]:
# Inspect the database schema by calling the db.schema.visualization() procedure through Cypher
graph.run('CALL db.schema.visualization()')

 nodes                                                                                                                    | relationships                                                                                                                                                              
--------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 [(_-1:Movie {constraints: [], indexes: [], name: 'Movie'}), (_-2:Person {constraints: [], indexes: [], name: 'Person'})] | [(_-2)-[:ACTED_IN {}]->(_-1), (_-2)-[:REVIEWED {}]->(_-1), (_-2)-[:PRODUCED {}]->(_-1), (_-2)-[:WROTE {}]->(_-1), (_-2)-[:FOLLOWS {}]->(_-2), (_-2)-[:DIRECTED {}]->(_-1)] 

In [946]:
# Node labels (node types) exposed by py2neo's schema object
graph.schema.node_labels

frozenset({'Movie', 'Person'})

In [947]:
# Relationship types exposed by py2neo's schema object
graph.schema.relationship_types

frozenset({'ACTED_IN', 'DIRECTED', 'FOLLOWS', 'PRODUCED', 'REVIEWED', 'WROTE'})

In [948]:
# Property keys per node label, sampled from the first node found for each label
for label in graph.schema.node_labels:
    n = graph.nodes.match(label).first()
    # first() yields None when no node carries the label. Compare against None
    # explicitly so a matched node is never skipped because of its truthiness
    # (e.g. a node that has no properties).
    if n is not None:
        print(label, n.keys())

Movie dict_keys(['tagline', 'title', 'released'])
Person dict_keys(['name', 'born'])


In [949]:
# Property keys per relationship type, sampled from the first relationship of each type
for reltype in graph.schema.relationship_types:
    r = graph.relationships.match(r_type=reltype).first()
    # A type can be listed in the schema without any relationship currently using
    # it; first() then yields None, so guard before reading the keys (same guard
    # as the node-label loop).
    if r is not None:
        print(reltype, r.keys())

DIRECTED dict_keys([])
FOLLOWS dict_keys([])
ACTED_IN dict_keys(['roles'])
WROTE dict_keys([])
REVIEWED dict_keys(['summary', 'rating'])
PRODUCED dict_keys([])


### Esquema no Neo4j-browser

In [898]:
# Address of the Neo4j Browser; run "CALL db.schema.visualization()" there to see the schema
print('http://{}:7474/browser'.format(neo4j_pod_ip))

http://10.42.0.104:7474/browser


## Consultando Nós e Relacionamentos

### Nós

In [950]:
# Using py2neo's matcher API: first node labelled Person
graph.nodes.match('Person').first() # or .all() to fetch every match

Node('Person', born=1964, name='Keanu Reeves')

In [951]:
# Same lookup written in Cypher
graph.run('MATCH (p:Person) RETURN p LIMIT 1')

 p                                              
------------------------------------------------
 (_1:Person {born: 1964, name: 'Keanu Reeves'}) 

#### Filtrando resultado por atributos

In [952]:
# Using py2neo's matcher API: Movie nodes whose `released` property equals 2000
graph.nodes.match('Movie').where(released=2000).all()

[Node('Movie', released=2000, tagline='The rest of his life begins now.', title='Jerry Maguire'),
 Node('Movie', released=2000, tagline='Pain heals, Chicks dig scars... Glory lasts forever', title='The Replacements'),
 Node('Movie', released=2000, tagline='At the edge of the world, his journey begins.', title='Cast Away')]

In [953]:
# Same filter written in Cypher; .data() returns the records as a list of dicts
graph.run('MATCH (m:Movie {released:2000}) RETURN m').data()

[{'m': Node('Movie', released=2000, tagline='The rest of his life begins now.', title='Jerry Maguire')},
 {'m': Node('Movie', released=2000, tagline='Pain heals, Chicks dig scars... Glory lasts forever', title='The Replacements')},
 {'m': Node('Movie', released=2000, tagline='At the edge of the world, his journey begins.', title='Cast Away')}]

### Relacionamentos

In [956]:
# Keep two nodes at hand for the relationship queries that follow.
# (Other people that can be tried here: 'Charlize Theron', 'Clint Eastwood'.)

# Tom Hanks
p1 = (
    graph.nodes
    .match('Person')
    .where(name='Tom Hanks')
    .first()
)

# Apollo 13
m1 = (
    graph.nodes
    .match('Movie')
    .where(title='Apollo 13')
    .first()
)

In [957]:
# Relationships of p1 (Tom Hanks), limited to 5 results
graph.relationships.match([p1]).limit(5).all()

[ACTED_IN(Node('Person', born=1956, name='Tom Hanks'), Node('Movie', released=1995, tagline='Houston, we have a problem.', title='Apollo 13'), roles=['Jim Lovell']),
 ACTED_IN(Node('Person', born=1956, name='Tom Hanks'), Node('Movie', released=1998, tagline='At odds in life... in love on-line.', title="You've Got Mail"), roles=['Joe Fox']),
 ACTED_IN(Node('Person', born=1956, name='Tom Hanks'), Node('Movie', released=1992, tagline='Once in a lifetime you get a chance to do something different.', title='A League of Their Own'), roles=['Jimmy Dugan']),
 ACTED_IN(Node('Person', born=1956, name='Tom Hanks'), Node('Movie', released=1990, tagline='A story of love, lava and burning desire.', title='Joe Versus the Volcano'), roles=['Joe Banks']),
 ACTED_IN(Node('Person', born=1956, name='Tom Hanks'), Node('Movie', released=1996, tagline='In every life there comes a time when that thing you dream becomes that thing you do', title='That Thing You Do'), roles=['Mr. White'])]

In [958]:
# Cast of Apollo 13: ACTED_IN relationships ending at m1 (None leaves the start node unconstrained)
graph.relationships.match([None,m1], r_type='ACTED_IN').all()

[ACTED_IN(Node('Person', born=1956, name='Tom Hanks'), Node('Movie', released=1995, tagline='Houston, we have a problem.', title='Apollo 13'), roles=['Jim Lovell']),
 ACTED_IN(Node('Person', born=1950, name='Ed Harris'), Node('Movie', released=1995, tagline='Houston, we have a problem.', title='Apollo 13'), roles=['Gene Kranz']),
 ACTED_IN(Node('Person', born=1955, name='Gary Sinise'), Node('Movie', released=1995, tagline='Houston, we have a problem.', title='Apollo 13'), roles=['Ken Mattingly']),
 ACTED_IN(Node('Person', born=1958, name='Kevin Bacon'), Node('Movie', released=1995, tagline='Houston, we have a problem.', title='Apollo 13'), roles=['Jack Swigert']),
 ACTED_IN(Node('Person', born=1955, name='Bill Paxton'), Node('Movie', released=1995, tagline='Houston, we have a problem.', title='Apollo 13'), roles=['Fred Haise'])]

In [959]:
# Same cast query written in Cypher
graph.run("MATCH (p)-[r:ACTED_IN]->(m:Movie {title:'Apollo 13'}) RETURN r" ).data()

[{'r': ACTED_IN(Node('Person', born=1956, name='Tom Hanks'), Node('Movie', released=1995, tagline='Houston, we have a problem.', title='Apollo 13'), roles=['Jim Lovell'])},
 {'r': ACTED_IN(Node('Person', born=1950, name='Ed Harris'), Node('Movie', released=1995, tagline='Houston, we have a problem.', title='Apollo 13'), roles=['Gene Kranz'])},
 {'r': ACTED_IN(Node('Person', born=1955, name='Gary Sinise'), Node('Movie', released=1995, tagline='Houston, we have a problem.', title='Apollo 13'), roles=['Ken Mattingly'])},
 {'r': ACTED_IN(Node('Person', born=1958, name='Kevin Bacon'), Node('Movie', released=1995, tagline='Houston, we have a problem.', title='Apollo 13'), roles=['Jack Swigert'])},
 {'r': ACTED_IN(Node('Person', born=1955, name='Bill Paxton'), Node('Movie', released=1995, tagline='Houston, we have a problem.', title='Apollo 13'), roles=['Fred Haise'])}]

### Visualizando consultas no Jupyter com Neovis + Cypher

In [960]:
# Import the module used to draw in the visualization application
# (import_ipynb presumably enables importing the neovis_draw notebook as a module — confirm)
import import_ipynb
from neovis_draw import Draw
# Restore the stored variable holding the IP of the visualization application
%store -r neo4j_vis_pod_ip 

In [936]:
# Drawing configuration: which node property is used as caption for each label,
# plus global drawing flags.
# NOTE(review): 'hierachical' looks like a misspelling of 'hierarchical' — confirm
# which key neovis_draw actually reads before renaming it.
vis_conf = {
    'labels': {
        'Person': {'caption': 'name'},
        'Movie': {'caption': 'title'},
    },
    'arrows': False,
    'hierachical': True,
}
# Show the configuration through IPython's JSON display object (expanded=True)
from IPython.display import JSON
JSON(vis_conf, expanded=True)

<IPython.core.display.JSON object>

#### Visão geral do grafo

In [961]:
# Overview: every pair of nodes joined by a relationship, whatever its type or direction
query = "MATCH p=()-[]-() RETURN p"
Draw(
    neo4j_vis_pod_ip,
    neo4j_pod_ip,
    query=query,
    config=vis_conf,
    figsize=(1200, 500),
)

#### Elenco de Apollo 13

In [962]:
# Cast of Apollo 13: the actors, the movie and the ACTED_IN relationships between them
query = "MATCH (p)-[r:ACTED_IN]->(m:Movie {title:'Apollo 13'}) RETURN p,m,r"
Draw(
    neo4j_vis_pod_ip,
    neo4j_pod_ip,
    query=query,
    config=vis_conf,
    figsize=(1200, 500),
)

### Artistas que foram atores e diretores simultaneamente no mesmo filme

In [963]:
# People who both acted in and directed the same movie (same node p on both ends of the path)
query = "MATCH s=(p)-[:ACTED_IN]->(m:Movie)<-[:DIRECTED]-(p) RETURN s"
Draw(
    neo4j_vis_pod_ip,
    neo4j_pod_ip,
    query=query,
    config=vis_conf,
    figsize=(1200, 500),
)

## Criando Nós e Relacionamentos

### Criando Filme "O que é isso companheiro?"

In [912]:
from py2neo import Node, Relationship #vamos usar o py2neo para criar

In [964]:
# New movie and person nodes, built as local py2neo objects
m2 = Node(
    'Movie',
    title='O que é isso companheiro?',
    released=1997,
    tagline='Their goal: freedom. Their only hope: an international incident. Their target: the American ambassador',
)
p2 = Node('Person', name='Pedro Cardoso', born=1962)
p3 = Node('Person', name='Fernanda Torres', born=1965)

# This person is already stored in the database, so fetch the matching node instead of creating it
p4 = (
    graph.nodes
    .match('Person')
    .where(name='Alan Arkin')
    .first()
)

In [965]:
# ACTED_IN relationships linking each person to the new movie, with the roles played
r1 = Relationship(p2, 'ACTED_IN', m2, roles=['Fernando Gabeira', 'Paulo'])
r2 = Relationship(p3, 'ACTED_IN', m2, roles=['Maria'])
r3 = Relationship(p4, 'ACTED_IN', m2, roles=['Charles Burke Elbrick'])

# Add them to the database, one create call per relationship, in order
for rel in (r1, r2, r3):
    graph.create(rel)

In [966]:
# Cast of the movie that was just created.
# The title is embedded in the query text as a Cypher string literal, so escape
# backslashes and single quotes first; otherwise a title such as
# "You've Got Mail" would terminate the literal early and break the query.
title_literal = str(m2.get('title')).replace('\\', '\\\\').replace("'", "\\'")
query = "MATCH (m:Movie)<-[r:ACTED_IN]-(p:Person) WHERE m.title = '{title}' RETURN p,m,r".format(title=title_literal)
Draw(neo4j_vis_pod_ip,neo4j_pod_ip,query=query, config=vis_conf, figsize=(1200,500))

### Quem é Alan Arkin?

In [967]:
# Paths of 1 to 3 ACTED_IN hops (any direction) from Alan Arkin to a movie
query = "MATCH s=(p:Person {name: 'Alan Arkin'})-[r:ACTED_IN*1..3]-(m:Movie) RETURN s"
Draw(
    neo4j_vis_pod_ip,
    neo4j_pod_ip,
    query=query,
    config=vis_conf,
    figsize=(1200, 500),
)

### Criando propriedades novas para nós existentes

In [968]:
# Store on every Person the number of its outgoing ACTED_IN relationships as the property movies_cnt
query = """MATCH (p:Person)
           WITH p, size((p)-[:ACTED_IN]->()) AS count
           SET p.movies_cnt = count"""
graph.run(query)

(No data)

In [969]:
# Map the new movies_cnt property to the 'size' setting of Person nodes in the drawing config
vis_conf['labels']['Person'].update(size='movies_cnt')
# New query: up to 50 actor -> movie paths
query = "MATCH s=(p:Person)-[r:ACTED_IN]->(m:Movie) RETURN s LIMIT 50"
Draw(
    neo4j_vis_pod_ip,
    neo4j_pod_ip,
    query=query,
    config=vis_conf,
    figsize=(1200, 500),
)

## Utilizando o plug-in APOC (Amazing Procedures on Cypher)

### Estatísticas do grafo

In [970]:
# Graph statistics from the APOC procedure apoc.meta.stats(), returned as a list of dicts
graph.run('CALL apoc.meta.stats() YIELD stats RETURN stats').data()

[{'stats': {'relTypeCount': 6,
   'propertyKeyCount': 9,
   'labelCount': 2,
   'nodeCount': 178,
   'relCount': 260,
   'labels': {'Movie': 41, 'Person': 137},
   'relTypes': {'()-[:DIRECTED]->(:Movie)': 44,
    '(:Person)-[:PRODUCED]->()': 15,
    '()-[:DIRECTED]->()': 44,
    '()-[:PRODUCED]->()': 15,
    '(:Person)-[:DIRECTED]->()': 44,
    '()-[:REVIEWED]->()': 9,
    '(:Person)-[:REVIEWED]->()': 9,
    '()-[:FOLLOWS]->(:Person)': 3,
    '()-[:PRODUCED]->(:Movie)': 15,
    '()-[:FOLLOWS]->()': 3,
    '()-[:WROTE]->()': 10,
    '()-[:ACTED_IN]->(:Movie)': 179,
    '(:Person)-[:FOLLOWS]->()': 3,
    '()-[:WROTE]->(:Movie)': 10,
    '(:Person)-[:WROTE]->()': 10,
    '()-[:REVIEWED]->(:Movie)': 9,
    '(:Person)-[:ACTED_IN]->()': 179,
    '()-[:ACTED_IN]->()': 179}}}]

### Criando novos nós e relacionamentos a partir de uma propriedade dos nós
 - Extrai a propriedade **Person {born}** para construir o novo rótulo Year e a relação **(Person)-[BORN_IN]->(Year{year})**

In [971]:
graph.run('CREATE CONSTRAINT IF NOT EXISTS ON (n:Year) ASSERT n.year IS UNIQUE') # prevents several nodes being created for the same year
# APOC refactoring call naming the source property 'born', the relationship type 'BORN_IN',
# and the target label/property 'Year'/'year'
graph.run("CALL apoc.refactor.categorize('born', 'BORN_IN', true, 'Year', 'year', [], 100)")

(No data)

In [972]:
# Give the new Year label a caption in the drawing config
vis_conf['labels'].update(Year={'caption': 'year'})
# New query: people linked through BORN_IN to a Year node with year > 1970
query = "MATCH g=(p:Person)-[r:BORN_IN]-(y:Year) WHERE y.year > 1970 RETURN g"
Draw(
    neo4j_vis_pod_ip,
    neo4j_pod_ip,
    query=query,
    config=vis_conf,
    figsize=(1200, 500),
)

### Criando novo rótulo a partir de uma relação
- Criando rótulo "Director" para as pessoas que dirigiram filmes
- Criando rótulo "Actor" para as pessoas que atuaram em filmes

In [973]:
# Add the label Director to every Person with at least one DIRECTED relationship
# and store the number of movies directed in movies_directed.
# The MATCH produces one row per DIRECTED relationship, so the rows are reduced
# to distinct people first; otherwise the procedure call and the SET would be
# repeated for every movie a person directed.
query = """MATCH (p:Person)-[:DIRECTED]->(:Movie)
           WITH DISTINCT p
           CALL apoc.create.addLabels([id(p)], ['Director']) YIELD node
           WITH node AS d, size((node)-[:DIRECTED]->()) AS count
           SET d.movies_directed = count
           RETURN DISTINCT 'done'"""
graph.run(query)

 'done' 
--------
 done   

In [974]:
# Add the label Actor to every Person with at least one ACTED_IN relationship.
# The MATCH produces one row per ACTED_IN relationship, so the rows are reduced
# to distinct people before the procedure call to avoid relabelling the same
# person once per movie.
query = """MATCH (p:Person)-[:ACTED_IN]->(:Movie)
           WITH DISTINCT p
           CALL apoc.create.addLabels([id(p)], ['Actor']) YIELD node
           RETURN DISTINCT 'done'"""
graph.run(query)

 'done' 
--------
 done   

- Mostrando apenas diretores com mais de 1 filme dirigido

In [975]:
# Switch the 'size' setting of Person nodes to the movies_directed property
vis_conf['labels']['Person'].update(size='movies_directed')
# Directors with more than one directed movie, with their DIRECTED relationships
query = "MATCH s=(d:Director)-[:DIRECTED]-() WHERE d.movies_directed > 1 RETURN s"
Draw(
    neo4j_vis_pod_ip,
    neo4j_pod_ip,
    query=query,
    config=vis_conf,
    figsize=(1200, 500),
)

## Utilizando o plugin GDS (Graph Data Science)

### Preparando

- Criando uma relação Person --> Person (ACTED_WITH) entre pessoas que atuaram juntas em filmes

In [976]:
# Link every pair of people who acted in the same movie with ACTED_WITH, keeping
# the movie title in the `in` property.
# Both ACTED_IN relationships must point INTO the movie; the second pattern
# previously had an arrow head on both ends (`<-[:ACTED_IN]->`), which made it
# match in either direction.
# id(act1) < id(act2) keeps a single ordering of each pair, so only one
# relationship per pair and movie is merged.
query = """MATCH (act1:Person)-[:ACTED_IN]->(m:Movie)<-[:ACTED_IN]-(act2:Person)
           WHERE id(act1)<id(act2)
           MERGE (act1)-[:ACTED_WITH {in:m.title}]->(act2)"""
graph.run(query)

(No data)

In [977]:
# Drawing config for ACTED_WITH relationships: drop their captions.
# NOTE(review): 'false' is the string, not the boolean False — confirm what neovis_draw expects.
vis_conf.update(relationships={'ACTED_WITH': {'caption': 'false'}})
# Everyone connected to Laurence Fishburne through ACTED_WITH
query = "MATCH s=(p1:Person{name:'Laurence Fishburne'})-[:ACTED_WITH]-() RETURN s"
Draw(
    neo4j_vis_pod_ip,
    neo4j_pod_ip,
    query=query,
    config=vis_conf,
    figsize=(1200, 500),
)

- Contando essas relações e estabelecendo uma nova relação (CO_STARRED_WITH) que guarda, como propriedade, a contagem das relações anteriores

In [978]:
# For each pair of actors linked by ACTED_WITH, count those relationships and MERGE
# a CO_STARRED_WITH relationship between the pair. `times` is set only when the
# relationship is created (ON CREATE). The distinct `times` values are returned in order.
query = """ MATCH (a:Actor)-[r1:ACTED_WITH]-(b:Actor)
            WITH a, b, count(r1) as countr
            MERGE (a)-[r2:CO_STARRED_WITH]-(b)
            ON CREATE SET r2.times = countr
            RETURN DISTINCT r2.times ORDER BY r2.times"""
graph.run(query)

 r2.times 
----------
        1 
        2 
        3 

In [979]:
# Drawing config for CO_STARRED_WITH: no caption, 'thickness' mapped to the `times` property.
# This replaces the whole 'relationships' entry of vis_conf.
vis_conf.update(
    relationships={
        'CO_STARRED_WITH': {'caption': 'false', 'thickness': 'times'},
    }
)
query = "MATCH g=(a:Person)-[:CO_STARRED_WITH]-(b:Person) RETURN g"
Draw(
    neo4j_vis_pod_ip,
    neo4j_pod_ip,
    query=query,
    config=vis_conf,
    figsize=(1200, 500),
)

### Criando uma projeção do grafo

- O plugin GDS trabalha sobre uma projeção do grafo em memória

In [980]:
# Project Actor nodes (with movies_cnt) and CO_STARRED_WITH relationships (with
# times) into the in-memory graph 'myGraph' used by the GDS algorithms below.
# Creating a projection under a name that is already in the catalog fails, so
# drop a leftover 'myGraph' first; this keeps the cell re-runnable without
# manually uncommenting a drop statement.
if graph.evaluate("CALL gds.graph.exists('myGraph') YIELD exists RETURN exists"):
    # .data() consumes the result so the drop is finished before re-creating
    graph.run("CALL gds.graph.drop('myGraph')").data()
query = """CALL gds.graph.create(
           'myGraph',
           'Actor',
           'CO_STARRED_WITH',
            {
            nodeProperties: 'movies_cnt',
            relationshipProperties: 'times'
            })"""
graph.run(query)

 nodeProjection                                                                                    | relationshipProjection                                                                                                                                                             | graphName | nodeCount | relationshipCount | createMillis 
---------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------|-----------|-------------------|--------------
 {Actor: {properties: {movies_cnt: {property: 'movies_cnt', defaultValue: null}}, label: 'Actor'}} | {CO_STARRED_WITH: {orientation: 'NATURAL', aggregation: 'DEFAULT', type: 'CO_STARRED_WITH', properties: {times: {property: 'times', aggregation: 'DEFAULT', defaultValue: null}}}} | myGraph   |       106 |               

### Executando o algoritmo de detecção de comunidades (Label Propagation)
- Retornando resultado do algoritmo como lista

In [981]:
# Stream the Label Propagation result over 'myGraph': one row per node with its
# name and community id, ordered by community and then by name
query = """ CALL gds.labelPropagation.stream('myGraph')
            YIELD nodeId, communityId AS Community
            RETURN gds.util.asNode(nodeId).name AS Name, Community
            ORDER BY Community, Name """
graph.run(query).data()

[{'Name': 'Carrie-Anne Moss', 'Community': 8},
 {'Name': 'Emil Eifrem', 'Community': 8},
 {'Name': 'Keanu Reeves', 'Community': 8},
 {'Name': 'Laurence Fishburne', 'Community': 8},
 {'Name': 'Al Pacino', 'Community': 13},
 {'Name': 'Charlize Theron', 'Community': 13},
 {'Name': 'Aaron Sorkin', 'Community': 28},
 {'Name': 'Christopher Guest', 'Community': 28},
 {'Name': 'Cuba Gooding Jr.', 'Community': 28},
 {'Name': 'Demi Moore', 'Community': 28},
 {'Name': 'J.T. Walsh', 'Community': 28},
 {'Name': 'Jack Nicholson', 'Community': 28},
 {'Name': 'James Marshall', 'Community': 28},
 {'Name': 'Kevin Bacon', 'Community': 28},
 {'Name': 'Kevin Pollak', 'Community': 28},
 {'Name': 'Kiefer Sutherland', 'Community': 28},
 {'Name': 'Noah Wyle', 'Community': 28},
 {'Name': 'Tom Cruise', 'Community': 28},
 {'Name': 'Jay Mohr', 'Community': 44},
 {'Name': 'Jonathan Lipnicki', 'Community': 44},
 {'Name': 'Kelly Preston', 'Community': 44},
 {'Name': 'Regina King', 'Community': 44},
 {'Name': 'Renee Z

- Retornando resultado na forma de propriedade do nó no grafo original

In [982]:
# Run Label Propagation in write mode with writeProperty 'community', and
# report the community count, iterations run and whether it converged
query = """ CALL gds.labelPropagation.write('myGraph', { writeProperty: 'community' })
            YIELD communityCount, ranIterations, didConverge """
graph.run(query)

 communityCount | ranIterations | didConverge 
----------------|---------------|-------------
             19 |            10 | true        

In [932]:
# Inspect the current drawing configuration
vis_conf

{'labels': {'Person': {'caption': 'name', 'size': 'movies_directed'},
  'Movie': {'caption': 'title'},
  'Year': {'caption': 'year'}},
 'arrows': False,
 'hierachical': True,
 'relationships': {'CO_STARRED_WITH': {'caption': 'false',
   'thickness': 'times'}}}

In [984]:
# Expose the new node property `community` to the visualization of Person nodes
vis_conf['labels']['Person'].update(community='community')
# New query: every pair of nodes joined by CO_STARRED_WITH
query = "MATCH p=()-[:CO_STARRED_WITH]-() RETURN p"
Draw(
    neo4j_vis_pod_ip,
    neo4j_pod_ip,
    query=query,
    config=vis_conf,
    figsize=(1200, 700),
)