## Creo los nodos

In [2]:
v = sqlContext.createDataFrame([
  ("a", "Alice", 34),
  ("b", "Bob", 36),
  ("c", "Charlie", 30),
  ("d", "David", 29),
  ("e", "Esther", 32),
  ("f", "Fanny", 36),
  ("g", "Gabby", 60)
], ["id", "name", "age"])

## Creo las aristas

In [3]:
e = sqlContext.createDataFrame([
  ("a", "b", "friend"),
  ("b", "c", "follow"),
  ("c", "b", "follow"),
  ("f", "c", "follow"),
  ("e", "f", "follow"),
  ("e", "d", "friend"),
  ("d", "a", "friend"),
  ("a", "e", "friend")
], ["src", "dst", "relationship"])

## Importo graphframes

In [4]:
from graphframes import *

## Creo el grafo

In [5]:
g = GraphFrame(v, e)

In [6]:
g.vertices.show()

+---+-------+---+
| id|   name|age|
+---+-------+---+
|  a|  Alice| 34|
|  b|    Bob| 36|
|  c|Charlie| 30|
|  d|  David| 29|
|  e| Esther| 32|
|  f|  Fanny| 36|
|  g|  Gabby| 60|
+---+-------+---+



In [7]:
g.edges.show()

+---+---+------------+
|src|dst|relationship|
+---+---+------------+
|  a|  b|      friend|
|  b|  c|      follow|
|  c|  b|      follow|
|  f|  c|      follow|
|  e|  f|      follow|
|  e|  d|      friend|
|  d|  a|      friend|
|  a|  e|      friend|
+---+---+------------+



## Contando el grado de los nodos

In [8]:
vertexInDegrees = g.inDegrees

In [9]:
vertexInDegrees.show()

+---+--------+
| id|inDegree|
+---+--------+
|  f|       1|
|  e|       1|
|  d|       1|
|  c|       2|
|  b|       2|
|  a|       1|
+---+--------+



In [10]:
vertexOutDegrees = g.outDegrees

In [11]:
vertexOutDegrees.show()

+---+---------+
| id|outDegree|
+---+---------+
|  f|        1|
|  e|        2|
|  d|        1|
|  c|        1|
|  b|        1|
|  a|        2|
+---+---------+



## Puedo hacer queries en el DataFrame de nodos

In [12]:
g.vertices.groupBy().min("age").show()

+--------+
|min(age)|
+--------+
|      29|
+--------+



## O en el de aristas

In [13]:
numFollows = g.edges.filter("relationship = 'follow'").count()
numFollows

4

## Buscando en el grafo (motifs)

In [14]:
motifs = g.find("(a)-[e]->(b); (b)-[e2]->(a)")
motifs.show()

+--------------+------------+--------------+------------+
|             a|           e|             b|          e2|
+--------------+------------+--------------+------------+
|[c,Charlie,30]|[c,b,follow]|    [b,Bob,36]|[b,c,follow]|
|    [b,Bob,36]|[b,c,follow]|[c,Charlie,30]|[c,b,follow]|
+--------------+------------+--------------+------------+



In [15]:
motifs.filter("b.age > 30").show()

+--------------+------------+----------+------------+
|             a|           e|         b|          e2|
+--------------+------------+----------+------------+
|[c,Charlie,30]|[c,b,follow]|[b,Bob,36]|[b,c,follow]|
+--------------+------------+----------+------------+



## Creando sub-grafos

In [16]:
v2 = g.vertices.filter("age > 30")
e2 = g.edges.filter("relationship = 'friend'")
g2 = GraphFrame(v2, e2)

In [17]:
g2.vertices.show()

+---+------+---+
| id|  name|age|
+---+------+---+
|  a| Alice| 34|
|  b|   Bob| 36|
|  e|Esther| 32|
|  f| Fanny| 36|
|  g| Gabby| 60|
+---+------+---+



In [18]:
g2.edges.show()

+---+---+------------+
|src|dst|relationship|
+---+---+------------+
|  a|  b|      friend|
|  e|  d|      friend|
|  d|  a|      friend|
|  a|  e|      friend|
+---+---+------------+



## Sub-grafo a partir de una selección de aristas

In [19]:
paths = g.find("(a)-[e]->(b)")\
  .filter("e.relationship = 'follow'")\
  .filter("a.age < b.age")

In [20]:
e2 = paths.select("e.src", "e.dst", "e.relationship")

In [21]:
g2 = GraphFrame(g.vertices, e2)

In [22]:
g2.vertices.show()

+---+-------+---+
| id|   name|age|
+---+-------+---+
|  a|  Alice| 34|
|  b|    Bob| 36|
|  c|Charlie| 30|
|  d|  David| 29|
|  e| Esther| 32|
|  f|  Fanny| 36|
|  g|  Gabby| 60|
+---+-------+---+



In [23]:
g2.edges.show()

+---+---+------------+
|src|dst|relationship|
+---+---+------------+
|  e|  f|      follow|
|  c|  b|      follow|
+---+---+------------+



## Buscando camino mínimo con bfs

In [24]:
paths = g.bfs("name = 'Esther'", "age < 32")

In [25]:
paths.show()

+-------------+------------+------------+
|         from|          e0|          to|
+-------------+------------+------------+
|[e,Esther,32]|[e,d,friend]|[d,David,29]|
+-------------+------------+------------+



In [26]:
paths = g.bfs("name = 'Esther'", "age < 32",\
  edgeFilter="relationship != 'friend'", maxPathLength=3)
paths.show()

+-------------+------------+------------+------------+--------------+
|         from|          e0|          v1|          e1|            to|
+-------------+------------+------------+------------+--------------+
|[e,Esther,32]|[e,f,follow]|[f,Fanny,36]|[f,c,follow]|[c,Charlie,30]|
+-------------+------------+------------+------------+--------------+



## Más de camino mínimos

In [27]:
results = g.shortestPaths(landmarks=["a", "d"])
results.first()

Row(id=u'a', name=u'Alice', age=34, distances={u'a': 0, u'd': 2})

In [28]:
results.select("id", "distances").show()

+---+-------------------+
| id|          distances|
+---+-------------------+
|  a|Map(a -> 0, d -> 2)|
|  g|              Map()|
|  f|              Map()|
|  d|Map(d -> 0, a -> 1)|
|  e|Map(d -> 1, a -> 2)|
|  b|              Map()|
|  c|              Map()|
+---+-------------------+



## Contando triangulos

In [29]:
results = g.triangleCount()
results.first()

Row(count=0, id=u'g', name=u'Gabby', age=60)

In [30]:
results.show()

+-----+---+-------+---+
|count| id|   name|age|
+-----+---+-------+---+
|    0|  g|  Gabby| 60|
|    0|  f|  Fanny| 36|
|    1|  e| Esther| 32|
|    1|  d|  David| 29|
|    0|  c|Charlie| 30|
|    0|  b|    Bob| 36|
|    1|  a|  Alice| 34|
+-----+---+-------+---+

