In [1]:
%Addjar http://dl.bintray.com/spark-packages/maven/graphframes/graphframes/0.1.0-spark1.6/graphframes-0.1.0-spark1.6.jar

Starting download from http://dl.bintray.com/spark-packages/maven/graphframes/graphframes/0.1.0-spark1.6/graphframes-0.1.0-spark1.6.jar
Finished download of graphframes-0.1.0-spark1.6.jar


In [2]:
import org.graphframes._

In [4]:
import org.apache.spark.sql.SQLContext

In [5]:
// SQLContext used by later cells to build DataFrames.
// NOTE(review): `sc` is not defined in this notebook; presumably the SparkContext
// supplied by the kernel — confirm.
val sqlContext = new SQLContext(sc)

In [9]:
// Vertices: one (id, name, age) tuple per person.
val vertexRows = Seq(
  ("a", "Alice", 34),
  ("b", "Bob", 36),
  ("c", "Charlie", 30),
  ("d", "David", 29),
  ("e", "Esther", 32),
  ("f", "Fanny", 36),
  ("g", "Gabby", 60)
)
val v = sqlContext.createDataFrame(vertexRows).toDF("id", "name", "age")

// Edges: one (src, dst, relationship) tuple per directed edge;
// src and dst hold vertex ids from the table above.
val edgeRows = Seq(
  ("a", "b", "friend"),
  ("b", "c", "follow"),
  ("c", "b", "follow"),
  ("f", "c", "follow"),
  ("e", "f", "follow"),
  ("e", "d", "friend"),
  ("d", "a", "friend"),
  ("a", "e", "friend")
)
val e = sqlContext.createDataFrame(edgeRows).toDF("src", "dst", "relationship")

// Assemble the graph from the vertex and edge DataFrames.
val g = GraphFrame(v, e)

In [10]:
// Rebind `g` to the example graph shipped with GraphFrames; this shadows the
// hand-built `g` from the previous cell for all later cells.
// NOTE(review): the tables printed below match the hand-built data, so the two
// graphs appear to be equivalent — confirm against the GraphFrames examples source.
val g: GraphFrame = examples.Graphs.friends

In [11]:
// Print the vertex DataFrame of `g`.
g.vertices.show()

+---+-------+---+
| id|   name|age|
+---+-------+---+
|  a|  Alice| 34|
|  b|    Bob| 36|
|  c|Charlie| 30|
|  d|  David| 29|
|  e| Esther| 32|
|  f|  Fanny| 36|
|  g|  Gabby| 60|
+---+-------+---+



In [12]:
// Print the edge DataFrame of `g`.
g.edges.show()

+---+---+------------+
|src|dst|relationship|
+---+---+------------+
|  a|  b|      friend|
|  b|  c|      follow|
|  c|  b|      follow|
|  f|  c|      follow|
|  e|  f|      follow|
|  e|  d|      friend|
|  d|  a|      friend|
|  a|  e|      friend|
+---+---+------------+



In [14]:
import org.apache.spark.sql.DataFrame

In [15]:
// Bind the DataFrame returned by GraphFrame.inDegrees.
// The result is only assigned here; this cell does not display it.
val vertexInDegrees: DataFrame = g.inDegrees

In [16]:
// Youngest user's age: an ungrouped min over the "age" column
// of the vertex DataFrame.
val minAgeDf = g.vertices.groupBy().min("age")
minAgeDf.show()

+--------+
|min(age)|
+--------+
|      29|
+--------+



In [17]:
// Number of "follow" relationships, computed on the edge DataFrame:
// keep only the matching edges, then count them.
val followEdges = g.edges.where("relationship = 'follow'")
val numFollows = followEdges.count()

In [18]:
// Echo the "follow" edge count so the notebook prints it.
numFollows

4

In [21]:
// Motif: two vertices connected by an edge in each direction (a -> b and b -> a).
val mutualEdgePattern = "(a)-[e]->(b); (b)-[e2]->(a)"
val motifs: DataFrame = g.find(mutualEdgePattern)
motifs.show()

// The motif result is a plain DataFrame, so it can be narrowed further with a
// predicate on the matched "b" vertex.
motifs.where("b.age > 30").show()

+------------+--------------+--------------+------------+
|           e|             a|             b|          e2|
+------------+--------------+--------------+------------+
|[b,c,follow]|    [b,Bob,36]|[c,Charlie,30]|[c,b,follow]|
|[c,b,follow]|[c,Charlie,30]|    [b,Bob,36]|[b,c,follow]|
+------------+--------------+--------------+------------+

+------------+--------------+----------+------------+
|           e|             a|         b|          e2|
+------------+--------------+----------+------------+
|[c,b,follow]|[c,Charlie,30]|[b,Bob,36]|[b,c,follow]|
+------------+--------------+----------+------------+



In [26]:
// Breadth First Search - Search from "Esther" for users of age < 32.
// The bound is strict: a 32-year-old (such as Esther herself) is not a valid target.
val paths: DataFrame = g.bfs.fromExpr("name = 'Esther'").toExpr("age < 32").run()
paths.show()

+-------------+------------+------------+
|         from|          e0|          to|
+-------------+------------+------------+
|[e,Esther,32]|[e,d,friend]|[d,David,29]|
+-------------+------------+------------+

