# GraphFrame Examples

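There are two ways to load GraphFrames into a Spylon notebook: from a local JAR file (Option 1) or from a package coordinate that is resolved when the Spark session starts (Option 2). In both cases, choose the GraphFrames build that matches your Spark and Scala versions.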

### Option 1:
```scala
%%init_spark
launcher.jars = ["/home/tutorials/035_spark_graphframes/graphframes-0.8.0-spark2.4-s_2.11.jar"]
```

### Option 2:
```scala
%%init_spark
launcher.packages = ["graphframes:graphframes:0.6.0-spark2.3-s_2.11"]
```

In [1]:
%%init_spark
launcher.packages = ["graphframes:graphframes:0.6.0-spark2.3-s_2.11"]

In [2]:
import org.graphframes.{examples,GraphFrame}
import org.apache.spark.sql.DataFrame

Intitializing Scala interpreter ...

Spark Web UI available at http://6555928d8b2a:4041
SparkContext available as 'sc' (version = 2.4.5, master = local[*], app id = local-1590998649824)
SparkSession available as 'spark'


import org.graphframes.{examples, GraphFrame}
import org.apache.spark.sql.DataFrame


## Example 1

In [3]:
// Obtain the SQLContext used below to build DataFrames.
// The kernel already provides a SparkSession as `spark`, so reuse the SQLContext it owns
// instead of calling the SQLContext constructor, which is deprecated since Spark 2.0.
// The `sqlContext` name is kept because later cells create DataFrames through it.
val sqlContext = spark.sqlContext

sqlContext: org.apache.spark.sql.SQLContext = org.apache.spark.sql.SQLContext@2f99861d


In [4]:
// Vertex DataFrame: one row per person, identified by the unique "id" column.
val v = {
  val people = List(
    ("a", "Alice", 34),
    ("b", "Bob", 36),
    ("c", "Charlie", 30),
    ("d", "Debra", 24)
  )
  sqlContext.createDataFrame(people).toDF("id", "name", "age")
}

v: org.apache.spark.sql.DataFrame = [id: string, name: string ... 1 more field]


In [5]:
// Edge DataFrame: each row is a directed edge from "src" to "dst",
// labelled with the kind of relationship.
val e = {
  val relations = List(
    ("a", "b", "friend"),
    ("a", "c", "follow"),
    ("c", "b", "follow"),
    ("d", "b", "follow")
  )
  sqlContext.createDataFrame(relations).toDF("src", "dst", "relationship")
}

e: org.apache.spark.sql.DataFrame = [src: string, dst: string ... 1 more field]


In [6]:
// Combine the vertex and edge DataFrames into a single GraphFrame.
val g: GraphFrame = GraphFrame(v, e)

g: org.graphframes.GraphFrame = GraphFrame(v:[id: string, name: string ... 1 more field], e:[src: string, dst: string ... 1 more field])


In [7]:
// Query: Get in-degree of each vertex.
// Only vertices with at least one incoming edge are listed; "a" and "d" never appear
// as a destination in the edge list, so they are absent from the result.
g.inDegrees.show()

+---+--------+
| id|inDegree|
+---+--------+
|  c|       1|
|  b|       3|
+---+--------+



In [8]:
// Query: Get out-degree of each vertex.
// "b" never appears as a source in the edge list, so it is absent from the result.
g.outDegrees.show()

+---+---------+
| id|outDegree|
+---+---------+
|  d|        1|
|  c|        1|
|  a|        2|
+---+---------+



In [9]:
// Query: how many edges in the graph are "follow" relationships?
g.edges.where("relationship = 'follow'").count()

res2: Long = 3


In [10]:
// Run the PageRank algorithm with reset probability 0.01 and maxIter set to 20.
// The result is a new GraphFrame; its vertices are displayed in the next cell.
val results = g.pageRank.resetProbability(0.01).maxIter(20).run()

results: org.graphframes.GraphFrame = GraphFrame(v:[id: string, name: string ... 2 more fields], e:[src: string, dst: string ... 2 more fields])


In [11]:
// Show the PageRank score computed for each vertex.
results.vertices.select("id", "pagerank").show()

+---+------------------+
| id|          pagerank|
+---+------------------+
|  a|0.5361894357276425|
|  b| 2.126017922131889|
|  c|0.8016032064128258|
|  d|0.5361894357276425|
+---+------------------+



## Example 2

In [12]:
// Load the "friends" example graph from org.graphframes.examples.
// This rebinds `g`, replacing the graph built in Example 1.
val g: GraphFrame = examples.Graphs.friends

g: org.graphframes.GraphFrame = GraphFrame(v:[id: string, name: string ... 1 more field], e:[src: string, dst: string ... 1 more field])


In [13]:
// Display the vertex DataFrame (columns: id, name, age).
g.vertices.show()

+---+-------+---+
| id|   name|age|
+---+-------+---+
|  a|  Alice| 34|
|  b|    Bob| 36|
|  c|Charlie| 30|
|  d|  David| 29|
|  e| Esther| 32|
|  f|  Fanny| 36|
|  g|  Gabby| 60|
+---+-------+---+



In [14]:
// Display the edge DataFrame (columns: src, dst, relationship).
g.edges.show()

+---+---+------------+
|src|dst|relationship|
+---+---+------------+
|  a|  b|      friend|
|  b|  c|      follow|
|  c|  b|      follow|
|  f|  c|      follow|
|  e|  f|      follow|
|  e|  d|      friend|
|  d|  a|      friend|
|  a|  e|      friend|
+---+---+------------+



In [15]:
// Get a DataFrame with columns "id" and "inDegree" (in-degree)
val vertexInDegrees: DataFrame = g.inDegrees

vertexInDegrees: org.apache.spark.sql.DataFrame = [id: string, inDegree: int]


In [16]:
// Youngest user's age: aggregate the whole vertex DataFrame
// down to the minimum of its "age" column.
g.vertices.agg("age" -> "min").show()

+--------+
|min(age)|
+--------+
|      29|
+--------+



In [17]:
// Number of "follow" relationships in the graph,
// computed directly on the edge DataFrame.
val numFollows = g.edges.where("relationship = 'follow'").count()

numFollows: Long = 4


In [18]:
// Prints all the edges.
// The motif "()-[e]->()" leaves both endpoints unnamed and names only the edge,
// so the result has a single column "e".
g.find("()-[e]->()").show()

+--------------+
|             e|
+--------------+
|[a, b, friend]|
|[b, c, follow]|
|[c, b, follow]|
|[f, c, follow]|
|[e, f, follow]|
|[e, d, friend]|
|[d, a, friend]|
|[a, e, friend]|
+--------------+



In [19]:
// Motif query: find vertex pairs (A, B) that are linked in both directions,
// i.e. an edge E1 from A to B together with an edge E2 from B back to A.
val motifs: DataFrame = g.find("(A)-[E1]->(B); (B)-[E2]->(A)")

motifs: org.apache.spark.sql.DataFrame = [A: struct<id: string, name: string ... 1 more field>, E1: struct<src: string, dst: string ... 1 more field> ... 2 more fields]


In [20]:
// Show the matches. Each mutually connected pair is listed twice,
// once with each of its vertices in the role of A.
motifs.show()

+----------------+--------------+----------------+--------------+
|               A|            E1|               B|            E2|
+----------------+--------------+----------------+--------------+
|    [b, Bob, 36]|[b, c, follow]|[c, Charlie, 30]|[c, b, follow]|
|[c, Charlie, 30]|[c, b, follow]|    [b, Bob, 36]|[b, c, follow]|
+----------------+--------------+----------------+--------------+



In [21]:
// More complex queries can be expressed by applying filters.
// Here only the matches whose B vertex has an age greater than 30 are kept.
motifs.filter("B.age > 30").show()

+----------------+--------------+------------+--------------+
|               A|            E1|           B|            E2|
+----------------+--------------+------------+--------------+
|[c, Charlie, 30]|[c, b, follow]|[b, Bob, 36]|[b, c, follow]|
+----------------+--------------+------------+--------------+



In [22]:
// Run PageRank on the friends graph with the same settings as in Example 1
// (reset probability 0.01, maxIter 20). This rebinds `results`.
val results = g.pageRank.resetProbability(0.01).maxIter(20).run()

results: org.graphframes.GraphFrame = GraphFrame(v:[id: string, name: string ... 2 more fields], e:[src: string, dst: string ... 2 more fields])


In [23]:
// Show every vertex of the result together with its "pagerank" column.
results.vertices.show()

+---+-------+---+--------------------+
| id|   name|age|            pagerank|
+---+-------+---+--------------------+
|  f|  Fanny| 36| 0.02681335502487603|
|  e| Esther| 32| 0.03063858676599769|
|  g|  Gabby| 60|0.011647254575707174|
|  a|  Alice| 34|0.038250038956939784|
|  b|    Bob| 36|  3.6205878323614185|
|  c|Charlie| 30|  3.2452495772901853|
|  d|  David| 29| 0.02681335502487603|
+---+-------+---+--------------------+



In [24]:
// Show the edges of the PageRank result, which carry an additional "weight" column.
results.edges.show()

+---+---+------------+------+
|src|dst|relationship|weight|
+---+---+------------+------+
|  a|  b|      friend|   0.5|
|  b|  c|      follow|   1.0|
|  e|  f|      follow|   0.5|
|  e|  d|      friend|   0.5|
|  c|  b|      follow|   1.0|
|  a|  e|      friend|   0.5|
|  f|  c|      follow|   1.0|
|  d|  a|      friend|   1.0|
+---+---+------------+------+

