In [38]:
import org.apache.spark.graphx._
import org.apache.spark.rdd.RDD
 
// Vertex table for the sample graph: (vertexId, (name, age)).
// Ids are assigned 1..6 in the order the people are listed below.
val vertexArray: Array[(Long, (String, Int))] =
  Array(
    ("Alice", 28),
    ("Bob", 27),
    ("Charlie", 65),
    ("David", 42),
    ("Ed", 55),
    ("Fran", 50)
  ).zipWithIndex.map { case (person, idx) => (idx + 1L, person) }
// Directed edges listed as (srcId, dstId, attr) and wrapped into GraphX Edge values.
// The Int attribute is what later cells compare against 5 to tell "likes" from "loves".
val edgeArray: Array[Edge[Int]] =
  Array(
    (2L, 1L, 7),
    (2L, 4L, 2),
    (3L, 2L, 4),
    (3L, 6L, 3),
    (4L, 1L, 1),
    (5L, 2L, 2),
    (5L, 3L, 8),
    (5L, 6L, 3)
  ).map { case (src, dst, attr) => Edge(src, dst, attr) }

// Turn the local arrays into RDDs via the notebook's SparkContext (`sc`).
val vertexRDD: RDD[(Long, (String, Int))] = sc.parallelize(vertexArray)
val edgeRDD: RDD[Edge[Int]] = sc.parallelize(edgeArray)
// Property graph: vertices carry (name, age), edges carry an Int.
val graph: Graph[(String, Int), Int] = Graph(vertexRDD, edgeRDD)





org.apache.spark.graphx.impl.GraphImpl@282f9582

In [39]:
// Print "<name> is <age>" for every vertex whose age is at least 30.
graph.vertices.filter { case (_, (_, age)) =>
  age >= 30
}.collect.foreach { case (_, (name, age)) =>
  println(s"$name is $age")
}

David is 42
Fran is 50
Charlie is 65
Ed is 55




In [40]:
// One line per edge: the source vertex's name "likes" the destination vertex's name.
graph.triplets.collect.foreach { t =>
  println(s"${t.srcAttr._1} likes ${t.dstAttr._1}")
}

Bob likes Alice
Bob likes David
Charlie likes Bob
Charlie likes Fran
David likes Alice
Ed likes Bob
Ed likes Charlie
Ed likes Fran




In [41]:
// Same as the "likes" listing, restricted to edges whose attribute exceeds 5.
graph.triplets.filter(_.attr > 5).collect.foreach { t =>
  println(s"${t.srcAttr._1} loves ${t.dstAttr._1}")
}

Bob loves Alice
Ed loves Charlie




In [21]:
// In-degree of each vertex as reported by GraphX's `inDegrees`.
// NOTE(review): vertices with no incoming edges appear to be missing from this
// RDD (the later userGraph cell falls back to 0 via getOrElse) — confirm.
val inDegrees: VertexRDD[Int] = graph.inDegrees





VertexRDDImpl[58] at RDD at VertexRDD.scala:57

In [42]:
import org.apache.spark.graphx._
import org.apache.spark.rdd.RDD

// Vertex property with named fields: name, age, and the in/out degree counts.
case class User(name: String, age: Int, inDeg: Int, outDeg: Int)

// Lift each (name, age) vertex attribute into a User with both degrees zeroed.
val initialUserGraph: Graph[User, Int] = graph.mapVertices { (_, attr) =>
  val (name, age) = attr
  User(name, age, 0, 0)
}

// Join in the degree counts in two steps; a vertex missing from a degree RDD
// keeps the default of 0 for that field.
val userGraph = {
  val withInDeg = initialUserGraph.outerJoinVertices(initialUserGraph.inDegrees) {
    (_, user, inDegOpt) => user.copy(inDeg = inDegOpt.getOrElse(0))
  }
  withInDeg.outerJoinVertices(initialUserGraph.outDegrees) {
    (_, user, outDegOpt) => user.copy(outDeg = outDegOpt.getOrElse(0))
  }
}






org.apache.spark.graphx.impl.GraphImpl@8812736

In [43]:
// Report each user's id, name and in-degree (how many users point at them).
userGraph.vertices.collect.foreach { case (id, user) =>
  println(s"User $id is called ${user.name} and is liked by ${user.inDeg} people.")
}



User 4 is called David and is liked by 1 people.
User 6 is called Fran and is liked by 2 people.
User 2 is called Bob and is liked by 2 people.
User 1 is called Alice and is liked by 2 people.
User 3 is called Charlie and is liked by 1 people.
User 5 is called Ed and is liked by 0 people.


In [44]:
// Names of the users whose in-degree equals their out-degree.
userGraph.vertices.filter { case (_, user) =>
  user.inDeg == user.outDeg
}.map { case (_, user) => user.name }.collect.foreach(println)

David
Bob




In [45]:
// Find the oldest follower of each user.
// Each edge's source sends its (name, age) to the edge's destination, and the
// merge step keeps the message with the greatest age. Vertices that receive no
// message are absent from the result (hence the Option in the join that follows).
val oldestFollower: VertexRDD[(String, Int)] = userGraph.aggregateMessages[(String, Int)](
  // sendMsg: the message is passed as an explicit tuple instead of relying on
  // the compiler's argument auto-tupling.
  triplet => triplet.sendToDst((triplet.srcAttr.name, triplet.srcAttr.age)),
  // mergeMsg: keep the older follower. Ties on age are broken by name so the
  // function is commutative and the result does not depend on merge order.
  (a, b) =>
    if (a._2 != b._2) { if (a._2 > b._2) a else b }
    else if (a._1 <= b._1) a else b
)





VertexRDDImpl[232] at RDD at VertexRDD.scala:57

In [46]:
// Describe each user's oldest follower, or say that the user has none when the
// left join finds no entry for that vertex.
userGraph.vertices.leftJoin(oldestFollower) { (_, user, followerOpt) =>
  followerOpt.fold(s"${user.name} does not have any followers.") { case (name, _) =>
    s"${name} is the oldest follower of ${user.name}."
  }
}.collect.foreach { case (_, line) => println(line) }

Bob is the oldest follower of David.
Charlie is the oldest follower of Fran.
Charlie is the oldest follower of Bob.
David is the oldest follower of Alice.
Ed is the oldest follower of Charlie.
Ed does not have any followers.




In [47]:

// Average age of each user's followers.
// Every follower sends (1, age) to the user it points at; messages are summed
// component-wise into (followerCount, ageSum) and then divided.
val averageAge: VertexRDD[Double] = userGraph.aggregateMessages[(Int, Double)](
  // sendMsg: explicit tuple and explicit Int -> Double conversion, rather than
  // relying on argument auto-tupling and implicit numeric widening.
  triplet => triplet.sendToDst((1, triplet.srcAttr.age.toDouble)),
  // mergeMsg: component-wise sum of (followerCount, ageSum)
  (a, b) => (a._1 + b._1, a._2 + b._2)
).mapValues { (_, totals) =>
  // Only vertices that received at least one message reach this point, so the
  // follower count is never zero here.
  val (followerCount, ageSum) = totals
  ageSum / followerCount
}

// Print the average follower age per user; users missing from averageAge are
// reported as having no followers.
userGraph.vertices.leftJoin(averageAge) { (_, user, avgOpt) =>
  avgOpt.fold(s"${user.name} does not have any followers.") { avgAge =>
    s"The average age of ${user.name}\'s followers is $avgAge."
  }
}.collect.foreach { case (_, line) => println(line) }

The average age of David's followers is 27.0.
The average age of Fran's followers is 60.0.
The average age of Bob's followers is 60.0.
The average age of Alice's followers is 34.5.
The average age of Charlie's followers is 55.0.
Ed does not have any followers.




In [53]:
// Restrict the graph to users aged 30 or over.
val olderGraph = userGraph.subgraph(vpred = (_, person) => person.age >= 30)

// Label every remaining vertex with the id of its connected component.
val cc = olderGraph.connectedComponents()

// Display the component id of each user.
// leftJoin yields an Option for the joined value; it is matched explicitly
// instead of calling `.get`, so a vertex without a component entry produces a
// message rather than a NoSuchElementException.
olderGraph.vertices.leftJoin(cc.vertices) { (_, user, compOpt) =>
  compOpt match {
    case Some(comp) => s"${user.name} is in component $comp"
    case None => s"${user.name} is not in any component"
  }
}.collect.foreach { case (_, str) => println(str) }

David is in component 4
Fran is in component 3
Charlie is in component 3
Ed is in component 3


