# BIL501 Project 

##### Halil Berk Dergi

In [None]:
import org.apache.spark._
import org.apache.spark.graphx._
// To make some of the examples work we will also need RDD
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._
import org.apache.spark.sql.SparkSession
import org.apache.log4j.Logger
import org.apache.log4j.Level
import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import scala.util.MurmurHash

In [None]:
// Stop the currently running SparkContext; the following cell builds a new SparkSession
// against an explicit master URL.
sc.stop()

Create a new SparkSession whose master is spark://Ubuntu-HBD:7077. The cluster is configured in standalone mode with one master and three workers.

In [19]:
// Build (or reuse) the notebook's SparkSession, named "BIL501 Project" and pointed at the
// cluster master URL.
val spark = SparkSession
  .builder()
  .appName("BIL501 Project")
  .master("spark://Ubuntu-HBD:7077")
  .getOrCreate()

spark: org.apache.spark.sql.SparkSession = org.apache.spark.sql.SparkSession@4bb41b7b


In [13]:
// NOTE(review): this repeats the earlier sc.stop(). Its execution count ([13]) is lower than
// that of the session-creation cell ([19]), so it appears to have been run before the session
// was built — confirm whether it is still needed at this position.
sc.stop()

Read the text files that were exported from the Foursquare API.

In [20]:
// Load each export as a Dataset[String] holding one raw line per row; the comma-separated
// fields are split manually in later cells.
val venues = spark.read.textFile("export_venues.csv")
val links = spark.read.textFile("export_links.csv")

venues: org.apache.spark.sql.Dataset[String] = [value: string]
links: org.apache.spark.sql.Dataset[String] = [value: string]


In [21]:
// Preview the first three raw lines of the venues export.
venues.show(3)

+--------------------+
|               value|
+--------------------+
|4c0e115bc700c9b6c...|
|4bed1e076e8c20a19...|
|4ce9209e0f196dcbd...|
+--------------------+
only showing top 3 rows



In [22]:
// Preview the first three raw lines of the links export.
links.show(3)

+--------------------+
|               value|
+--------------------+
|4c0e115bc700c9b6c...|
|4c0e115bc700c9b6c...|
|4c0e115bc700c9b6c...|
+--------------------+
only showing top 3 rows



Defining class Graph

In [23]:
// Simple holder for a vertex RDD and an edge RDD.
// NOTE(review): this local class shares its name with the Graph imported from
// org.apache.spark.graphx._; the venuesgraph cell further down reports its result as
// org.apache.spark.graphx.Graph, so this class appears to be unused in the visible notebook —
// confirm and consider removing it to avoid the name clash.
class Graph[VD, ED] (val vertices: VertexRDD[VD], val edges: EdgeRDD[ED])

defined class Graph
Companions must be defined together; you may wish to use :paste mode for this.


Defining class Venues

In [24]:
/**
 * Attributes attached to every venue vertex of the graph.
 *
 * @param Name display name of the venue
 * @param lat  first coordinate read from the venues export (presumably latitude — confirm)
 * @param lng  second coordinate read from the venues export (presumably longitude — confirm)
 */
case class Venues(
  Name: String,
  lat: Float,
  lng: Float
)


defined class Venues


In [25]:
/**
 * Parses one line of the venues export into a GraphX vertex tuple.
 *
 * The line is read as comma-separated fields: index 0 is the venue id, index 1 the name,
 * and indices 2 and 3 the two coordinates.
 *
 * @param line one raw line of the venues export
 * @return the vertex id (hash of the venue id string) paired with the venue attributes
 * @throws ArrayIndexOutOfBoundsException if the line has fewer than four fields
 * @throws NumberFormatException if a coordinate field is not a valid float
 */
def ayır(line: String): (Long, Venues) = {
  // Split the line once and reuse the fields instead of re-splitting for every column.
  val fields = line.split(",")

  // VertexId must be a Long, so the string venue id is hashed; the Int hash is widened to Long.
  // The same hash is applied to the ids in the links export, so both sides must stay in sync.
  val vertexId: Long = MurmurHash.stringHash(fields(0))

  (vertexId, Venues(fields(1), fields(2).toFloat, fields(3).toFloat))
}


ayır: (line: String)(Long, Venues)


In [27]:
// Parse every venue line with ayır into (VertexId, Venues) pairs; these become the graph's vertices.
val venuesRDD: RDD[(VertexId, Venues)] =  venues.rdd.map(ayır)

venuesRDD: org.apache.spark.rdd.RDD[(org.apache.spark.graphx.VertexId, Venues)] = MapPartitionsRDD[11] at map at <console>:51


In [28]:
// Spot-check the first three parsed vertices.
venuesRDD.take(3)

res14: Array[(org.apache.spark.graphx.VertexId, Venues)] = Array((914172510,Venues(Anıtkabir,39.926785,32.83659)), (-2086253975,Venues(Middle East Technical University (Orta Doğu Teknik Üniversitesi),39.90006,32.788715)), (919849682,Venues(HD İskender,39.95,32.830822)))


In [30]:
// Edge attribute type: the numeric third column of the links export.
type Connection = Float

// Turn every link line ("srcVenueId,dstVenueId,value") into a GraphX edge. The venue ids are
// hashed with the same function used for the vertices so that edge endpoints match vertex ids.
val linksRDD: RDD[Edge[Connection]] = links.rdd.map { row =>
  val cols = row.split(",")
  val srcId: VertexId = MurmurHash.stringHash(cols(0))
  val dstId: VertexId = MurmurHash.stringHash(cols(1))
  Edge(srcId, dstId, cols(2).toFloat)
}

linksRDD: org.apache.spark.rdd.RDD[org.apache.spark.graphx.Edge[Connection]] = MapPartitionsRDD[17] at map at <console>:49


### Creating VENUEs - GRAPH

In [31]:
// Assemble the GraphX property graph: venues are the vertices, links are the edges.
val venuesgraph = Graph(venuesRDD, linksRDD)

venuesgraph: org.apache.spark.graphx.Graph[Venues,Connection] = org.apache.spark.graphx.impl.GraphImpl@78da9acb


Distances between connected venues (one sentence per edge)

// One sentence per edge of the graph, in the form
// "<source name> ile <destination name> arasındaki mesafe <edge attribute> m."
// Built with string interpolation; the edge attribute is inserted as-is (no stray unary plus).
val facts: RDD[String] =
  venuesgraph.triplets.map { triplet =>
    s"${triplet.srcAttr.Name} ile ${triplet.dstAttr.Name} arasındaki mesafe ${triplet.attr} m."
  }

// collect() brings every sentence to the driver before printing.
facts.collect().foreach(println)

# PageRank

### 1) PartitioningStrategy = RandomVertexCut

In [None]:
// NOTE(review): partitionBy returns a new Graph rather than repartitioning venuesgraph in
// place, and the returned graph is not assigned here, so this statement on its own leaves
// venuesgraph unchanged.
venuesgraph.partitionBy(PartitionStrategy.RandomVertexCut)

In [34]:
// Run PageRank (tolerance 0.0001) and keep the per-vertex ranks.
// partitionBy returns a new graph instead of modifying venuesgraph, so it is chained here to
// make sure PageRank actually runs on the RandomVertexCut-partitioned graph.
val ranks1 = venuesgraph
  .partitionBy(PartitionStrategy.RandomVertexCut)
  .pageRank(0.0001)
  .vertices

ranks: org.apache.spark.graphx.VertexRDD[Double] = VertexRDDImpl[1047] at RDD at VertexRDD.scala:57


res17: (org.apache.spark.graphx.VertexId, Double) = (2080198244,0.35519169581190246)


### 2) PartitioningStrategy = RandomVertexCut

In [56]:
// NOTE(review): partitionBy returns a new Graph rather than repartitioning venuesgraph in
// place, and the returned graph is not assigned here, so this statement on its own leaves
// venuesgraph unchanged.
// NOTE(review): this section uses the same strategy as section 1; if a different
// PartitionStrategy was meant to be compared here, change it — confirm with the author.
venuesgraph.partitionBy(PartitionStrategy.RandomVertexCut)

ranks2: org.apache.spark.graphx.VertexRDD[Double] = VertexRDDImpl[3090] at RDD at VertexRDD.scala:57


In [None]:
// Run PageRank (tolerance 0.0001) and keep the per-vertex ranks.
// partitionBy returns a new graph instead of modifying venuesgraph, so it is chained here to
// make sure PageRank actually runs on the RandomVertexCut-partitioned graph.
val ranks2 = venuesgraph
  .partitionBy(PartitionStrategy.RandomVertexCut)
  .pageRank(0.0001)
  .vertices

### 3) PartitioningStrategy = RandomVertexCut

In [None]:
// NOTE(review): partitionBy returns a new Graph rather than repartitioning venuesgraph in
// place, and the returned graph is not assigned here, so this statement on its own leaves
// venuesgraph unchanged.
// NOTE(review): this section uses the same strategy as section 1; if a different
// PartitionStrategy was meant to be compared here, change it — confirm with the author.
venuesgraph.partitionBy(PartitionStrategy.RandomVertexCut)

In [None]:
// Run PageRank (tolerance 0.0001) and keep the per-vertex ranks.
// partitionBy returns a new graph instead of modifying venuesgraph, so it is chained here to
// make sure PageRank actually runs on the RandomVertexCut-partitioned graph.
// NOTE(review): the name ranks2 is also defined in section 2; in the notebook the latest
// executed definition wins — confirm whether a distinct name was intended for this section.
val ranks2 = venuesgraph
  .partitionBy(PartitionStrategy.RandomVertexCut)
  .pageRank(0.0001)
  .vertices

### 4) PartitioningStrategy = RandomVertexCut

In [None]:
// NOTE(review): partitionBy returns a new Graph rather than repartitioning venuesgraph in
// place, and the returned graph is not assigned here, so this statement on its own leaves
// venuesgraph unchanged.
// NOTE(review): this section uses the same strategy as section 1; if a different
// PartitionStrategy was meant to be compared here, change it — confirm with the author.
venuesgraph.partitionBy(PartitionStrategy.RandomVertexCut)

In [None]:
// Run PageRank (tolerance 0.0001) and keep the per-vertex ranks.
// partitionBy returns a new graph instead of modifying venuesgraph, so it is chained here to
// make sure PageRank actually runs on the RandomVertexCut-partitioned graph.
// NOTE(review): the name ranks2 is also defined in section 2; in the notebook the latest
// executed definition wins — confirm whether a distinct name was intended for this section.
val ranks2 = venuesgraph
  .partitionBy(PartitionStrategy.RandomVertexCut)
  .pageRank(0.0001)
  .vertices

In [78]:
// Attach each venue's name to its PageRank score (joined on vertex id) and order the
// (name, rank) pairs from highest to lowest rank.
val ranksByUsername = venuesRDD
  .join(ranks2)
  .map { case (_, (venue, rank)) => (venue.Name, rank) }
  .sortBy(_._2, ascending = false)

ranksByUsername: org.apache.spark.rdd.RDD[(String, Double)] = MapPartitionsRDD[3127] at sortBy at <console>:52


In [79]:
// Bring the sorted (name, rank) pairs to the driver and print them one per line.
println(ranksByUsername.collect().mkString("\n"))

(Kızılay Square (Kızılay Meydanı),13.599011290335826)
(ANKAmall,8.987377808294202)
(Kızılay AVM,6.220624864775678)
(Kahveci Hacıbaba,6.110454062983242)
(Güvenpark,5.9162318606960635)
(Ezgi Cafe,4.997005490396357)
(Seğmenler Park (Seğmenler Parkı),4.723452796375306)
(Kentpark,4.695524529050263)
(Kuğulu Park,4.184787966983499)
(Cepa,3.7242873091357063)
(Özsüt,3.568926305450556)
(Starbucks,3.568926305450556)
(Cinemaximum,3.568926305450556)
(Armada,3.47051621675922)
(Timboo Cafe,3.408143591428786)
(Starbucks,3.3908664922666434)
(IF Performance Hall,2.9872315463589922)
(Brasserie Bomonti,2.7468999206193314)
(Prestige Cinema,2.598862531486412)
(Passage,2.5238787103009153)
(Timboo Cafe,2.4575423267767196)
(Cinemaximum,2.375578341355078)
(Devrez,2.3198514379890938)
(Starbucks,2.313312289061767)
(HD İskender,2.30588720342222)
(Starbucks,2.234574316719174)
(Mado,2.121430792538547)
(Blackk Ankara,1.7319641717211292)
(Karum,1.7088269412695063)
(Leman Kültür,1.6376355436433265)
(Mickey's by Las Chi

Reset css and font defaults in:
/home/halilberkdergi/.jupyter/custom &
/home/halilberkdergi/.local/share/jupyter/nbextensions

