In [1]:
import findspark
# findspark.init() is called before importing pyspark so the installation at
# the given location is the one that gets used.
# NOTE(review): this absolute path is machine-specific — adjust it (or make it
# configurable) before running the notebook on another machine.
findspark.init("D:/Anaconda3/Lib/site-packages/pyspark")

from pyspark import SparkContext
import pyspark
import os

# Ask spark-submit to fetch the GraphFrames package (built for Spark 2.4 /
# Scala 2.11). The variable is set before the SparkContext is created below;
# presumably it is only honoured when the JVM has not been launched yet.
SUBMIT_ARGS = "--packages graphframes:graphframes:0.7.0-spark2.4-s_2.11 pyspark-shell"
os.environ["PYSPARK_SUBMIT_ARGS"] = SUBMIT_ARGS

conf = pyspark.SparkConf()
# getOrCreate() returns the already-running context when this cell is executed
# a second time, instead of failing because a SparkContext already exists.
# On a fresh kernel it builds a new context from `conf`, exactly as before.
sc = pyspark.SparkContext.getOrCreate(conf=conf)

# Imported only after the context exists, matching the original cell order.
# GraphFrame is the only name the rest of the notebook uses from this package.
# NOTE(review): if this import fails, the files listed under the
# 'spark.submit.pyFiles' setting of sc.getConf() may need to be appended to
# sys.path first — confirm against your environment.
from graphframes import GraphFrame

# Initialize data

In [2]:
# Explicit import instead of `from pyspark import *` / `from pyspark.sql import *`:
# SparkSession is the only name this notebook needs from those modules.
from pyspark.sql import SparkSession

# Reuse the active session if there is one, otherwise start one named 'test'.
spark = SparkSession.builder.appName('test').getOrCreate()

# One row per person: (id, name).
vertex_rows = [
    ('1', 'Tom'),
    ('2', 'Jerry'),
    ('3', 'Andy'),
    ('4', 'Jeff'),
    ('5', 'Hanks'),
    ('6', 'Ben'),
    ('7', 'David'),
]

# One row per directed relationship: (src, dst, type).
# Every 'friend' relationship is stored as two rows, one per direction;
# the two 'follows' rows are one-way.
edge_rows = [
    ('1', '2', 'friend'),
    ('2', '1', 'friend'),
    ('3', '1', 'friend'),
    ('1', '3', 'friend'),
    ('2', '3', 'follows'),
    ('3', '4', 'friend'),
    ('4', '3', 'friend'),
    ('5', '3', 'friend'),
    ('3', '5', 'friend'),
    ('4', '5', 'follows'),
    ('6', '7', 'friend'),
    ('7', '6', 'friend'),
]

vertices = spark.createDataFrame(vertex_rows, ['id', 'name'])
edges = spark.createDataFrame(edge_rows, ['src', 'dst', 'type'])

# The graph used by every cell below.
g = GraphFrame(vertices, edges)


In [11]:
# Print the vertex table and the edge table, then the per-vertex edge counts:
# first the total degree, then the in-degree only.
for table in (g.vertices, g.edges, g.degrees, g.inDegrees):
    table.show()

+---+-----+
| id| name|
+---+-----+
|  1|  Tom|
|  2|Jerry|
|  3| Andy|
|  4| Jeff|
|  5|Hanks|
|  6|  Ben|
|  7|David|
+---+-----+

+---+---+-------+
|src|dst|   type|
+---+---+-------+
|  1|  2| friend|
|  2|  1| friend|
|  3|  1| friend|
|  1|  3| friend|
|  2|  3|follows|
|  3|  4| friend|
|  4|  3| friend|
|  5|  3| friend|
|  3|  5| friend|
|  4|  5|follows|
|  6|  7| friend|
|  7|  6| friend|
+---+---+-------+

+---+------+
| id|degree|
+---+------+
|  7|     2|
|  3|     7|
|  5|     3|
|  6|     2|
|  1|     4|
|  4|     3|
|  2|     3|
+---+------+

+---+--------+
| id|inDegree|
+---+--------+
|  7|       1|
|  3|       4|
|  5|       2|
|  6|       1|
|  1|       2|
|  4|       1|
|  2|       1|
+---+--------+



In [3]:
# Vertices with at least two incoming edges, highest in-degree first.
popular = g.inDegrees.where("inDegree >= 2")
popular.orderBy("inDegree", ascending=False).show()

+---+--------+
| id|inDegree|
+---+--------+
|  3|       4|
|  5|       2|
|  1|       2|
+---+--------+



# GraphFrame functions

In [9]:
# Label propagation with maxIter=5; vertices that end up with the same
# 'label' value are listed next to each other.
result = g.labelPropagation(maxIter=5)
labels_by_vertex = result.select("id", "label")
labels_by_vertex.orderBy('label').show()

+---+------------+
| id|       label|
+---+------------+
|  6| 25769803776|
|  1|154618822656|
|  4|154618822656|
|  5|154618822656|
|  2|420906795008|
|  3|420906795008|
|  7|644245094400|
+---+------------+



In [10]:
# Strongly connected components (maxIter=10); rows are ordered by the
# 'component' value so members of one component appear together.
cc = g.stronglyConnectedComponents(maxIter=10)
component_of_vertex = cc.select("id", "component")
component_of_vertex.sort("component").show()

+---+------------+
| id|   component|
+---+------------+
|  6| 25769803776|
|  7| 25769803776|
|  2|154618822656|
|  1|154618822656|
|  4|154618822656|
|  3|154618822656|
|  5|154618822656|
+---+------------+



In [12]:
# For every vertex, the distance to each of the landmark vertices "2" and "6".
landmark_ids = ["2", "6"]
results = g.shortestPaths(landmarks=landmark_ids)
distance_table = results.select("id", "distances")
distance_table.show()

+---+---------+
| id|distances|
+---+---------+
|  1| [2 -> 1]|
|  3| [2 -> 2]|
|  2| [2 -> 0]|
|  4| [2 -> 3]|
|  7| [6 -> 1]|
|  6| [6 -> 0]|
|  5| [2 -> 3]|
+---+---------+



In [13]:
# Motif: an edge from a to b together with an edge from b back to a.
# Each reciprocal pair is matched twice, once per choice of `a`.
reciprocal_motif = "(a)-[e]->(b); (b)-[e2]->(a)"
g.find(reciprocal_motif).show()

+----------+--------------+----------+--------------+
|         a|             e|         b|            e2|
+----------+--------------+----------+--------------+
|  [6, Ben]|[6, 7, friend]|[7, David]|[7, 6, friend]|
|[2, Jerry]|[2, 1, friend]|  [1, Tom]|[1, 2, friend]|
| [3, Andy]|[3, 5, friend]|[5, Hanks]|[5, 3, friend]|
|  [1, Tom]|[1, 3, friend]| [3, Andy]|[3, 1, friend]|
| [3, Andy]|[3, 1, friend]|  [1, Tom]|[1, 3, friend]|
|[7, David]|[7, 6, friend]|  [6, Ben]|[6, 7, friend]|
|[5, Hanks]|[5, 3, friend]| [3, Andy]|[3, 5, friend]|
| [4, Jeff]|[4, 3, friend]| [3, Andy]|[3, 4, friend]|
|  [1, Tom]|[1, 2, friend]|[2, Jerry]|[2, 1, friend]|
| [3, Andy]|[3, 4, friend]| [4, Jeff]|[4, 3, friend]|
+----------+--------------+----------+--------------+



In [14]:
# Motif: a and c are each connected to b in both directions, i.e. b is a
# shared friend of a and c. The edges are anonymous, so only the vertex
# columns a, b, c come back. Rows with a == c are dropped, then exact
# duplicates are removed.
# Each pair still appears in both orders, (a, c) and (c, a).
shared_friend_motif = "(a)-[]->(b); (b)-[]->(c); (c)-[]->(b); (b)-[]->(a)"
mutualFriends = (
    g.find(shared_friend_motif)
    .where('a!=c')
    .dropDuplicates()
)
mutualFriends.show()

+----------+---------+----------+
|         a|        b|         c|
+----------+---------+----------+
|  [1, Tom]|[3, Andy]|[5, Hanks]|
| [3, Andy]| [1, Tom]|[2, Jerry]|
| [4, Jeff]|[3, Andy]|[5, Hanks]|
|[5, Hanks]|[3, Andy]| [4, Jeff]|
|[2, Jerry]| [1, Tom]| [3, Andy]|
|[5, Hanks]|[3, Andy]|  [1, Tom]|
| [4, Jeff]|[3, Andy]|  [1, Tom]|
|  [1, Tom]|[3, Andy]| [4, Jeff]|
+----------+---------+----------+



In [15]:
# Number of triangles each vertex takes part in, listed in id order.
triangles = g.triangleCount()
triangles.orderBy('id').show()

+-----+---+-----+
|count| id| name|
+-----+---+-----+
|    1|  1|  Tom|
|    1|  2|Jerry|
|    2|  3| Andy|
|    1|  4| Jeff|
|    1|  5|Hanks|
|    0|  6|  Ben|
|    0|  7|David|
+-----+---+-----+



In [16]:
# PageRank with reset probability 0.15 and tolerance 0.01.
# The result carries a 'pagerank' column on its vertices and a 'weight'
# column on its edges; both tables are printed, vertices first.
pr = g.pageRank(resetProbability=0.15, tol=0.01)
for table in (pr.vertices, pr.edges):
    table.show()

+---+-----+------------------+
| id| name|          pagerank|
+---+-----+------------------+
|  1|  Tom|0.9055074972891306|
|  3| Andy|1.8539196427388125|
|  2|Jerry| 0.537796799947492|
|  4| Jeff|0.6873519241384105|
|  7|David|1.0225331112091935|
|  6|  Ben|1.0225331112091935|
|  5|Hanks|0.9703579134677661|
+---+-----+------------------+

+---+---+-------+------------------+
|src|dst|   type|            weight|
+---+---+-------+------------------+
|  1|  2| friend|               0.5|
|  1|  3| friend|               0.5|
|  7|  6| friend|               1.0|
|  4|  5|follows|               0.5|
|  5|  3| friend|               1.0|
|  3|  5| friend|0.3333333333333333|
|  4|  3| friend|               0.5|
|  2|  1| friend|               0.5|
|  3|  4| friend|0.3333333333333333|
|  6|  7| friend|               1.0|
|  3|  1| friend|0.3333333333333333|
|  2|  3|follows|               0.5|
+---+---+-------+------------------+

