### 01 创建SparkSession对象的同时，加载graphframes库

In [1]:
import findspark
findspark.init()
from pyspark.sql import SparkSession
# Build (or reuse) the session. The spark.jars.packages setting names the
# GraphFrames artifact so that it is loaded together with the session.
spark = (
    SparkSession.builder
    .config('spark.jars.packages', 'graphframes:graphframes:0.8.2-spark3.0-s_2.12')
    .getOrCreate()
)

### 02 通过读入文件social-nodes.csv创建顶点DataFrame

In [2]:
# Vertex table: the first CSV row supplies the column names and the column
# types are inferred from the data.
df_V = spark.read.options(header=True, inferSchema=True).csv('social-nodes.csv')
df_V.show()

+-------+
|     id|
+-------+
|  Alice|
|Bridget|
|Charles|
|   Doug|
|   Mark|
|Michael|
|  David|
|    Amy|
|  James|
+-------+



### 03 通过读入文件social-relationships.csv创建边DataFrame

In [3]:
# Edge table: one row per relationship, with src/dst columns naming the
# two endpoints (see the output below).
df_E = (
    spark.read
    .options(header=True, inferSchema=True)
    .csv('./social-relationships.csv')
)
df_E.show()

+-------+-------+------------+
|    src|    dst|relationship|
+-------+-------+------------+
|  Alice|Bridget|     FOLLOWS|
|  Alice|Charles|     FOLLOWS|
|   Mark|   Doug|     FOLLOWS|
|Bridget|Michael|     FOLLOWS|
|   Doug|   Mark|     FOLLOWS|
|Michael|  Alice|     FOLLOWS|
|  Alice|Michael|     FOLLOWS|
|Bridget|  Alice|     FOLLOWS|
|Michael|Bridget|     FOLLOWS|
|Charles|   Doug|     FOLLOWS|
|Bridget|   Doug|     FOLLOWS|
|Michael|   Doug|     FOLLOWS|
|  Alice|   Doug|     FOLLOWS|
|   Mark|  Alice|     FOLLOWS|
|  David|    Amy|     FOLLOWS|
|  James|  David|     FOLLOWS|
+-------+-------+------------+



### 04 引入GraphFrame模块，并根据顶点和边DataFrame创建GraphFrame图对象

In [7]:
# Imported here rather than at the top of the notebook: presumably the package
# only becomes importable once the session has loaded the GraphFrames
# artifact — TODO confirm for this environment.
from graphframes import GraphFrame

# v is the vertex DataFrame, e is the edge DataFrame.
graph = GraphFrame(v=df_V, e=df_E)
type(graph)

graphframes.graphframe.GraphFrame

### 05 分别查看图对象的顶点和边视图

In [8]:
# Vertex view of the graph; the rows shown match the df_V DataFrame that was
# passed to GraphFrame.
graph.vertices.show()

+-------+
|     id|
+-------+
|  Alice|
|Bridget|
|Charles|
|   Doug|
|   Mark|
|Michael|
|  David|
|    Amy|
|  James|
+-------+



In [9]:
# Edge view of the graph; the rows shown match the df_E DataFrame that was
# passed to GraphFrame.
graph.edges.show()

+-------+-------+------------+
|    src|    dst|relationship|
+-------+-------+------------+
|  Alice|Bridget|     FOLLOWS|
|  Alice|Charles|     FOLLOWS|
|   Mark|   Doug|     FOLLOWS|
|Bridget|Michael|     FOLLOWS|
|   Doug|   Mark|     FOLLOWS|
|Michael|  Alice|     FOLLOWS|
|  Alice|Michael|     FOLLOWS|
|Bridget|  Alice|     FOLLOWS|
|Michael|Bridget|     FOLLOWS|
|Charles|   Doug|     FOLLOWS|
|Bridget|   Doug|     FOLLOWS|
|Michael|   Doug|     FOLLOWS|
|  Alice|   Doug|     FOLLOWS|
|   Mark|  Alice|     FOLLOWS|
|  David|    Amy|     FOLLOWS|
|  James|  David|     FOLLOWS|
+-------+-------+------------+



### 06 查找图中相互关注的人

In [13]:
# Motif query: an anonymous edge in each direction between v1 and v2, i.e.
# pairs of users who follow each other. Each mutual pair appears twice in the
# result, once for each ordering of (v1, v2).
df_find = graph.find(
    '(v1)-[]->(v2);'   # an edge from v1 to v2 ...
    '(v2)-[]->(v1)'    # ... and an edge back from v2 to v1
)
df_find.show()

+---------+---------+
|       v1|       v2|
+---------+---------+
|  [Alice]|[Bridget]|
|   [Mark]|   [Doug]|
|[Bridget]|[Michael]|
|   [Doug]|   [Mark]|
|[Michael]|  [Alice]|
|  [Alice]|[Michael]|
|[Bridget]|  [Alice]|
|[Michael]|[Bridget]|
+---------+---------+



In [18]:
# NOTE(review): disabled experiment, kept for reference. It looks like an
# attempt to gather, for each v1, the v2 vertices matched with it. As written
# it would likely fail: `df_find('v2')` calls the DataFrame like a function
# and is not an aggregate expression — TODO confirm, then rewrite with an
# aggregate (e.g. collect_list) or delete this cell.
#df_find.groupBy("v1").agg(df_find('v2')).show()

### 07 查看图中各个顶点的度、出度和入度

In [14]:
# Total degree per vertex. In the output shown, each value equals that
# vertex's inDegree plus its outDegree (e.g. Alice: 3 + 4 = 7).
graph.degrees.show()

+-------+------+
|     id|degree|
+-------+------+
|  James|     1|
|   Doug|     6|
|    Amy|     1|
|Michael|     5|
|Charles|     2|
|   Mark|     3|
|  Alice|     7|
|Bridget|     5|
|  David|     2|
+-------+------+



In [15]:
# Number of incoming edges per vertex. A vertex without incoming edges has no
# row in the output shown (James is absent).
graph.inDegrees.show()

+-------+--------+
|     id|inDegree|
+-------+--------+
|   Doug|       5|
|    Amy|       1|
|Michael|       2|
|Charles|       1|
|   Mark|       1|
|  Alice|       3|
|Bridget|       2|
|  David|       1|
+-------+--------+



In [16]:
# Number of outgoing edges per vertex. A vertex without outgoing edges has no
# row in the output shown (Amy is absent).
graph.outDegrees.show()

+-------+---------+
|     id|outDegree|
+-------+---------+
|  James|        1|
|   Doug|        1|
|Michael|        3|
|Charles|        1|
|   Mark|        2|
|  Alice|        4|
|Bridget|        3|
|  David|        1|
+-------+---------+



### 08 将度、出度和入度DataFrame通过join联结起来

<img src='degrees.png' />

### 09 采用广度优先搜索（BFS）算法，找出距离David路径最短的人（不包含David本人）

In [None]:
# Breadth-first search starting at David and stopping at the nearest vertices
# that are not David; truncate=False prints the full cell contents.
graph.bfs(
    fromExpr="id='David'",
    toExpr="id<>'David'",
).show(truncate=False)