## GraphFrame
![](./img/graph.jpg "Graph")

In [None]:
# Vertex table: one row per user. GraphFrame requires the key column to be named 'id'.
vertex_columns = ['id', 'name', 'age', 'gender', 'university']
vertex_rows = [
    ('1', 'Alex', 28, 'M', 'MIPT'),
    ('2', 'Emeli', 28, 'F', 'MIPT'),
    ('3', 'Natasha', 27, 'F', 'SPbSU'),
    ('4', 'Pavel', 30, 'M', 'MIPT'),
    ('5', 'Oleg', 35, 'M', 'MIPT'),
    ('6', 'Ivan', 30, 'M', 'MSU'),
    ('7', 'Ilya', 29, 'M', 'MSU'),
]
vertices = sparkSession.createDataFrame(vertex_rows, vertex_columns)

# Friendship is mutual, so each undirected pair below is expanded into two
# directed edges (u -> v immediately followed by v -> u).
friend_pairs = [
    ('1', '2'), ('1', '3'), ('1', '4'),
    ('2', '3'), ('2', '5'),
    ('3', '4'), ('3', '5'), ('3', '6'), ('3', '7'),
]
edge_rows = [
    (src, dst, 'friend')
    for u, v in friend_pairs
    for src, dst in ((u, v), (v, u))
]
edges = sparkSession.createDataFrame(edge_rows, ['src', 'dst', 'relationship'])

# Combine both tables into a graph
g = GraphFrame(vertices, edges)

In [None]:
# How many users in our graph are older than 30?
# count() is an action: it actually runs the filter and returns the number of
# matching rows (a bare filter() in the middle of a cell produces no output).
print(g.vertices.filter("age > 30").count())

# How many friends does each user have?
# Every friendship is stored as two directed edges, so a vertex's in-degree
# is its number of friends.
g.inDegrees.show()

# How many users have at least 2 friends?
# "At least 2" is inclusive, hence >= rather than >.
users_with_2_plus_friends = g.inDegrees.filter("inDegree >= 2")
users_with_2_plus_friends.show()
print(users_with_2_plus_friends.count())

## Motif finding: DSL (Domain-Specific Language)

![](./img/twoDir.jpg "2dir")

In [None]:
# Motif: pairs of vertices a and b connected in both directions
# (edge e goes a -> b, edge e2 goes b -> a).
bidirectional_pattern = "(a)-[e]->(b); (b)-[e2]->(a)"
g.find(bidirectional_pattern)

### Edge
![](./img/edge.jpg "edge")

### Union
![](./img/union.jpg "Union")

### Anonymous edge (no edge name assigned)
![](./img/noname.jpg "Not assign name")

### Anonymous destination vertex (no destination name assigned)
![](./img/nodest.jpg "Not assign destination")

### Not
![](./img/not.jpg "Not")

## Counting the number of common friends with GraphFrames

B is a mutual friend of A and C:
![](./img/1.jpg "1")

In [None]:
# Two-hop paths a -> b -> c; the edges are left unnamed ([]).
two_hop_pattern = "(a)-[]->(b); (b)-[]->(c)"
motifs = g.find(two_hop_pattern)

![](./img/2.jpg "2")

In [None]:
# Two-hop paths a -> b -> c, minus the ones that come straight back to the
# starting vertex (a == c)
two_hop_paths = g.find("(a)-[]->(b); (b)-[]->(c)")
motifs = two_hop_paths.filter("a.id != c.id")

# Keep only the endpoint ids, exposed as columns A and C
AC = motifs.selectExpr("a.id as A", "c.id as C")

![](./img/3.jpg "3")

In [None]:
# Final result: each (A, C) row in AC corresponds to one intermediate vertex,
# so counting rows per pair gives the number of common friends of A and C.
# Only pairs starting at user '1' are kept. The ids are strings, so the filter
# compares against a string literal instead of relying on an implicit
# string-to-number cast.
AC.groupBy('A', 'C').count().filter("A = '1'")

![](./img/4.jpg "4")