# Example 2 - motif finding

In [4]:
from graphframes import GraphFrame

In [5]:
# Vertex table: one row per user. The "id" values are the ones the edge
# table refers to in its "src" and "dst" columns.
v = spark.createDataFrame(
    [
        ("a", "Alice", 34),
        ("b", "Bob", 36),
        ("c", "Charlie", 30),
        ("d", "David", 29),
        ("e", "Esther", 32),
        ("f", "Fanny", 36),
        ("g", "Gabby", 60),
    ],
    schema=["id", "name", "age"],
)

In [6]:
# Edge table: one row per directed relationship from "src" to "dst",
# labelled either "friend" or "follow".
e = spark.createDataFrame(
    [
        ("a", "b", "friend"),
        ("b", "c", "follow"),
        ("c", "b", "follow"),
        ("f", "c", "follow"),
        ("e", "f", "follow"),
        ("e", "d", "friend"),
        ("d", "a", "friend"),
        ("a", "e", "friend"),
    ],
    schema=["src", "dst", "relationship"],
)

In [7]:
# Combine the vertex DataFrame `v` and the edge DataFrame `e` into one GraphFrame.
g1 = GraphFrame(v, e)

## Find chains of 4 users in which at least 2 of the 3 edges are 'friend'

In [15]:
# Motif: three consecutive directed edges v1 -> v2 -> v3 -> v4.
# The motif does not force the four vertices to be distinct, so cycles such
# as e -> d -> a -> e appear in the result as well.
fourInARow = g1.find(
    "(v1)-[e1]->(v2); "
    "(v2)-[e2]->(v3); "
    "(v3)-[e3]->(v4)"
)
fourInARow.show()

                                                                                

+----------------+--------------+----------------+--------------+----------------+--------------+----------------+
|              v1|            e1|              v2|            e2|              v3|            e3|              v4|
+----------------+--------------+----------------+--------------+----------------+--------------+----------------+
|  [d, David, 29]|[d, a, friend]|  [a, Alice, 34]|[a, e, friend]| [e, Esther, 32]|[e, f, follow]|  [f, Fanny, 36]|
| [e, Esther, 32]|[e, d, friend]|  [d, David, 29]|[d, a, friend]|  [a, Alice, 34]|[a, e, friend]| [e, Esther, 32]|
|  [d, David, 29]|[d, a, friend]|  [a, Alice, 34]|[a, e, friend]| [e, Esther, 32]|[e, d, friend]|  [d, David, 29]|
|  [a, Alice, 34]|[a, e, friend]| [e, Esther, 32]|[e, f, follow]|  [f, Fanny, 36]|[f, c, follow]|[c, Charlie, 30]|
|  [d, David, 29]|[d, a, friend]|  [a, Alice, 34]|[a, b, friend]|    [b, Bob, 36]|[b, c, follow]|[c, Charlie, 30]|
|  [f, Fanny, 36]|[f, c, follow]|[c, Charlie, 30]|[c, b, follow]|    [b, Bob, 36

In [18]:
# Possibility 1: register the predicate as a named SQL function and filter with a SQL expression string
def cond2Of3(e1, e2, e3):
    """Return True when at least two of the three edges are "friend" edges.

    Each argument must expose a ``relationship`` attribute; an edge counts
    only when that attribute equals the string "friend".
    """
    friend_count = sum(
        1 for edge in (e1, e2, e3) if edge.relationship == "friend"
    )
    return friend_count >= 2

from pyspark.sql.types import BooleanType
from pyspark.sql.functions import udf

# Register the predicate under the SQL name "cond" so it can be called from a
# SQL expression string. (The alternative, udf(cond2Of3, BooleanType()), would
# instead be applied to Column arguments.)
spark.udf.register(name="cond", f=cond2Of3, returnType=BooleanType())

(
    fourInARow
    .filter("cond(e1, e2, e3)")
    .show()
)

22/12/17 10:08:50 WARN analysis.SimpleFunctionRegistry: The function cond replaced a previously registered function.
                                                                                

+---------------+--------------+---------------+--------------+---------------+--------------+----------------+
|             v1|            e1|             v2|            e2|             v3|            e3|              v4|
+---------------+--------------+---------------+--------------+---------------+--------------+----------------+
| [d, David, 29]|[d, a, friend]| [a, Alice, 34]|[a, e, friend]|[e, Esther, 32]|[e, f, follow]|  [f, Fanny, 36]|
|[e, Esther, 32]|[e, d, friend]| [d, David, 29]|[d, a, friend]| [a, Alice, 34]|[a, e, friend]| [e, Esther, 32]|
| [d, David, 29]|[d, a, friend]| [a, Alice, 34]|[a, e, friend]|[e, Esther, 32]|[e, d, friend]|  [d, David, 29]|
| [d, David, 29]|[d, a, friend]| [a, Alice, 34]|[a, b, friend]|   [b, Bob, 36]|[b, c, follow]|[c, Charlie, 30]|
|[e, Esther, 32]|[e, d, friend]| [d, David, 29]|[d, a, friend]| [a, Alice, 34]|[a, b, friend]|    [b, Bob, 36]|
| [a, Alice, 34]|[a, e, friend]|[e, Esther, 32]|[e, d, friend]| [d, David, 29]|[d, a, friend]|  [a, Alic

In [16]:
# Solution on the slides: wrap the predicate with udf() and apply it to the edge columns directly

def condition(e1, e2, e3):
    """Return True if at least 2 of the 3 "relationship" labels are "friend".

    Each argument is indexed with the key "relationship"; an edge counts only
    when that value equals the string "friend".
    """
    is_friend = [edge["relationship"] == "friend" for edge in (e1, e2, e3)]
    return sum(map(int, is_friend)) >= 2
 
from pyspark.sql.types import BooleanType
from pyspark.sql.functions import udf 

# Wrap the Python predicate as a Spark UDF with a boolean return type.
conditionUDF = udf(condition, returnType=BooleanType())

# Keep only the chains whose three edge columns satisfy the predicate.
chainWith2Friends = fourInARow.filter(
    conditionUDF(fourInARow["e1"], fourInARow["e2"], fourInARow["e3"])
)

chainWith2Friends.show()

                                                                                

+---------------+--------------+---------------+--------------+---------------+--------------+----------------+
|             v1|            e1|             v2|            e2|             v3|            e3|              v4|
+---------------+--------------+---------------+--------------+---------------+--------------+----------------+
| [d, David, 29]|[d, a, friend]| [a, Alice, 34]|[a, e, friend]|[e, Esther, 32]|[e, f, follow]|  [f, Fanny, 36]|
|[e, Esther, 32]|[e, d, friend]| [d, David, 29]|[d, a, friend]| [a, Alice, 34]|[a, e, friend]| [e, Esther, 32]|
| [d, David, 29]|[d, a, friend]| [a, Alice, 34]|[a, e, friend]|[e, Esther, 32]|[e, d, friend]|  [d, David, 29]|
| [d, David, 29]|[d, a, friend]| [a, Alice, 34]|[a, b, friend]|   [b, Bob, 36]|[b, c, follow]|[c, Charlie, 30]|
|[e, Esther, 32]|[e, d, friend]| [d, David, 29]|[d, a, friend]| [a, Alice, 34]|[a, b, friend]|    [b, Bob, 36]|
| [a, Alice, 34]|[a, e, friend]|[e, Esther, 32]|[e, d, friend]| [d, David, 29]|[d, a, friend]|  [a, Alic