In [1]:
from pyspark.ml.fpm import FPGrowth
from pyspark.sql import SparkSession
# Obtain a Spark session for this notebook: reuse the active one if it exists,
# otherwise create a local session named "FPGrowthExample".
# "local[*]" runs Spark on this machine with one worker thread per logical core.
spark = (
    SparkSession.builder
    .master("local[*]")
    .appName("FPGrowthExample")
    .getOrCreate()
)

In [2]:
# Load the raw transactions; the first line of the CSV supplies the column names.
# No schema is given, so every column is read as a string.
data = (
    spark.read
    .option("header", True)
    .csv("transactions.csv")
)

In [3]:
# Preview the first 20 rows of the raw (id, item) pairs, truncating long cell values.
data.show(n=20, truncate=True)

+---+--------------------+
| id|                item|
+---+--------------------+
|  1|        citrus fruit|
|  1| semi-finished bread|
|  1|           margarine|
|  1|         ready soups|
|  2|      tropical fruit|
|  2|              yogurt|
|  2|              coffee|
|  3|          whole milk|
|  4|           pip fruit|
|  4|              yogurt|
|  4|       cream cheese |
|  4|        meat spreads|
|  5|    other vegetables|
|  5|          whole milk|
|  5|      condensed milk|
|  5|long life bakery ...|
|  6|          whole milk|
|  6|              butter|
|  6|              yogurt|
|  6|                rice|
+---+--------------------+
only showing top 20 rows



In [None]:
import pyspark.sql.functions as f

# Build one basket (array of items) per transaction id.
#
# * trim(): the raw file contains item names with stray whitespace
#   (e.g. "cream cheese "), which would otherwise be counted as a different
#   item from the same name without the padding.
# * collect_set() instead of collect_list(): FPGrowth requires the items of a
#   transaction to be unique and fails at fit time if a basket repeats an item,
#   so duplicates are removed while aggregating. Item order inside a basket is
#   irrelevant to FPGrowth.
data1 = (
    data
    .groupby("id")
    .agg(f.collect_set(f.trim(f.col("item"))).alias("items"))
)
data1.show()

+----+--------------------+
|  id|               items|
+----+--------------------+
|1090|[beef, citrus fru...|
|1159|[citrus fruit, bo...|
|1436|[sausage, whole m...|
|1512|[sausage, fish, o...|
|1572|            [yogurt]|
|2069|[chicken, canned ...|
|2088|[sausage, ham, wh...|
|2136|[other vegetables...|
|2162|[tropical fruit, ...|
|2294|[beverages, rolls...|
|2904|[root vegetables,...|
| 296|[citrus fruit, pa...|
|3210|[other vegetables...|
|3414|[chicken, citrus ...|
|3606|[frozen vegetable...|
|3959|[beef, root veget...|
|4032|[liver loaf, citr...|
| 467|[other vegetables...|
|4821|[other vegetables...|
|4937|[frankfurter, sau...|
+----+--------------------+
only showing top 20 rows



In [19]:
# Configure FP-Growth on the "items" basket column:
#   minSupport=0.05    -> keep itemsets present in at least 5% of the baskets
#   minConfidence=0.05 -> keep association rules with confidence of at least 5%
fpGrowth = FPGrowth(
    minSupport=0.05,
    minConfidence=0.05,
    itemsCol="items",
)

# Mine the frequent itemsets from the per-transaction baskets.
model = fpGrowth.fit(data1)

In [20]:
# Show the first 20 frequent itemsets found by the model, with their frequencies.
model.freqItemsets.show(n=20, truncate=True)

+--------------------+----+
|               items|freq|
+--------------------+----+
|        [whole milk]|2513|
|  [other vegetables]|1903|
|[other vegetables...| 736|
|        [rolls/buns]|1809|
|[rolls/buns, whol...| 557|
|              [soda]|1715|
|            [yogurt]|1372|
|[yogurt, whole milk]| 551|
|     [bottled water]|1087|
|   [root vegetables]|1072|
|    [tropical fruit]|1032|
|     [shopping bags]| 969|
|           [sausage]| 924|
|            [pastry]| 875|
|      [citrus fruit]| 814|
|      [bottled beer]| 792|
|        [newspapers]| 785|
|       [canned beer]| 764|
|         [pip fruit]| 744|
|[fruit/vegetable ...| 711|
+--------------------+----+
only showing top 20 rows



In [21]:
# Show the generated association rules (antecedent -> consequent) with their
# confidence and lift.
model.associationRules.show(n=20, truncate=True)

+------------------+------------------+-------------------+------------------+
|        antecedent|        consequent|         confidence|              lift|
+------------------+------------------+-------------------+------------------+
|      [rolls/buns]|      [whole milk]|0.30790491984521834|1.2050317893663836|
|[other vegetables]|      [whole milk]|0.38675775091960063|1.5136340948246207|
|      [whole milk]|[other vegetables]|0.29287703939514526| 1.513634094824621|
|      [whole milk]|      [rolls/buns]| 0.2216474333465977|1.2050317893663838|
|      [whole milk]|          [yogurt]| 0.2192598487863112|1.5717351405345266|
|          [yogurt]|      [whole milk]|0.40160349854227406|1.5717351405345266|
+------------------+------------------+-------------------+------------------+



In [22]:
# Apply the fitted model back to the baskets; the result gains a "prediction"
# column holding the items suggested for each basket.
model.transform(data1).show(n=20, truncate=True)

+----+--------------------+--------------------+
|  id|               items|          prediction|
+----+--------------------+--------------------+
|1090|[beef, citrus fru...|            [yogurt]|
|1159|[citrus fruit, bo...|                  []|
|1436|[sausage, whole m...|[other vegetables...|
|1512|[sausage, fish, o...|        [whole milk]|
|1572|            [yogurt]|        [whole milk]|
|2069|[chicken, canned ...|                  []|
|2088|[sausage, ham, wh...|[other vegetables...|
|2136|[other vegetables...|            [yogurt]|
|2162|[tropical fruit, ...|        [whole milk]|
|2294|[beverages, rolls...|        [whole milk]|
|2904|[root vegetables,...|[other vegetables...|
| 296|[citrus fruit, pa...|        [whole milk]|
|3210|[other vegetables...|        [whole milk]|
|3414|[chicken, citrus ...|        [whole milk]|
|3606|[frozen vegetable...|                  []|
|3959|[beef, root veget...|                  []|
|4032|[liver loaf, citr...|        [whole milk]|
| 467|[other vegetab

In [23]:
# Shut down the Spark session now that the analysis is finished.
spark.stop()