## Spark Developer Training

**Manaranjan Pradhan**<br/>
**manaranjan@enablecloud.com**<br/>
*This notebook is provided to participants as part of the Spark Training. Forwarding it to others is strictly prohibited.*

## Lab: Market Basket Analysis using Spark MLlib

### Loading Groceries dataset

In [None]:
%python 

groceries_df = spark.read.text("/FileStore/tables/lab/groceries.csv")

In [None]:
# Preview the first 20 raw lines without truncating long values.
groceries_df.show(n=20, truncate=False)

### Converting the baskets into a list of items

In [None]:
from pyspark.sql.functions import split, col

# Turn each comma-separated line into an array of item names and
# attach it as the "items" column.
basket_items = split(col("value"), ",")
groceries_df = groceries_df.withColumn("items", basket_items)

In [None]:
# Preview the first 20 rows, now including the "items" array column.
groceries_df.show(n=20, truncate=False)

In [None]:
# Number of rows in the DataFrame, i.e. one per line read from the file.
groceries_df.count()

### Running FPGrowth Algorithm

In [None]:
from pyspark.ml.fpm import FPGrowth

# minSupport: minimum fraction of baskets an itemset must appear in (0.1%).
# minConfidence: minimum confidence a rule needs to be generated (60%).
fpGrowth = FPGrowth(
    itemsCol="items",
    minSupport=0.001,
    minConfidence=0.6,
)

# Mine frequent itemsets from the "items" column of every basket.
model = fpGrowth.fit(groceries_df)

### Finding Frequent Item Sets

In [None]:
# Cache the frequent itemsets (several queries below reuse them) and
# expose them to SQL cells as the "FrequentItemSets" view.
itemSets = model.freqItemsets.cache()
itemSets.createOrReplaceTempView("FrequentItemSets")

In [None]:
%sql

-- 20 most frequent itemsets that contain at least three items.
SELECT items, freq
FROM FrequentItemSets
WHERE size(items) > 2
ORDER BY freq DESC
LIMIT 20

items,freq
"List(root vegetables, other vegetables, whole milk)",228
"List(yogurt, other vegetables, whole milk)",219
"List(rolls/buns, other vegetables, whole milk)",176
"List(tropical fruit, other vegetables, whole milk)",168
"List(yogurt, rolls/buns, whole milk)",153
"List(tropical fruit, yogurt, whole milk)",149
"List(whipped/sour cream, other vegetables, whole milk)",144
"List(root vegetables, yogurt, whole milk)",143
"List(soda, other vegetables, whole milk)",137
"List(pip fruit, other vegetables, whole milk)",133


In [None]:
%sql

-- 20 least frequent itemsets that contain at least three items.
-- Rows with equal freq are returned in no guaranteed order.
SELECT items, freq
FROM FrequentItemSets
WHERE size(items) > 2
ORDER BY freq ASC
LIMIT 20

items,freq
"List(canned vegetables, tropical fruit, root vegetables)",10
"List(pork, fruit/vegetable juice, tropical fruit, yogurt)",10
"List(canned vegetables, soda, whole milk)",10
"List(rice, citrus fruit, root vegetables)",10
"List(canned vegetables, root vegetables, other vegetables, whole milk)",10
"List(rice, frozen vegetables, whole milk)",10
"List(liver loaf, yogurt, whole milk)",10
"List(rice, tropical fruit, root vegetables)",10
"List(pork, whipped/sour cream, rolls/buns, whole milk)",10
"List(rice, butter, root vegetables)",10


### Finding Association Rules and Respective Metrics

In [None]:
# Cache the generated rules (queried more than once below) and expose
# them to SQL cells as the "AssociationRules" view.
associationRules = model.associationRules.cache()
associationRules.createOrReplaceTempView("AssociationRules")

In [None]:
%sql

-- Browse every generated rule together with its metrics.
SELECT *
FROM AssociationRules

antecedent,consequent,confidence,lift
"List(butter, pork, root vegetables, other vegetables)",List(whole milk),0.9090909090909092,3.5578627500633067
"List(frozen vegetables, butter, other vegetables)",List(whole milk),0.6,2.3481894150417824
"List(cat food, hygiene articles)",List(whole milk),0.6,2.3481894150417824
"List(fruit/vegetable juice, sausage, tropical fruit)",List(whole milk),0.631578947368421,2.4717783316229287
"List(curd, whipped/sour cream, yogurt, other vegetables)",List(whole milk),0.6470588235294118,2.532361133868589
"List(butter, tropical fruit, root vegetables, yogurt)",List(other vegetables),0.6842105263157895,3.5361064247586915
"List(butter, tropical fruit, root vegetables, yogurt)",List(whole milk),0.8947368421052632,3.5016859697991496
"List(frozen fish, root vegetables)",List(whole milk),0.6923076923076923,2.7094493250482103
"List(oil, root vegetables, yogurt, other vegetables, whole milk)",List(tropical fruit),0.7142857142857143,6.807170542635659
"List(whipped/sour cream, citrus fruit, root vegetables, yogurt)",List(other vegetables),0.7058823529411765,3.64810979567865


### Finding top 10 rules for deployment based on Lift

In [None]:
%sql

-- Top 10 rules ranked by lift. Only rules with lift > 1.0 are kept,
-- i.e. where the antecedent makes the consequent more likely than
-- its baseline frequency.
SELECT antecedent, consequent, confidence, lift
FROM AssociationRules
WHERE lift > 1.0
ORDER BY lift DESC
LIMIT 10

antecedent,consequent,confidence,lift
"List(Instant food products, soda)",List(hamburger meat),0.631578947368421,18.995654273297923
"List(popcorn, soda)",List(salty snack),0.631578947368421,16.697792869269946
"List(processed cheese, ham)",List(white bread),0.6333333333333333,15.04549114331723
"List(white bread, tropical fruit, yogurt, other vegetables)",List(butter),0.6666666666666666,12.03058103975535
"List(hamburger meat, whipped/sour cream, yogurt)",List(butter),0.625,11.278669724770642
"List(domestic eggs, tropical fruit, yogurt, other vegetables, whole milk)",List(butter),0.625,11.278669724770642
"List(liquor, red/blush wine)",List(bottled beer),0.9047619047619048,11.23526936026936
"List(sugar, butter, other vegetables)",List(whipped/sour cream),0.7142857142857143,9.9645390070922
"List(hard cheese, butter, whole milk)",List(whipped/sour cream),0.6666666666666666,9.300236406619383
"List(butter, fruit/vegetable juice, tropical fruit, other vegetables)",List(whipped/sour cream),0.6666666666666666,9.300236406619383
