### 1. Create a product map

In [22]:
import pandas as pd
from mlxtend.frequent_patterns import association_rules
from mlxtend.frequent_patterns import apriori

In [2]:
# Load the product catalogue; its product_ID and product_name columns are
# used below to build the name -> ID lookup.
df_product = pd.read_csv('./tc_product.csv')

In [3]:
# Lookup table from product name to product ID, built by pairing the two
# catalogue columns row by row (a later duplicate name overrides an earlier one).
product_map = dict(zip(df_product['product_name'], df_product['product_ID']))

In [4]:
# Inspect the name -> ID lookup.
product_map

{'綜合巧克力禮盒(小盒)': 1,
 '柴燒黑糖巧克力': 2,
 '100%無糖巧克力磚': 3,
 '95%巧克力磚': 4,
 '85%巧克力磚': 5,
 '75%巧克力磚': 6,
 '65%巧克力磚': 7,
 '紅藜巧克力BAR': 8,
 '牛奶巧克力BAR': 9,
 '洛神花巧克力BAR': 10,
 '杏仁巧克力豆': 11,
 '天然可可豆茶': 12,
 '95%黑巧克力冰淇淋': 13,
 '烘培可可豆': 14,
 '杏仁可可瓦片': 15,
 '海鹽巧克力BAR': 16,
 '花生巧克力BAR': 17,
 '抹茶拿鐵生巧克力': 18,
 '經典生巧克力': 19,
 '綜合巧克力禮盒(大盒)': 20,
 '65%巧克力禮盒': 21,
 '75%巧克力禮盒': 22,
 '85%巧克力禮盒': 23,
 '95%巧克力禮盒': 24,
 '草莓季限定款巧克力禮盒': 25}

### 2. Insert Hot Commodity into MongoDB

In [5]:
# Load the processed order data.
# NOTE(review): judging by the frames displayed in this notebook, each row is
# one order and each column one product, with purchased quantities as values
# -- confirm against the preprocessing step.
df_data = pd.read_csv('./TC_Data_processed.csv')

In [6]:
def sum_to_boolean(x):
    """Collapse a quantity into a 0/1 flag.

    Returns 0 when ``x <= 0`` and 1 for every other value.
    """
    return 0 if x <= 0 else 1
# Binarise the order matrix: 1 where the quantity is positive, 0 otherwise.
# A vectorised comparison replaces DataFrame.applymap, which is deprecated in
# recent pandas and calls a Python function once per cell. Missing values
# compare False and therefore become 0.
df_data = (df_data > 0).astype('int64')

In [7]:
# Inspect the binarised frame (every cell is now 0 or 1).
df_data

Unnamed: 0,綜合巧克力禮盒(小盒),柴燒黑糖巧克力,100%無糖巧克力磚,95%巧克力磚,85%巧克力磚,75%巧克力磚,65%巧克力磚,紅藜巧克力BAR,牛奶巧克力BAR,洛神花巧克力BAR,...,海鹽巧克力BAR,花生巧克力BAR,抹茶拿鐵生巧克力,經典生巧克力,65%巧克力禮盒,75%巧克力禮盒,85%巧克力禮盒,95%巧克力禮盒,草莓季限定款巧克力禮盒,綜合巧克力禮盒(大盒)
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
1,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,1,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
281,1,0,1,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
282,0,0,0,0,0,0,0,1,1,1,...,0,0,0,0,0,0,0,0,0,0
283,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
284,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [8]:
# Column-wise sum of the 0/1 frame: for each product, the number of rows
# (orders) in which it appears with a positive quantity.
product_count = df_data.sum()

In [9]:
# Inspect the per-product counts, still labelled by product name.
product_count

綜合巧克力禮盒(小盒)     22
柴燒黑糖巧克力          8
100%無糖巧克力磚      48
95%巧克力磚         30
85%巧克力磚         44
75%巧克力磚         40
65%巧克力磚         15
紅藜巧克力BAR         5
牛奶巧克力BAR        18
洛神花巧克力BAR       10
杏仁巧克力豆          87
天然可可豆茶          18
95%黑巧克力冰淇淋      11
烘培可可豆            1
杏仁可可瓦片          35
海鹽巧克力BAR        31
花生巧克力BAR        12
抹茶拿鐵生巧克力        27
經典生巧克力          56
65%巧克力禮盒         8
75%巧克力禮盒         4
85%巧克力禮盒         8
95%巧克力禮盒         5
草莓季限定款巧克力禮盒    150
綜合巧克力禮盒(大盒)     10
dtype: int64

In [10]:
# Relabel the counts from product name to product ID.
# Series.rename returns a new Series and leaves labels that are not keys of
# the mapping untouched, so re-running this cell is a no-op. Re-applying
# Index.map to already-mapped integer IDs would instead turn them all into NaN.
product_count = product_count.rename(index=product_map)

In [11]:
# Inspect the per-product counts, now labelled by product ID.
product_count

1      22
2       8
3      48
4      30
5      44
6      40
7      15
8       5
9      18
10     10
11     87
12     18
13     11
14      1
15     35
16     31
17     12
18     27
19     56
21      8
22      4
23      8
24      5
25    150
20     10
dtype: int64

In [12]:
import pymongo

# Connect to the local MongoDB server and select the "cocoa" database and its
# "hot_commodity" collection. `myclient` and `mydb` stay in the notebook
# namespace for the later sections.
myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient["cocoa"]
my_hot = mydb["hot_commodity"]

In [13]:
# Store one document per product: {'product_id': ..., 'amount': ...}.
# Upserting on product_id keeps the collection at one document per product
# when this cell is re-run; a plain insert would add a duplicate set each time.
for product_id, amount in product_count.items():
    my_hot.replace_one(
        {'product_id': product_id},
        {'product_id': product_id, 'amount': amount},
        upsert=True,
    )

In [14]:
# Read every document back from the collection to verify what was stored.
for hot_doc in my_hot.find():
    print(hot_doc)

{'_id': ObjectId('621c8f935b6812c63c9423fc'), 'product_id': 1, 'amount': 22}
{'_id': ObjectId('621c8f935b6812c63c9423fd'), 'product_id': 2, 'amount': 8}
{'_id': ObjectId('621c8f935b6812c63c9423fe'), 'product_id': 3, 'amount': 48}
{'_id': ObjectId('621c8f935b6812c63c9423ff'), 'product_id': 4, 'amount': 30}
{'_id': ObjectId('621c8f935b6812c63c942400'), 'product_id': 5, 'amount': 44}
{'_id': ObjectId('621c8f935b6812c63c942401'), 'product_id': 6, 'amount': 40}
{'_id': ObjectId('621c8f935b6812c63c942402'), 'product_id': 7, 'amount': 15}
{'_id': ObjectId('621c8f935b6812c63c942403'), 'product_id': 8, 'amount': 5}
{'_id': ObjectId('621c8f935b6812c63c942404'), 'product_id': 9, 'amount': 18}
{'_id': ObjectId('621c8f935b6812c63c942405'), 'product_id': 10, 'amount': 10}
{'_id': ObjectId('621c8f935b6812c63c942406'), 'product_id': 11, 'amount': 87}
{'_id': ObjectId('621c8f935b6812c63c942407'), 'product_id': 12, 'amount': 18}
{'_id': ObjectId('621c8f935b6812c63c942408'), 'product_id': 13, 'amount': 1

### 3. If you like... (by Jaccard)

In [23]:
import pyasrule as rule
import numpy as np

##### Rebuild df

In [31]:
# Re-load the raw order quantities and number the orders 1..N in a leading
# 'ID' column. The range is derived from the actual row count, so the cell
# keeps working when the CSV gains or loses rows.
df_if_you_like = pd.read_csv('./TC_Data_processed.csv')
df_if_you_like.insert(
    loc=0,
    column='ID',
    value=range(1, len(df_if_you_like) + 1),
    allow_duplicates=True,
)

In [34]:
# Expand each order (one wide row of per-product quantities) into long-format
# [order ID, product name] pairs, emitting a pair once per unit purchased.
all_transaction = []
for order_id in df_if_you_like['ID']:
    # IDs run 1..N over the default 0..N-1 row labels, so row label = ID - 1.
    order = df_if_you_like.loc[order_id - 1]
    # Series.items() replaces Series.iteritems(), which was removed in pandas 2.0.
    # The 'ID' entry is dropped because it is not a product column.
    for item_name, quantity in order.drop('ID').items():
        if quantity > 0:
            # int() truncates a fractional quantity toward zero.
            all_transaction.extend([[order_id, item_name]] * int(quantity))

In [35]:
# Turn the [order ID, product name] pairs into a long-format frame.
# Note: this rebinds df_if_you_like, replacing the wide frame loaded above.
df_if_you_like = pd.DataFrame(all_transaction, columns = ['Transaction', 'Item'])

In [36]:
# Preview the first long-format rows.
df_if_you_like.head()

Unnamed: 0,Transaction,Item
0,1,杏仁巧克力豆
1,1,草莓季限定款巧克力禮盒
2,2,杏仁巧克力豆
3,2,杏仁巧克力豆
4,2,杏仁巧克力豆


##### Rebuild df finished

In [20]:
# Distinct product names that occur in at least one transaction
# (pandas returns them in order of first appearance).
items=df_if_you_like['Item'].unique()

In [21]:
# Inspect the list of distinct product names.
items

array(['杏仁巧克力豆', '草莓季限定款巧克力禮盒', '海鹽巧克力BAR', '牛奶巧克力BAR', '抹茶拿鐵生巧克力',
       '經典生巧克力', '85%巧克力禮盒', '綜合巧克力禮盒(大盒)', '烘培可可豆', '綜合巧克力禮盒(小盒)',
       '花生巧克力BAR', '天然可可豆茶', '杏仁可可瓦片', '75%巧克力禮盒', '95%黑巧克力冰淇淋',
       '65%巧克力禮盒', '紅藜巧克力BAR', '95%巧克力磚', '85%巧克力磚', '75%巧克力磚', '65%巧克力磚',
       '100%無糖巧克力磚', '洛神花巧克力BAR', '95%巧克力禮盒', '柴燒黑糖巧克力'], dtype=object)

In [22]:
# Generate association rules with pyasrule from the long-format transactions
# and keep the resulting rule table (`market_basket_df`) as `result`.
# NOTE(review): pyasrule is a third-party package; confirm the exact metric
# definitions (Jaccard, Cosine, ...) against its documentation.
arule = rule.AssociationRules()
arule.generateRules(transaction_df = df_if_you_like, item_col = "Item", transaction_id = "Transaction")
result = arule.market_basket_df

In [23]:
# Preview the rule table (one row per Antecedent/Consequent pair).
result.head()

Unnamed: 0,Antecedent,Consequent,Support_Both,Support_Antecedent,Support_Consequent,Confidence,Lift,Information_Gain,Jaccard,Cosine
0,100%無糖巧克力磚,65%巧克力磚,4,48,15,0.083333,1.588889,0.002475,0.067797,0.149071
1,75%巧克力磚,65%巧克力磚,9,40,15,0.225,4.29,0.046835,0.195652,0.367423
2,85%巧克力磚,65%巧克力磚,7,44,15,0.159091,3.033333,0.022182,0.134615,0.272475
3,95%巧克力磚,65%巧克力磚,2,30,15,0.066667,1.271111,0.000321,0.046512,0.094281
4,95%黑巧克力冰淇淋,65%巧克力磚,1,11,15,0.090909,1.733333,0.000718,0.04,0.07785


In [24]:
# Column groups of the rule table. `features` is used below when selecting the
# columns of the recommendation candidates; `metrics` is not referenced in the
# cells visible in this notebook section.
metrics = ["Confidence", "Lift", "Cosine", "Jaccard", "Information_Gain"]
features = ["Antecedent", "Consequent", "Support_Both", "Support_Antecedent", "Support_Consequent"]

In [25]:
# Reuse the `mydb` handle opened in section 2 instead of creating a second
# MongoClient (each client keeps its own connection pool to the same server).
my_like = mydb["if_you_like"]

In [26]:
# For every product, store the IDs of the products with the highest Jaccard
# score among the rules that have this product as antecedent.
TOP_N_SIMILAR = 5  # number of recommendations kept per product
for item_name in items:
    top_matches = (
        result.loc[result["Antecedent"] == item_name, features + ["Jaccard"]]
        .sort_values(by="Jaccard", ascending=False)
        .head(TOP_N_SIMILAR)
    )
    if_you_like = {
        'product_id': product_map[item_name],
        'collaborative_filtering_list': [
            product_map[name] for name in top_matches['Consequent']
        ],
    }
    # Upsert on product_id so re-running the cell replaces the stored list
    # instead of inserting a duplicate document for the same product.
    my_like.replace_one(
        {'product_id': if_you_like['product_id']},
        if_you_like,
        upsert=True,
    )

In [27]:
# Read every document back from the collection to verify what was stored.
for like_doc in my_like.find():
    print(like_doc)

{'_id': ObjectId('621c8f935b6812c63c942416'), 'product_id': 11, 'collaborative_filtering_list': [25, 15, 19, 16, 18]}
{'_id': ObjectId('621c8f935b6812c63c942417'), 'product_id': 25, 'collaborative_filtering_list': [11, 19, 16, 18, 15]}
{'_id': ObjectId('621c8f935b6812c63c942418'), 'product_id': 16, 'collaborative_filtering_list': [10, 11, 9, 15, 5]}
{'_id': ObjectId('621c8f935b6812c63c942419'), 'product_id': 9, 'collaborative_filtering_list': [16, 6, 15, 11, 12]}
{'_id': ObjectId('621c8f935b6812c63c94241a'), 'product_id': 18, 'collaborative_filtering_list': [19, 11, 6, 15, 16]}
{'_id': ObjectId('621c8f935b6812c63c94241b'), 'product_id': 19, 'collaborative_filtering_list': [18, 11, 5, 6, 15]}
{'_id': ObjectId('621c8f935b6812c63c94241c'), 'product_id': 23, 'collaborative_filtering_list': [24, 22, 20, 8, 18]}
{'_id': ObjectId('621c8f935b6812c63c94241d'), 'product_id': 20, 'collaborative_filtering_list': [23, 22, 24, 18, 16]}
{'_id': ObjectId('621c8f935b6812c63c94241e'), 'product_id': 14, 

### 4. Apriori (by lift)

In [28]:
# Re-load the processed order data for the apriori analysis; at this point the
# values are the raw quantities, not yet binarised.
df_product_combo = pd.read_csv('./TC_Data_processed.csv')

In [29]:
# Inspect the raw quantity matrix.
df_product_combo

Unnamed: 0,綜合巧克力禮盒(小盒),柴燒黑糖巧克力,100%無糖巧克力磚,95%巧克力磚,85%巧克力磚,75%巧克力磚,65%巧克力磚,紅藜巧克力BAR,牛奶巧克力BAR,洛神花巧克力BAR,...,海鹽巧克力BAR,花生巧克力BAR,抹茶拿鐵生巧克力,經典生巧克力,65%巧克力禮盒,75%巧克力禮盒,85%巧克力禮盒,95%巧克力禮盒,草莓季限定款巧克力禮盒,綜合巧克力禮盒(大盒)
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
1,0,0,0,0,0,0,0,0,0,0,...,3,0,0,0,0,0,0,0,1,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,2,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,3,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
281,2,0,2,2,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
282,0,0,0,0,0,0,0,1,1,1,...,0,0,0,0,0,0,0,0,0,0
283,0,0,3,0,0,0,0,0,0,0,...,0,0,0,0,0,6,0,0,0,0
284,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [30]:
# True where at least one unit of the product was bought in the order.
# The comparison yields a real boolean frame, which is the input type mlxtend's
# apriori expects, and avoids the deprecated DataFrame.applymap.
# sum_to_boolean is already defined in section 2, so it is not redefined here.
# Missing values compare False, i.e. count as "not bought".
df_product_combo_boolean = df_product_combo > 0

In [31]:
# Mine itemsets whose support is at least 0.06 and order them from most to
# least frequent; column names are kept as the item labels.
frequent_itemsets = apriori(
    df_product_combo_boolean,
    min_support=0.06,
    use_colnames=True,
).sort_values(by=['support'], ascending=False)

In [32]:
# Derive association rules with lift >= 1 from the frequent itemsets and rank
# them from highest to lowest lift.
a_rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=1,
).sort_values('lift', ascending=False)

In [33]:
# Keep the first rule for each distinct leverage value. In the rule table shown
# in this notebook, A -> B and B -> A share the same leverage, so this keeps one
# direction per pair -- presumably the intent; unrelated rules with an equal
# leverage would be collapsed as well.
# NOTE(review): this rebinds `apriori`, shadowing the mlxtend function imported
# at the top of the notebook. Re-running the frequent-itemset cell after this
# one fails until the import cell is executed again.
apriori = a_rules.drop_duplicates(subset=['leverage'])

In [34]:
# Display the complete rule table. This is `a_rules`, which still contains
# both directions of every rule, not the de-duplicated frame.
a_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
4,(75%巧克力磚),(85%巧克力磚),0.13986,0.153846,0.083916,0.6,3.9,0.062399,2.115385
5,(85%巧克力磚),(75%巧克力磚),0.153846,0.13986,0.083916,0.545455,3.9,0.062399,1.892308
6,(杏仁可可瓦片),(杏仁巧克力豆),0.122378,0.304196,0.08042,0.657143,2.160263,0.043193,2.029429
7,(杏仁巧克力豆),(杏仁可可瓦片),0.304196,0.122378,0.08042,0.264368,2.160263,0.043193,1.193018
2,(經典生巧克力),(杏仁巧克力豆),0.195804,0.304196,0.083916,0.428571,1.408867,0.024353,1.217657
3,(杏仁巧克力豆),(經典生巧克力),0.304196,0.195804,0.083916,0.275862,1.408867,0.024353,1.110556
0,(草莓季限定款巧克力禮盒),(杏仁巧克力豆),0.524476,0.304196,0.164336,0.313333,1.030038,0.004792,1.013307
1,(杏仁巧克力豆),(草莓季限定款巧克力禮盒),0.304196,0.524476,0.164336,0.54023,1.030038,0.004792,1.034266


In [35]:
# Reuse the `mydb` handle opened in section 2 instead of creating another
# MongoClient (each client keeps its own connection pool to the same server).
my_apriori = mydb["apriori"]

In [36]:
# Store one document per de-duplicated rule. Here `apriori` is the rules
# DataFrame assigned earlier in this section, not the mlxtend function.
for i, row in apriori.iterrows():
    doc = {
        # antecedents/consequents are frozensets; sorting before joining makes
        # the stored string deterministic for multi-item sets.
        'antecedents': ', '.join(sorted(row['antecedents'])),
        'consequents': ', '.join(sorted(row['consequents'])),
        'antecedent_support': row['antecedent support'],
        'consequent_support': row['consequent support'],
        'lift': row['lift'],
        'support': row['support'],
        'leverage': row['leverage'],
        'conviction': row['conviction'],
    }
    # Upsert on the rule itself so re-running the cell updates the stored
    # metrics instead of inserting the same rule again.
    my_apriori.replace_one(
        {'antecedents': doc['antecedents'], 'consequents': doc['consequents']},
        doc,
        upsert=True,
    )

In [37]:
# Read every document back from the collection to verify what was stored.
for rule_doc in my_apriori.find():
    print(rule_doc)

{'_id': ObjectId('621c8f945b6812c63c942430'), 'antecedents': '75%巧克力磚', 'consequents': '85%巧克力磚', 'antecedent_support': 0.13986013986013987, 'consequent_support': 0.15384615384615385, 'lift': 3.8999999999999995, 'support': 0.08391608391608392, 'leverage': 0.062399139322216246, 'conviction': 2.1153846153846154}
{'_id': ObjectId('621c8f945b6812c63c942431'), 'antecedents': '杏仁可可瓦片', 'consequents': '杏仁巧克力豆', 'antecedent_support': 0.12237762237762238, 'consequent_support': 0.3041958041958042, 'lift': 2.160262725779967, 'support': 0.08041958041958042, 'leverage': 0.04319282116484913, 'conviction': 2.0294289044289044}
{'_id': ObjectId('621c8f945b6812c63c942432'), 'antecedents': '經典生巧克力', 'consequents': '杏仁巧克力豆', 'antecedent_support': 0.1958041958041958, 'consequent_support': 0.3041958041958042, 'lift': 1.4088669950738917, 'support': 0.08391608391608392, 'leverage': 0.02435326910851386, 'conviction': 1.2176573426573427}
{'_id': ObjectId('621c8f945b6812c63c942433'), 'antecedents': '草莓季限定款巧克力禮盒'