In [20]:
import pandas as pd

In [21]:
# Read the order export (expected in the working directory) into a DataFrame.
orders = pd.read_csv(filepath_or_buffer='orders.csv')

In [22]:
# Keep only the order id and the raw product-name string of each order.
data = orders.loc[:, ['id', 'pname']]

In [23]:
# Preview the first five rows to check the column selection.
data.head(n=5)

Unnamed: 0,id,pname
0,305,Coke $;Sprite$;Coke Zero$;Coke 12 NP$;Fanta Fr...
1,309,Dasani Water Still$;Coke 24 RGB$;Fanta Pineapp...
2,310,Coke $;Fanta Orange$;Coke 12 NP$;Fanta Orange ...
3,311,Fanta Passion$;Fanta Pineapple $;Coke $;Fanta ...
4,313,Coke 24 RGB


In [24]:
# Full-range row slice: every row is carried over.
# NOTE(review): the name suggests this is where a subset would be taken while
# experimenting -- confirm before relying on it.
data_small = data.iloc[:]

In [25]:
# Despite the name, this is a list with one {'id': ..., 'pname': ...} dict per row.
data_dict = data_small.to_dict('records')

In [26]:
# Split the '$;'-delimited product string of each order into a list of names.
# Only plain strings are split. That keeps the cell safe to re-run (pname is
# already a list after the first pass) and turns a missing product string
# (read_csv yields a float NaN for empty fields) into an empty basket instead
# of raising AttributeError.
for d in data_dict:
    pname = d['pname']
    if isinstance(pname, str):
        d['pname'] = pname.split('$;')
    elif not isinstance(pname, list):
        d['pname'] = []

In [27]:
# Normalise every product name: trim surrounding whitespace, replace the
# remaining spaces with underscores and lowercase the result
# (e.g. 'Coke 24 RGB' -> 'coke_24_rgb').
for record in data_dict:
    cleaned = []
    for raw_name in record['pname']:
        cleaned.append(raw_name.strip().replace(' ', '_').lower())
    record['pname'] = cleaned

In [28]:
# One basket (the list of normalised product names) per order, in row order.
transactions = []
for order in data_dict:
    transactions.append(order['pname'])

## Generate Association Rules
Use the Apriori algorithm to find frequent itemsets and generate association rules.

In [29]:
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder


In [30]:
# One-hot encode the baskets: one row per transaction and one column per
# distinct product name learned during fit.
te = TransactionEncoder()
te.fit(transactions)
te_ary = te.transform(transactions)
df_encoded = pd.DataFrame(data=te_ary, columns=te.columns_)

In [31]:
# Mine itemsets present in at least 5% of the transactions, then derive
# association rules from them. The lift threshold is 0.5, i.e. below 1, so
# rules whose lift is under 1 are kept in the result as well.
frequent_itemsets = apriori(
    df_encoded,
    min_support=0.05,
    use_colnames=True,
)
rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=0.5,
)

In [32]:
# Keep the rules DataFrame under a second name; `rules` itself is rebound to
# a plain list of dicts in a later cell.
rules_memory = rules.copy(deep=True)

In [33]:
# Convert the rules table into a list with one dict per rule (nothing is
# printed here). Reading from the `rules_memory` DataFrame backup instead of
# from `rules` itself keeps this cell re-runnable: once `rules` has been
# rebound to a list it no longer has a `.to_dict` method.
rules = rules_memory.to_dict(orient='records')

In [34]:
# Shallow-copy the records into a fresh list. When the cells run in order,
# `rules` is already a list here, so only the container is copied.
rules = [*rules]

In [35]:
import json

# Root node of the tree that is filled below and later written out as JSON.
# Note: this rebinds `data`, which earlier in the notebook held a DataFrame.
data = dict(name="Root", children=[])
def add_child(parent, child_name, value):
    """Return the child of ``parent`` named ``child_name``, creating it if absent.

    Args:
        parent: Node dict with a ``"children"`` list of node dicts.
        child_name: Name to look up among the direct children of ``parent``.
        value: Stored under ``"value"`` only when a new node is created; an
            existing child is returned unchanged and ``value`` is ignored.

    Returns:
        The existing or newly appended child node dict.
    """
    existing = next(
        (node for node in parent["children"] if node["name"] == child_name),
        None,
    )
    if existing is not None:
        return existing

    # No direct child with that name yet: append a fresh leaf and hand it back.
    created = {"name": child_name, "value": value, "children": []}
    parent["children"].append(created)
    return created

# Transform rules into hierarchical data: Root -> antecedent -> consequent,
# with the rule's lift stored on the consequent node.
for rule in rules:
    # Antecedents and consequents are item *sets* and may hold several items.
    # Taking "the first element" would keep one arbitrary item (set iteration
    # order is not stable between kernel sessions) and file multi-item rules
    # under the wrong node, so each node is labelled with all items, sorted
    # and joined. Single-item rules keep the bare item name as their label.
    antecedent = ", ".join(sorted(rule['antecedents']))
    consequent = ", ".join(sorted(rule['consequents']))
    lift = rule['lift']

    # Find or create the antecedent node directly under the root. Antecedent
    # nodes carry no "value" key; only consequent nodes store a lift.
    antecedent_node = next((child for child in data["children"] if child["name"] == antecedent), None)
    if antecedent_node is None:
        antecedent_node = {"name": antecedent, "children": []}
        data["children"].append(antecedent_node)

    # Attach the consequent below its antecedent. add_child leaves an existing
    # node untouched, so a repeated (antecedent, consequent) pair keeps the
    # lift recorded first.
    add_child(antecedent_node, consequent, lift)




In [36]:
# Show every rule except the last four (same rows as head(-4)).
rules_memory.iloc[:-4]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(fanta_orange_24_rgb),(coke),0.528569,0.087870,0.057408,0.108610,1.236042,0.010963,1.023268,0.405077
1,(coke),(fanta_orange_24_rgb),0.087870,0.528569,0.057408,0.653333,1.236042,0.010963,1.359898,0.209363
2,(coke_12_np),(coke_24_rgb),0.153659,0.476748,0.060112,0.391202,0.820564,-0.013145,0.859484,-0.205325
3,(coke_24_rgb),(coke_12_np),0.476748,0.153659,0.060112,0.126087,0.820564,-0.013145,0.968450,-0.294739
4,(coke_12_np),(fanta_fruit_blast_12_np),0.153659,0.077686,0.055696,0.362463,4.665769,0.043759,1.446684,0.928317
...,...,...,...,...,...,...,...,...,...,...
1679,"(fanta_passion_24_rgb, fanta_orange_24_rgb)","(tangawizi_24_rgb, sprite_24_rgb, fanta_pineap...",0.244953,0.069845,0.063627,0.259750,3.718947,0.046518,1.256541,0.968293
1680,"(fanta_orange_24_rgb, coke_24_rgb)","(tangawizi_24_rgb, sprite_24_rgb, fanta_pineap...",0.408706,0.071738,0.063627,0.155678,2.170105,0.034307,1.099418,0.911886
1681,"(fanta_passion_24_rgb, coke_24_rgb)","(tangawizi_24_rgb, sprite_24_rgb, fanta_pineap...",0.207462,0.073450,0.063627,0.306690,4.175497,0.048388,1.336415,0.959585
1682,(tangawizi_24_rgb),"(fanta_pineapple_24_rgb, sprite_24_rgb, fanta_...",0.172495,0.120764,0.063627,0.368861,3.054389,0.042795,1.393094,0.812807


In [37]:
# Serialise the rule tree with 4-space indentation and write it to disk.
with open('products.json', 'w') as out_file:
    payload = json.dumps(data, indent=4)
    out_file.write(payload)