In [1]:
import pandas as pd

In [2]:
orders = pd.read_csv('orders.csv')

In [3]:
#  take is and pname columns from orders
data = orders[['id', 'pname']]

In [4]:
data.head()

Unnamed: 0,id,pname
0,305,Coke $;Sprite$;Coke Zero$;Coke 12 NP$;Fanta Fr...
1,309,Dasani Water Still$;Coke 24 RGB$;Fanta Pineapp...
2,310,Coke $;Fanta Orange$;Coke 12 NP$;Fanta Orange ...
3,311,Fanta Passion$;Fanta Pineapple $;Coke $;Fanta ...
4,313,Coke 24 RGB


In [5]:
data_small = data[:]

In [6]:
data_dict = data_small.to_dict(orient='records')

In [7]:
# split pname into words at '$;' into a list
for d in data_dict:
    d['pname'] = d['pname'].split('$;')

In [8]:
# replace pname spaces with underscores
for d in data_dict:
    d['pname'] = [x.strip().replace(' ', '_').lower() for x in d['pname']]

In [9]:
transactions = [entry['pname'] for entry in data_dict]

## Generate Association Rules
Use the Apriori algorithm to find frequent itemsets and generate association rules.

In [10]:
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder


In [11]:
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

In [12]:
# Apply Apriori algorithm
frequent_itemsets = apriori(df_encoded, min_support=.05, use_colnames=True)
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=.5)

In [13]:
# rules backup
rules_memory = rules.copy()

In [14]:
# Print the association rules
rules = rules.to_dict(orient='records')

In [15]:
rules = list(rules)

In [16]:
import json

data = {
    "name": "Root",
    "children": []
}
# Helper function to add children to a node
def add_child(parent, child_name, value):
    # Check if the child already exists
    for child in parent["children"]:
        if child["name"] == child_name:
            return child
    # Create a new child node if it does not exist
    new_child = {
        "name": child_name,
        "value": value,
        "children": []
    }
    parent["children"].append(new_child)
    return new_child

# Transform rules into hierarchical data
for rule in rules:
    antecedent = list(rule['antecedents'])[0]
    consequent = list(rule['consequents'])[0]
    lift = rule['lift']

    # Find or create antecedent node
    antecedent_node = next((child for child in data["children"] if child["name"] == antecedent), None)
    if not antecedent_node:
        antecedent_node = {"name": antecedent, "children": []}
        data["children"].append(antecedent_node)

    # Add consequent as a child of antecedent
    # Ensure the antecedent_node is up-to-date
    antecedent_node = add_child(data, antecedent, None)
    add_child(antecedent_node, consequent, lift)




In [17]:
rules_memory.head(-4)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(fanta_orange_24_rgb),(coke),0.528569,0.087870,0.057408,0.108610,1.236042,0.010963,1.023268,0.405077
1,(coke),(fanta_orange_24_rgb),0.087870,0.528569,0.057408,0.653333,1.236042,0.010963,1.359898,0.209363
2,(coke_24_rgb),(coke_12_np),0.476748,0.153659,0.060112,0.126087,0.820564,-0.013145,0.968450,-0.294739
3,(coke_12_np),(coke_24_rgb),0.153659,0.476748,0.060112,0.391202,0.820564,-0.013145,0.859484,-0.205325
4,(fanta_fruit_blast_12_np),(coke_12_np),0.077686,0.153659,0.055696,0.716937,4.665769,0.043759,2.989942,0.851850
...,...,...,...,...,...,...,...,...,...,...
1679,"(tangawizi_24_rgb, fanta_pineapple_24_rgb)","(fanta_orange_24_rgb, sprite_24_rgb, coke_24_r...",0.122657,0.132751,0.063627,0.518736,3.907602,0.047344,1.802025,0.848116
1680,"(fanta_pineapple_24_rgb, sprite_24_rgb)","(tangawizi_24_rgb, fanta_orange_24_rgb, coke_2...",0.177541,0.088951,0.063627,0.358376,4.028912,0.047834,1.419910,0.914081
1681,"(tangawizi_24_rgb, sprite_24_rgb)","(fanta_orange_24_rgb, fanta_pineapple_24_rgb, ...",0.087509,0.170242,0.063627,0.727085,4.270905,0.048729,3.040360,0.839304
1682,(fanta_orange_24_rgb),"(coke_24_rgb, fanta_passion_24_rgb, fanta_pine...",0.528569,0.064708,0.063627,0.120375,1.860282,0.029424,1.063285,0.980943


In [18]:
# save json to file
with open('products.json', 'w') as f:
    f.write(json.dumps(data, indent=4))

In [21]:

# Prepare data for D3.js
nodes = set()
links = []

for row in rules:
    antecedents = row['antecedents']
    consequents = row['consequents']
    for antecedent in antecedents:
        nodes.add(antecedent)
        for consequent in consequents:
            nodes.add(consequent)
            links.append({'source': antecedent, 'target': consequent, 'value': row['lift']})

nodes = [{'id': node} for node in nodes]

In [24]:
import json

data = {'nodes': nodes, 'links': links}
with open('data-d3-force-1.json', 'w') as f:
    json.dump(data, f)


In [26]:

# Convert rules to hierarchical format
def convert_rules_to_hierarchy(rules):
    hierarchy = []
    id_map = {}

    def add_node(node_id, parents):
        if node_id in id_map:
            return id_map[node_id]

        node = {'id': node_id}
        if parents:
            node['parents'] = parents
        hierarchy.append(node)
        id_map[node_id] = node
        return node

    for rule in rules:
        antecedents = list(rule['antecedents'])
        consequents = list(rule['consequents'])

        for consequent in consequents:
            add_node(consequent, antecedents)

    return hierarchy

hierarchy = convert_rules_to_hierarchy(rules)

# Group hierarchy by levels (generation)
levels = []
for node in hierarchy:
    if 'parents' not in node:
        levels.append([node])
    else:
        level = max(len(node['parents']), 1)
        while len(levels) < level:
            levels.append([])
        levels[level-1].append(node)

# Save to JSON file
with open('rules_hierarchy.json', 'w') as f:
    json.dump(levels, f, indent=2)

print(json.dumps(levels, indent=2))

[
  [
    {
      "id": "coke",
      "parents": [
        "fanta_orange_24_rgb"
      ]
    },
    {
      "id": "fanta_orange_24_rgb",
      "parents": [
        "coke"
      ]
    },
    {
      "id": "coke_12_np",
      "parents": [
        "coke_24_rgb"
      ]
    },
    {
      "id": "coke_24_rgb",
      "parents": [
        "coke_12_np"
      ]
    },
    {
      "id": "fanta_fruit_blast_12_np",
      "parents": [
        "coke_12_np"
      ]
    },
    {
      "id": "fanta_orange_12_np",
      "parents": [
        "coke_12_np"
      ]
    },
    {
      "id": "fanta_passion_12_np",
      "parents": [
        "coke_12_np"
      ]
    },
    {
      "id": "fanta_pineapple_12_np",
      "parents": [
        "coke_12_np"
      ]
    },
    {
      "id": "sprite_12np",
      "parents": [
        "coke_12_np"
      ]
    },
    {
      "id": "coke_12np",
      "parents": [
        "coke_24_rgb"
      ]
    },
    {
      "id": "fanta_orange_12np",
      "parents": [
        "coke_12

In [27]:
import json

def transform_rules_to_hierarchy(rules):
    # Create a list of unique nodes and their parent relationships
    nodes = {}
    for rule in rules:
        for antecedent in rule['antecedents']:
            if antecedent not in nodes:
                nodes[antecedent] = {'id': antecedent, 'parents': []}
        for consequent in rule['consequents']:
            if consequent not in nodes:
                nodes[consequent] = {'id': consequent, 'parents': []}
            for antecedent in rule['antecedents']:
                nodes[consequent]['parents'].append(antecedent)
    
    # Organize nodes into levels based on parent depth
    levels = []
    nodes_list = list(nodes.values())
    level_map = {}
    
    def assign_levels(node_id, level):
        if node_id in level_map:
            return
        level_map[node_id] = level
        for parent in nodes[node_id]['parents']:
            assign_levels(parent, level + 1)
    
    for node_id in nodes:
        assign_levels(node_id, 0)
    
    max_level = max(level_map.values())
    levels = [[] for _ in range(max_level + 1)]
    for node_id, level in level_map.items():
        levels[level].append(nodes[node_id])
    
    return levels


# Convert to hierarchical format
hierarchical_data = transform_rules_to_hierarchy(rules)

# Save to JSON file
with open('tangled_hierarchical_data.json', 'w') as f:
    json.dump(hierarchical_data, f, indent=2)

print(json.dumps(hierarchical_data, indent=2))


[
  [
    {
      "id": "fanta_orange_24_rgb",
      "parents": [
        "coke",
        "coke_12_np",
        "coke_12np",
        "coke_24_rgb",
        "fanta_orange_12_np",
        "fanta_orange_12np",
        "fanta_passion_24_rgb",
        "fanta_pineapple_24_rgb",
        "minute_maid_embe_12_np",
        "sparletta_pinenut_24_rgb",
        "sprite_12np",
        "sprite_24_rgb",
        "tangawizi_24_rgb",
        "fanta_orange_12_np",
        "coke_12_np",
        "fanta_orange_12_np",
        "coke_12_np",
        "coke_24_rgb",
        "coke_12np",
        "coke_24_rgb",
        "coke_12np",
        "coke_24_rgb",
        "fanta_passion_24_rgb",
        "coke_24_rgb",
        "fanta_passion_24_rgb",
        "fanta_pineapple_24_rgb",
        "coke_24_rgb",
        "fanta_pineapple_24_rgb",
        "coke_24_rgb",
        "sparletta_pinenut_24_rgb",
        "coke_24_rgb",
        "sparletta_pinenut_24_rgb",
        "coke_24_rgb",
        "sprite_12np",
        "coke_24_rgb",
 