In [1]:
import pandas as pd
import json
# Notebook-wide display settings: do not truncate long cell text, and show
# 60 rows whenever pandas abbreviates a long frame.
pd.options.display.max_colwidth = None
pd.options.display.min_rows = 60

In [2]:
# Path to the products CSV; the chart cells below all read the `df` loaded here.
# NOTE(review): the saved output of this cell echoes the read_csv line, which
# looks like a pandas warning -- confirm whether explicit dtypes are needed.
FILENAME = '../../datasets/products_0.995_cleaned.csv'
df = pd.read_csv(filepath_or_buffer=FILENAME)

  df = pd.read_csv(FILENAME)


### Bubble chart (circle packing) of additives

Based on https://observablehq.com/@d3/bubble-chart

<img src="bubble-additives.png" width="500">

In [6]:
# Minimum number of occurrences an additive needs to be kept in the chart.
threshold = 100

# `additives` holds comma-separated lists: split them, flatten to one row per
# additive occurrence, and count how often each additive appears.
occurrence_counts = (
    df['additives']
    .str.split(pat=',')
    .explode(ignore_index=True)
    .value_counts()
)

# Name both columns explicitly. The names value_counts() gives its result
# differ between pandas versions, so relying on the defaults changes which
# column holds the counts (and therefore the JSON keys). The keys written are
# "index" (additive label) and "additives" (occurrence count).
bubble_additives = (
    occurrence_counts
    .rename_axis('index')
    .rename('additives')
    .reset_index()
)

# Keep only additives that reach the threshold; filtering into a new frame
# keeps the cell safe to re-run.
bubble_additives = bubble_additives[bubble_additives['additives'] >= threshold]
bubble_additives.to_json('bubble_additives.json', orient='records', indent=4)

### Treemap of additives by category (meats, snacks, beverages, ...)

Based on https://observablehq.com/@d3/json-treemap

<img src="treemap-additives.png" width="500">

In [4]:
# Two-level hierarchy for the treemap: the 10 most frequent main categories,
# each holding its 5 most frequent additives as leaves.
top_categories = df['main_category'].value_counts().nlargest(10).index.values

category_nodes = []
for category in top_categories:
    in_category = df['main_category'] == category
    # Additive -> number of occurrences among the products of this category.
    top_additives = (
        df[in_category]['additives']
        .str.split(',')
        .explode()
        .value_counts()
        .nlargest(5)
        .to_dict()
    )
    leaves = []
    for additive, count in top_additives.items():
        leaves.append({'name': additive, 'value': count})
    category_nodes.append({"name": category, "children": leaves})

treemap_additives = {"name": "additives", "children": category_nodes}

with open('treemap_additives.json', 'w') as json_file:
    json.dump(treemap_additives, json_file, indent=4)

### Force-directed graph of additives that co-occur in the same product

Based on https://observablehq.com/@d3/force-directed-graph

<img src="force-directed-additives.png" width="500">

In [5]:
from itertools import combinations

# Minimum number of products a pair of additives must share to become a link.
threshold = 100


def _additive_pairs(additives):
    """Return the unordered additive pairs of a single product.

    `additives` is the list produced by splitting one `additives` cell, or a
    non-list value (NaN) when the cell is missing, in which case there are no
    pairs. Additives are de-duplicated and sorted before pairing so that a
    pair is always emitted as the same tuple regardless of the order in which
    the product lists them, and an additive is never paired with itself.
    """
    if not isinstance(additives, list):
        return []
    return list(combinations(sorted(set(additives)), 2))


# Count, for every pair of additives, the number of products containing both.
# Products without pairs explode to NaN, which value_counts() leaves out.
pair_counts = (
    df['additives']
    .str.split(',')
    .apply(_additive_pairs)
    .explode()
    .value_counts()
)

additives_links = pair_counts.to_frame().reset_index()
additives_links.columns = ['additives', 'value']

# Unpack the (source, target) tuples with plain list comprehensions; this
# avoids building one Series per row and also works when there are no pairs.
additives_links['source'] = [pair[0] for pair in additives_links['additives']]
additives_links['target'] = [pair[1] for pair in additives_links['additives']]
additives_links = additives_links.drop(columns='additives')

# Keep only pairs that co-occur often enough to be worth drawing.
additives_links = additives_links[additives_links['value'] >= threshold]
links = additives_links.to_dict('records')

# Every additive that appears in a kept link becomes a node. Sorting makes the
# node order and the `group` numbers identical from one run to the next.
node_ids = sorted({link[end] for link in links for end in ('source', 'target')})

force_directed_graph_ = {
    'nodes': [{'id': node_id, 'group': group} for group, node_id in enumerate(node_ids)],
    'links': links,
}

with open('force_directed_graph_additives.json', 'w') as json_file:
    json.dump(force_directed_graph_, json_file, indent=4)