# Sunburst Charts
<img src="img/sunburst-logo.png" style="float: right; padding-left: 1em;"></img>

Sunburst charts are stacked donut charts for multi-dimensional part-to-whole data: the innermost ring shows the first dimension, and each ring further out adds another level of detail.

The size of each section in a ring represents the percentage a classifier occupies in the related dimension.

The data used here is taken from the Stack Overflow answer linked in the "Using matplotlib" section below.

In [0]:
%matplotlib notebook

import time
from pprint import pprint
from IPython.display import HTML, clear_output
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Disk-usage tree as nested tuples.  Every node has the form
# (label, value, children): `label` is the directory name, `value` its
# percentage, and `children` a (possibly empty) tuple of nodes of the same form.
data = (
    ('/', 100, (
        ('home', 70, (
            ('Images', 40, ()),
            ('Videos', 20, ()),
            ('Documents', 5, ()),
        )),
        ('usr', 15, (
            ('src', 6, (
                ('linux-headers', 4, ()),
                ('virtualbox', 1, ()),

            )),
            ('lib', 4, ()),
            ('share', 2, ()),
            ('bin', 1, ()),
            ('local', 1, ()),
            ('include', 1, ()),
        )),
    )),
)

The following code flattens the nested data into a DataFrame that holds the percentages and is indexed by a MultiIndex built from the paths.

In [0]:
def to_frame(data, path=()):
    """Flatten nested ``(label, value, children)`` nodes depth-first.

    Yields one ``(path, value)`` pair per node, each parent before its
    children, where ``path`` is the tuple of labels leading from the root
    down to (and including) that node.  ``path`` is the prefix prepended to
    every label found in ``data``.
    """
    for node in data:
        node_path = path + (node[0],)
        yield node_path, node[1]
        # Descend into the children, using this node's path as their prefix.
        yield from to_frame(node[2], node_path)

# Materialise the generator as an (n_rows, 2) array: column 0 holds the path
# tuples, column 1 the percentages.  The path tuples differ in length, so the
# array must be requested with dtype=object explicitly -- NumPy >= 1.24 raises
# ValueError when asked to build such a ragged array implicitly.
_rows = np.asarray(tuple(to_frame(data)), dtype=object)
# One index level per path depth; levels missing from shorter paths become ''.
_index = pd.MultiIndex.from_frame(
    pd.DataFrame(_rows[:, 0].tolist()).fillna(''))
df = pd.DataFrame(data=_rows[:, 1], index=_index, columns=['percent'])
print(df)

## Using matplotlib

The `sunburst` function is adapted from [this SO answer](https://stackoverflow.com/a/46790802/2748717) and only uses `matplotlib` without further dependencies.

In [0]:
def sunburst(nodes, total=np.pi * 2, offset=0, level=0, ax=None,
             text_style=dict(color='white', fontsize=14, ha='center', va='center')):
    """Draw ``nodes`` as one ring of a sunburst chart and recurse outwards.

    Parameters
    ----------
    nodes : sequence of ``(label, value, subnodes)``
        The nodes of the current ring; ``subnodes`` has the same structure
        and is drawn one ring further out.
    total : number
        The value that corresponds to a full circle.  With the default of
        ``2 * pi`` the values are used directly as angles in radians.
    offset : number
        Where the first wedge of this ring starts, in the units of ``value``.
    level : int
        Ring index; 0 is the centre of the chart.
    ax : polar axes, optional
        Axes to draw on.  When falsy, ``plt.subplot(111, projection='polar')``
        is used.
    text_style : dict
        Keyword arguments forwarded to ``ax.text``.  The dict is not
        modified; each call works on a copy whose ``fontsize`` is reduced by
        ``level`` (but never below 8) and hands that copy to the next ring.

    A single node at level 0 is drawn as the central disc, and its value
    becomes ``total`` for everything below it.  The function returns nothing.
    """
    if not ax:
        ax = plt.subplot(111, projection='polar')
    # Derive this ring's style without touching the caller's (or default) dict.
    style = dict(text_style, fontsize=max(8, text_style['fontsize'] - level))
    is_root = level == 0

    if is_root and len(nodes) == 1:
        root_label, root_value, children = nodes[0]
        ax.bar([0], [0.5], [np.pi * 2])
        ax.text(0, 0, root_label, **style)
        sunburst(children, total=root_value,
                 level=level + 1, ax=ax, text_style=style)
    elif nodes:
        radians_per_unit = np.pi * 2 / total
        labels, widths = [], []
        cursor = offset
        for label, value, children in nodes:
            labels.append(label)
            widths.append(value * radians_per_unit)
            # The outer rings are drawn first, before this ring's own bars.
            sunburst(children, total=total, offset=cursor,
                     level=level + 1, ax=ax, text_style=style)
            cursor += value
        # Start angle of every wedge: the ring offset plus all preceding widths.
        starts = np.cumsum([offset * radians_per_unit] + widths[:-1])
        count = len(nodes)
        wedges = ax.bar(starts, [1] * count, widths,
                        np.zeros(count) + level - 0.5,
                        linewidth=1, edgecolor='white', align='edge')
        for wedge, label in zip(wedges, labels):
            # Place the label at the centre of its wedge.
            theta = wedge.get_x() + wedge.get_width() / 2
            radius = wedge.get_y() + wedge.get_height() / 2
            rotation = (90 + (360 - np.degrees(theta) % 180)) % 360
            ax.text(theta, radius, label, rotation=rotation, **style)

    if is_root:
        # Final axes setup, done once by the outermost call: theta direction
        # -1 (clockwise), zero at the top ('N'), and the polar frame hidden.
        ax.set_theta_direction(-1)
        ax.set_theta_zero_location('N')
        ax.set_axis_off()

Using that function, we plot the disk usage data defined previously.

In [0]:
# Use a square figure (11 x 11) for the chart, then draw the tree.
plt.rcParams["figure.figsize"] = (11,) * 2
sunburst(data)

# Write the current figure to disk with a transparent background.
# NOTE(review): the negative pad_inches presumably trims the empty margin
# left around the chart -- confirm against the rendered image.
chart_img = 'img/sunburst-plt.png'
plt.savefig(chart_img, dpi=90, transparent=True,
            bbox_inches='tight', pad_inches=-0.5)
# Drop whatever the cell has displayed so far and show the saved file instead.
# The current timestamp is appended as a query string so the URL differs on
# every run (presumably to keep the browser from showing a cached image).
clear_output()
HTML('<img src=\"{}?{}\"></img>'.format(chart_img, time.time()))

See also the similar [donut plot with subgroups](https://python-graph-gallery.com/163-donut-plot-with-subgroups/).