# Analysis of Workflow Systems

In [None]:
from collections import Counter

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

### Load Data

In [None]:
# Load the workflow-systems table; the path is relative, so the CSV must be
# in the directory the notebook is run from.
df = pd.read_csv('workflow-systems.csv')

In [None]:
# Give rows without a domain an explicit placeholder label so they can be
# recognised (and skipped or grouped) in the later cells.
df['domain'] = df['domain'].fillna('Uncategorized')

### Breakdown by Domain

In [None]:
# compute counts
# Tally the number of systems per domain, leaving out the 'Uncategorized'
# placeholder so the chart only shows real domains.
counts = Counter(key for key in df.domain if key != 'Uncategorized')

# Order the slices from smallest to largest. sorted() is stable, so domains
# with equal counts keep the order in which they first appear in the data.
counts = dict(sorted(counts.items(), key=lambda item: item[1]))
labels = [f'{k} ({v:d})' for k, v in counts.items()]

# plot pie chart
# Materialise the values as a list: a dict view is not a sequence that NumPy
# can convert to a 1-D array, so hand plt.pie a real list instead.
# The result is bound to `_` to suppress the notebook's echo of the return value.
_ = plt.pie(list(counts.values()), labels=labels)

### GitHub Stars by Domain

Note that [GitHub stars might be fake](https://dagster.io/blog/fake-stars), and different domains have different levels of participation on GitHub, so star counts are only a rough proxy for popularity and are not directly comparable across domains. The "best" workflow system is the one that best meets your specific requirements.

In [None]:
# select axes
# plt.barh draws horizontal bars and FacetGrid.map passes the columns
# positionally, so `xaxis` supplies the bar positions (system names) and
# `yaxis` supplies the bar lengths (star counts).
xaxis = 'name'
yaxis = 'github_stars'
row   = 'domain'

# filter and sort data
# Drop systems that have no star count, then sort ascending. dropna() already
# returns a new frame, so `df` is left untouched without an explicit copy.
# mergesort is stable: systems with equal star counts keep their original
# relative order.
data = df.dropna(subset=[yaxis]).sort_values(by=yaxis, kind='mergesort')

# plot bar plots
# One facet per domain, stacked vertically; axes are not shared because both
# the set of names and the range of star counts differ between domains.
g = sns.FacetGrid(
    data,
    row=row,
    sharex=False,
    sharey=False,
    height=6.0,
    aspect=1.5,
)
# NOTE(review): FacetGrid.map forwards extra keyword arguments to the plotting
# function, so `data=data` ends up being passed to plt.barh. The grid already
# holds `data`, so this looks redundant -- confirm before removing.
g = g.map(plt.barh, xaxis, yaxis, data=data)