In [None]:
from notebook_prelude import *

In [None]:
# Map every name yielded by get_dataset_names_with_concept_map() to whatever
# get_concept_map_for_dataset() returns for it (unpacked downstream as a pair).
concept_maps = {}
for dataset_name in dataset_helper.get_dataset_names_with_concept_map():
    concept_maps[dataset_name] = dataset_helper.get_concept_map_for_dataset(dataset_name)

In [None]:
# Pool the node degrees of all graphs of a dataset into one flat list per dataset.
degrees = {}
for dataset, (X, _) in concept_maps.items():
    graphs = graph_helper.get_graphs_only(X)
    # dict(...) accepts both a plain {node: degree} mapping (networkx 1.x) and an
    # iterable of (node, degree) pairs such as the DegreeView of networkx >= 2.0,
    # which has no .values() method.
    # NOTE(review): assumes the graphs are networkx graphs -- confirm.
    per_graph_degrees = (dict(graph.degree()).values() for graph in graphs)
    degrees[dataset] = list(chain.from_iterable(per_graph_degrees))

In [None]:
# Long-format table with one row per node: the dataset it belongs to and its degree.
data = collections.defaultdict(list)
for dataset, degrees_ in degrees.items():
    data['dataset'] += [dataset] * len(degrees_)
    data['degree'] += degrees_
df = pd.DataFrame(data)

# Overlay one normalised histogram per dataset, keeping only degrees strictly
# below the 99.9 % quantile computed over all datasets together.
# `density=True` replaces `normed=True`, which was deprecated in Matplotlib 2.1
# and removed in Matplotlib 3.1.
df[df.degree < df.degree.quantile(0.999)].groupby('dataset').degree.plot(kind='hist', density=True, alpha=0.8)

In [None]:
# One histogram of node degrees per dataset.
for dataset_name, node_degrees in degrees.items():
    degree_series = pd.Series(node_degrees)

    # Fraction of nodes whose degree is greater than one; shown in the title.
    above_one = degree_series[degree_series > 1]
    share_above_one = len(above_one) / len(degree_series)

    # Drop the extreme tail: keep only values strictly below the 99.9 % quantile.
    cutoff = degree_series.quantile(0.999)
    trimmed = degree_series[degree_series < cutoff]

    plot_title = 'Dataset: {}, $degree_1 / degree_2$ = {:.4f}'.format(dataset_name, share_above_one)
    ax = trimmed.plot(kind='hist', bins=60, title=plot_title)
    ax.grid(False)
    plt.show()

In [None]:
import matplotlib.patches as patches

# For every dataset, plot the cumulative sum of the ascending-sorted node degrees
# (as a percentage of the total) and shade the share of nodes with degree <= 1
# and with degree == 2.
for dataset, df_ in df.groupby('dataset'):
    node_degrees = df_.degree
    cum_sum = node_degrees.sort_values().cumsum().values
    max_cum_sum = cum_sum.max()
    # x: position of the node in the sorted order, scaled to [0, 1]
    # y: percentage of the total degree sum reached up to that node
    x = np.linspace(0, 1, len(cum_sum))
    y = cum_sum / max_cum_sum * 100
    fig, ax = plt.subplots()
    ax.plot(x, y)
    # Label the figure so the per-dataset plots can be told apart
    # (same wording as the per-dataset histogram titles).
    ax.set_title('Dataset: {}'.format(dataset))

    # Fractions of nodes with degree <= 1 and with degree == 2.
    ratio = len(node_degrees[node_degrees <= 1]) / len(node_degrees)
    ratio_2 = len(node_degrees[node_degrees == 2]) / len(node_degrees)
    ax.axvline(ratio, c='red', linestyle='dashed')
    ax.axvline(ratio + ratio_2, c='red', linestyle='dashed')
    ax.set_xticks([])

    # Shaded band covering the nodes with degree <= 1 ...
    ax.add_patch(
        patches.Rectangle(
            (0, 0),   # (x, y) of the lower-left corner
            ratio,    # width
            100,      # height
            alpha=0.2
        )
    )

    # ... followed by a red band covering the nodes with degree == 2.
    ax.add_patch(
        patches.Rectangle(
            (ratio, 0),   # (x, y) of the lower-left corner
            ratio_2,      # width
            100,          # height
            alpha=0.2,
            facecolor='red'
        )
    )
    fig.tight_layout()


In [None]:
# Percentage of nodes per degree bucket, one record per dataset.
ratio_data = []
for dataset, df_ in df.groupby('dataset'):
    d = df_.degree
    num = len(d)

    # NOTE: the first bucket also counts nodes of degree 0 (d <= 1) although its label reads "=1".
    buckets = [('$|d_v|$=1', d[d <= 1]), ('$|d_v|$=2', d[d == 2]), ('$|d_v|$=3', d[d == 3]), ('$|d_v|$>3', d[d > 3])]
    # `row` (not `data`) so the dict/DataFrame input built in an earlier cell is not shadowed;
    # the dataset key is set once instead of on every bucket iteration.
    row = {'dataset': dataset}
    for name, els in buckets:
        row[name] = len(els) / num * 100
    ratio_data.append(row)

# One stacked horizontal bar per dataset; the segments are the bucket percentages.
d__ = pd.DataFrame(ratio_data).set_index('dataset').sort_index(ascending=False)
fig, ax = plt.subplots(figsize = (EXPORT_FIG_WIDTH_BIG, 3))
d__.plot(kind='barh', stacked=True, ax = ax)
ax.grid(False)
x_ticks = np.linspace(0, 100, 11)
ax.set_xticks(x_ticks)
# Pass a real list: in Python 3 `map` yields a one-shot iterator, whereas
# set_xticklabels is documented to take a sequence of labels.
ax.set_xticklabels([int(tick) for tick in x_ticks])
# The axis runs past 100 % -- presumably to leave room for the legend; confirm.
ax.set_xlim((0, 114))
ax.set_xlabel('%')
ax.set_ylabel('')
fig.tight_layout()

save_fig(fig, 'percentage_degree')