In [None]:
import sys
import pathlib
import numpy as np
import matplotlib.pyplot as plt

sys.path.append('../utils')
from utils import show_image, class_distribution, tf_bincount
%load_ext autoreload
%autoreload 2

# Jupyter-specific
%matplotlib inline

In [None]:
import seaborn as sns
sns.set()

# Font sizes (in points) shared by the matplotlib defaults below.
SMALL_SIZE, MEDIUM_SIZE, BIGGER_SIZE = 12, 14, 16

plt.rcParams.update({
    'font.size': SMALL_SIZE,          # default text size
    'axes.titlesize': BIGGER_SIZE,    # axes title
    'axes.labelsize': MEDIUM_SIZE,    # x and y axis labels
    'xtick.labelsize': SMALL_SIZE,    # x tick labels
    'ytick.labelsize': SMALL_SIZE,    # y tick labels
    'legend.fontsize': MEDIUM_SIZE,   # legend entries
    'figure.titlesize': BIGGER_SIZE,  # figure-level title
})

In [None]:
# NOTE(review): absolute, machine-specific path; it is not referenced by any
# other code visible in this notebook.
data_dir = pathlib.Path('/home/henriklg/master-thesis/data/hyper-kvasir/labeled_ttv/')

# Settings consumed by the plotting helper: number of classes, output
# directory for saved figures, image shape and the class names in the order
# used for the x axis.
conf = {
    "num_classes": 23,
    "log_dir": ".",
    "img_shape": [128, 128, 3],
    "class_names": [
        'barretts-short-segment',
        'retroflex-stomach',
        'ulcerative-colitis-0-1',
        'ulcerative-colitis-grade-3',
        'esophagitis-b-d',
        'dyed-resection-margins',
        'hemorrhoids',
        'normal-z-line',
        'esophagitis-a',
        'ulcerative-colitis-1-2',
        'barretts',
        'bbps-2-3',
        'ileum',
        'bbps-0-1',
        'impacted-stool',
        'cecum',
        'ulcerative-colitis-grade-2',
        'ulcerative-colitis-2-3',
        'pylorus',
        'retroflex-rectum',
        'ulcerative-colitis-grade-1',
        'polyps',
        'dyed-lifted-polyps',
    ],
}

In [None]:
# Per-class sample counts for the two splits (one entry per class).
split_0 = [
    1415, 1202, 1333, 1522, 1387, 1, 326, 48, 12324, 24212, 7962, 2703,
    3850, 17984, 1590, 11104, 206, 1041, 0, 51, 1440, 5192, 2524,
]

split_1 = [
    326, 1829, 1478, 1526, 1884, 2, 2, 63, 12560, 22369, 9605, 3153,
    2971, 16899, 1750, 10457, 160, 1034, 9, 86, 2961, 6183, 2200,
]

# Element-wise mean of the two splits, rounded to an integer with Python's
# built-in round() (ties go to the nearest even number).
avg_split = [
    int(round((count_0 + count_1) / 2.0))
    for count_0, count_1 in zip(split_0, split_1)
]

In [None]:
# Maps this notebook's class order to the dataset paper's class order:
# order_conversion[my_index] is the paper's index for the same class.
# Example: paper class 0 is barretts, which is class 10 here, so
# order_conversion[10] == 0.
order_conversion = [
    16, 15, 17, 22, 12, 4, 5, 10, 11, 18, 0, 2,
    6, 1, 7, 8, 21, 19, 9, 14, 20, 13, 3,
]

In [None]:
# Reorder the averaged split counts so that position i holds the count for
# class i in this notebook's class order.
ds_dist = [avg_split[order_idx] for order_idx in order_conversion]

In [None]:
def print_bar_chart(data, conf, title=None, fname=None, figsize=(15,6), label=None):
    """
    Draw a grouped bar chart with one group of bars per class.

    This notebook defines print_bar_chart several times. This variant draws
    no legend and annotates the bars of the last series with their heights.

    Args:
        data: non-empty sequence of series; each series holds one value per
            class and is drawn as one set of bars.
        conf: dict providing "num_classes", "class_names" (x tick labels)
            and "log_dir" (directory the PDF is written to).
        title: optional axes title.
        fname: optional file name without extension; when given, the figure
            is saved as '<log_dir>/<fname>.pdf'.
        figsize: figure size forwarded to plt.subplots.
        label: optional sequence of labels, one per series. A series without
            a label falls back to 'Iter <index>'.

    Raises:
        ValueError: if data is empty.
    """
    num_bars = len(data)
    if num_bars == 0:
        raise ValueError("data must contain at least one series")

    x = np.arange(conf["num_classes"])
    # The series share 0.7 of each class slot (1.0 = bars side by side).
    width = 0.7 / num_bars

    # Offsets in units of one bar width, centred on the tick:
    # [0] for one series, [-0.5, 0.5] for two, [-1, 0, 1] for three, ...
    bar_placement = np.arange(num_bars) - (num_bars - 1) / 2.0

    fig, ax = plt.subplots(figsize=figsize)

    rects = []
    for cnt, (dat, placement) in enumerate(zip(data, bar_placement)):
        # Resolve the label per series instead of overwriting the `label`
        # argument; overwriting it with a string and then indexing it would
        # yield single characters.
        if label is not None and cnt < len(label):
            bar_label = label[cnt]
        else:
            bar_label = 'Iter {}'.format(cnt)
        rects.append(ax.bar(x+placement*width, dat, width, label=bar_label))

    ax.set_ylabel('Number of samples')
    if title:
        ax.set_title(title)
    ax.set_xticks(x)
    ax.set_xticklabels(conf["class_names"])
    ax.set_axisbelow(True)

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=25, ha="right",
             rotation_mode="anchor")
    ax.grid(axis='x')

    def autolabel(bars):
        """Attach a text label above each bar in *bars*, displaying its height."""
        for rect in bars:
            height = int(rect.get_height())
            ax.annotate('{}'.format(height),
                        xy=(rect.get_x() + rect.get_width() / 2, height),
                        xytext=(0, 3),  # 3 points vertical offset
                        textcoords="offset points",
                        ha='center', va='bottom')

    autolabel(rects[-1])

    fig.tight_layout()
    if fname:
        fig.savefig('{}/{}.pdf'.format(conf["log_dir"], fname), format='pdf')
    plt.show()

In [None]:
# Chart of the reordered paper distribution; saved as paper_distribution.pdf
# in conf["log_dir"].
print_bar_chart(data=[ds_dist], conf=conf, fname="paper_distribution", label=["Paper distribution"])

## Get the distribution from our own system

In [None]:
import pickle

pickle_path = "/home/henriklg/master-thesis/code/hyper-kvasir/experiments/unlab_dist"
# NOTE: pickle.load can execute arbitrary code while unpickling, so only load
# files that come from a trusted source.
# The with-block closes the file handle as soon as loading has finished.
with open(pickle_path+"/unlab_findings.pkl", "rb") as pkl_file:
    new_unlab = pickle.load(pkl_file)

In [None]:
def print_bar_chart(data, conf, title=None, fname=None, figsize=(15,6), label=None):
    """
    Draw a grouped bar chart with one group of bars per class.

    This notebook defines print_bar_chart several times. This variant draws
    a legend in the upper left corner and annotates the bars of the last
    series with their heights.

    Args:
        data: non-empty sequence of series; each series holds one value per
            class and is drawn as one set of bars.
        conf: dict providing "num_classes", "class_names" (x tick labels)
            and "log_dir" (directory the PDF is written to).
        title: optional axes title.
        fname: optional file name without extension; when given, the figure
            is saved as '<log_dir>/<fname>.pdf'.
        figsize: figure size forwarded to plt.subplots.
        label: optional sequence of legend labels, one per series. A series
            without a label falls back to 'Iter <index>'.

    Raises:
        ValueError: if data is empty.
    """
    num_bars = len(data)
    if num_bars == 0:
        raise ValueError("data must contain at least one series")

    x = np.arange(conf["num_classes"])
    # The series share 0.7 of each class slot (1.0 = bars side by side).
    width = 0.7 / num_bars

    # Offsets in units of one bar width, centred on the tick:
    # [0] for one series, [-0.5, 0.5] for two, [-1, 0, 1] for three, ...
    bar_placement = np.arange(num_bars) - (num_bars - 1) / 2.0

    fig, ax = plt.subplots(figsize=figsize)

    rects = []
    for cnt, (dat, placement) in enumerate(zip(data, bar_placement)):
        # Resolve the label per series instead of overwriting the `label`
        # argument; overwriting it with a string and then indexing it would
        # yield single characters in the legend.
        if label is not None and cnt < len(label):
            bar_label = label[cnt]
        else:
            bar_label = 'Iter {}'.format(cnt)
        rects.append(ax.bar(x+placement*width, dat, width, label=bar_label))

    ax.set_ylabel('Number of samples')
    if title:
        ax.set_title(title)
    ax.set_xticks(x)
    ax.set_xticklabels(conf["class_names"])
    ax.set_axisbelow(True)
    ax.legend(loc='upper left')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=25, ha="right",
             rotation_mode="anchor")
    ax.grid(axis='x')

    def autolabel(bars):
        """Attach a text label above each bar in *bars*, displaying its height."""
        for rect in bars:
            height = int(rect.get_height())
            ax.annotate('{}'.format(height),
                        xy=(rect.get_x() + rect.get_width() / 2, height),
                        xytext=(0, 3),  # 3 points vertical offset
                        textcoords="offset points",
                        ha='center', va='bottom')

    autolabel(rects[-1])

    fig.tight_layout()
    if fname:
        fig.savefig('{}/{}.pdf'.format(conf["log_dir"], fname), format='pdf')
    plt.show()

In [None]:
# Count how often each class index occurs in new_unlab["lab_list"].
# NOTE(review): the uint8 cast assumes every label lies in 0..255 - confirm
# that lab_list never holds negative or larger values.
lab_array = np.asarray(new_unlab["lab_list"], dtype=np.uint8)
# minlength ensures own_dist has at least conf["num_classes"] entries, even
# when the highest class indices never occur.
own_dist = np.bincount(lab_array, minlength=int(conf["num_classes"]))

In [None]:
# Chart of the class counts computed from the loaded findings; saved as
# own_distribution.pdf in conf["log_dir"].
print_bar_chart(data=[own_dist], conf=conf, fname="own_distribution", label=["Own distribution"])

## Both distributions

In [None]:
def print_bar_chart(data, conf, title=None, fname=None, figsize=(15,6), label=None):
    """
    Draw a grouped bar chart with one group of bars per class.

    This notebook defines print_bar_chart several times. This variant fixes
    the y axis to 0..27000, draws a legend in the upper left corner and
    annotates the bars of the first and the last series with vertically
    rotated height labels.

    Args:
        data: non-empty sequence of series; each series holds one value per
            class and is drawn as one set of bars.
        conf: dict providing "num_classes", "class_names" (x tick labels)
            and "log_dir" (directory the PDF is written to).
        title: optional axes title.
        fname: optional file name without extension; when given, the figure
            is saved as '<log_dir>/<fname>.pdf'.
        figsize: figure size forwarded to plt.subplots.
        label: optional sequence of legend labels, one per series. A series
            without a label falls back to 'Iter <index>'.

    Raises:
        ValueError: if data is empty.
    """
    num_bars = len(data)
    if num_bars == 0:
        raise ValueError("data must contain at least one series")

    x = np.arange(conf["num_classes"])
    # The series share 0.7 of each class slot (1.0 = bars side by side).
    width = 0.7 / num_bars

    # Offsets in units of one bar width, centred on the tick:
    # [0] for one series, [-0.5, 0.5] for two, [-1, 0, 1] for three, ...
    bar_placement = np.arange(num_bars) - (num_bars - 1) / 2.0

    fig, ax = plt.subplots(figsize=figsize)

    rects = []
    for cnt, (dat, placement) in enumerate(zip(data, bar_placement)):
        # Resolve the label per series instead of overwriting the `label`
        # argument; overwriting it with a string and then indexing it would
        # yield single characters in the legend.
        if label is not None and cnt < len(label):
            bar_label = label[cnt]
        else:
            bar_label = 'Iter {}'.format(cnt)
        rects.append(ax.bar(x+placement*width, dat, width, label=bar_label))

    ax.set_ylabel('Number of samples')
    if title:
        ax.set_title(title)
    ax.set_xticks(x)
    ax.set_ylim(0,27000)
    ax.set_xticklabels(conf["class_names"])
    ax.set_axisbelow(True)
    ax.legend(loc='upper left')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=25, ha="right",
             rotation_mode="anchor")
    ax.grid(axis='x')

    def autolabel(bars):
        """Attach a vertical text label above each bar in *bars*, displaying its height."""
        for rect in bars:
            height = int(rect.get_height())
            ax.annotate('{}'.format(height),
                        xy=(rect.get_x() + rect.get_width() / 2, height),
                        xytext=(0, 3),  # 3 points vertical offset
                        textcoords="offset points",
                        ha='center', va='bottom', rotation='vertical')

    autolabel(rects[0])
    # With a single series the first and last entries are the same bars;
    # skip the second pass so they are not annotated twice.
    if num_bars > 1:
        autolabel(rects[-1])

    fig.tight_layout()
    if fname:
        fig.savefig('{}/{}.pdf'.format(conf["log_dir"], fname), format='pdf')
    plt.show()

In [None]:
# Side-by-side chart of the paper distribution and the own distribution;
# saved as both_distributions.pdf in conf["log_dir"].
print_bar_chart(
    data=[ds_dist, own_dist],
    conf=conf,
    fname="both_distributions",
    label=["Paper distribution", "Own distribution"],
)

## Normalize the own distribution and the number of labeled samples (min-max)

In [None]:
def normalize(own_dist):
    """
    Min-max scale a sequence of numbers to the range [0, 1].

    Args:
        own_dist: sequence (list or array) of numeric values.

    Returns:
        A list of floats, one per input value, where the minimum maps to 0.0
        and the maximum to 1.0. An empty input gives an empty list. When all
        values are equal there is no range to scale by, so every entry is 0.0.
    """
    values = np.asarray(own_dist, dtype=float)
    if values.size == 0:
        return []

    dist_min = values.min()
    span = values.max() - dist_min
    if span == 0:
        # Constant input: avoid dividing by zero (which would yield NaN).
        return [0.0] * values.size

    return ((values - dist_min) / span).tolist()

In [None]:
# Sample counts of the labeled data, one entry per class.
labeled = [
    53, 764, 35, 133, 260, 989, 6, 932, 403, 11, 41, 1148,
    9, 646, 131, 1009, 443, 28, 999, 391, 201, 1028, 1002,
]

lab_norm = normalize(labeled)

print(lab_norm)

In [None]:
# Scale the own distribution with the same normalize() helper so it can be
# compared with lab_norm.
own_norm = normalize(own_dist)

print(own_norm)

In [None]:
def print_bar_chart(data, conf, title=None, fname=None, figsize=(15,6), label=None):
    """
    Draw a grouped bar chart with one group of bars per class.

    This notebook defines print_bar_chart several times. This variant labels
    the y axis as normalized sample counts, draws a legend in the upper left
    corner and does not annotate the bars with their heights.

    Args:
        data: non-empty sequence of series; each series holds one value per
            class and is drawn as one set of bars.
        conf: dict providing "num_classes", "class_names" (x tick labels)
            and "log_dir" (directory the PDF is written to).
        title: optional axes title.
        fname: optional file name without extension; when given, the figure
            is saved as '<log_dir>/<fname>.pdf'.
        figsize: figure size forwarded to plt.subplots.
        label: optional sequence of legend labels, one per series. A series
            without a label falls back to 'Iter <index>'.

    Raises:
        ValueError: if data is empty.
    """
    num_bars = len(data)
    if num_bars == 0:
        raise ValueError("data must contain at least one series")

    x = np.arange(conf["num_classes"])
    # The series share 0.7 of each class slot (1.0 = bars side by side).
    width = 0.7 / num_bars

    # Offsets in units of one bar width, centred on the tick:
    # [0] for one series, [-0.5, 0.5] for two, [-1, 0, 1] for three, ...
    bar_placement = np.arange(num_bars) - (num_bars - 1) / 2.0

    fig, ax = plt.subplots(figsize=figsize)

    for cnt, (dat, placement) in enumerate(zip(data, bar_placement)):
        # Resolve the label per series instead of overwriting the `label`
        # argument; overwriting it with a string and then indexing it would
        # yield single characters in the legend.
        if label is not None and cnt < len(label):
            bar_label = label[cnt]
        else:
            bar_label = 'Iter {}'.format(cnt)
        ax.bar(x+placement*width, dat, width, label=bar_label)

    ax.set_ylabel('Number of samples (normalized)')
    if title:
        ax.set_title(title)
    ax.set_xticks(x)
    ax.set_xticklabels(conf["class_names"])
    ax.set_axisbelow(True)
    ax.legend(loc='upper left')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=25, ha="right",
             rotation_mode="anchor")
    ax.grid(axis='x')

    fig.tight_layout()
    if fname:
        fig.savefig('{}/{}.pdf'.format(conf["log_dir"], fname), format='pdf')
    plt.show()

In [None]:
# Side-by-side chart of the two normalized series; saved as normalized.pdf
# in conf["log_dir"].
print_bar_chart(
    data=[lab_norm, own_norm],
    conf=conf,
    fname="normalized",
    label=["Labeled samples", "Infered pseudo labels"],
)