In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf
import numpy as np
import datetime
import pickle
import time
import os
import pathlib
import matplotlib.pyplot as plt
import sys
 
# Make the helper modules in ../utils importable
sys.path.append('../utils')
from pipeline import *
from create_model import *
from utils import *
from unlabeled_utils import *
from evaluate_model import *
%load_ext autoreload
%autoreload 2

# Jupyter-specific
%matplotlib inline

In [None]:
# Input locations for the labeled and unlabeled data directories.
# NOTE(review): absolute, machine-specific paths — adjust before running elsewhere.
data_dir = pathlib.Path('/home/henriklg/master-thesis/data/hyper-kvasir/labeled_ttv/')
unlab_dir = pathlib.Path('/home/henriklg/master-thesis/data/hyper-kvasir/unlabeled_ttv/')

# Experiment name; it is also used as the sub-folder name under ./logs.
dir_name = "distribution"
log_dir = "./logs//{}".format(dir_name)

# Single configuration dict that is handed to create_dataset() and to
# resample_and_combine() further down.
# NOTE(review): later cells read conf["num_classes"], which is not set here —
# presumably create_dataset(conf) adds it; confirm.
conf = {
    # Dataset
    "data_dir": data_dir,
    "unlab_dir": unlab_dir,
    "ds_info": 'hypkva',
    "augment": ["crop","flip","brightness","saturation","contrast","rotate"],
    "aug_mult": 0.1,
    "resample": True,
    "class_weight": False,
    "shuffle_buffer_size": 2000,       # no shuffling: 0
    "seed": 2511,
    "neg_class": None,                 # select neg class for binary ds (normal class)
    "outcast": None,                   # list of folders to drop - currently only supports 1 item
    # Model
    "model": 'EfficientNetB0',
    "weights": "imagenet",             # which weights to initialize the model with
    "dropout": 0.1,
    "num_epochs": 3,
    "batch_size": 16,
    "img_shape": (128, 128, 3),
    "learning_rate": 0.001,
    "optimizer": 'Adam',
    "final_activation": 'softmax',     # sigmoid for binary ds
    # Callbacks
    "tensorboard": False,
    "decay_rate": 0,                   # 128:0.25   64:1.0   32:4.0   16:16   8:64
    "checkpoint": False,
    "early_stopp_patience": 0,         # disable: 0
    # Misc
    "verbosity": 1,
    "keep_thresh": 0.8,
    "pseudo_thresh": 0,
    "class_limit": 0,
    "dir_name": dir_name,
    "log_dir": log_dir,                # resample_and_combine() writes datasets_bin.pkl here
    "cache_dir": "./cache",
    }

In [None]:
# Build the dataset container from the configuration. Later cells index it
# like a dict (ds["clean_train"], ds["combined_train"]).
# NOTE(review): conf["num_classes"] is read right after this call but is not
# set in the config cell — presumably create_dataset() fills it in; confirm.
ds = create_dataset(conf)

In [None]:
# History list that resample_and_combine() appends to; the first entry is
# computed from the clean training set.
# NOTE(review): tf_bincount presumably returns per-class sample counts — confirm.
datasets_bin = [tf_bincount(ds["clean_train"], conf["num_classes"])]
sanity = []  # NOTE(review): not used by any cell visible in this section
# Start the combined training set as the clean one; resample_and_combine()
# concatenates pseudo-labelled findings onto it.
ds["combined_train"] = ds["clean_train"]

In [None]:
# Load the pickled unlabeled findings stored under the "0_teacher" experiment
# folder (presumably written by the teacher-model run — confirm).
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# point this at files you produced yourself. Also an absolute, machine-specific path.
pickle_path = "/home/henriklg/master-thesis/code/hyper-kvasir/experiments/model-size/all-b4/0_teacher"
# Use a context manager so the file handle is closed deterministically
# instead of being left to the garbage collector.
with open(pickle_path+"/unlab_findings.pkl", "rb") as f:
    teacher_pseudo = pickle.load(f)

# custom_sort is a project helper; its ordering is not visible from this cell.
teach_pseudo_sorted = custom_sort(teacher_pseudo)

In [None]:
def resample_and_combine(ds, conf, pseudo_sorted, datasets_bin, limit=0):
    """Add resampled pseudo-labelled findings to the combined training set.

    Steps:
      1. Call ``resample_unlab`` with the most recent entry of ``datasets_bin``
         to pick which findings to add.
      2. If any samples were added, concatenate them onto
         ``ds["combined_train"]``; otherwise the previous combined set is kept.
      3. Append the result of ``tf_bincount`` for the combined set to
         ``datasets_bin`` and pickle the whole history to
         ``<conf["log_dir"]>/datasets_bin.pkl``.
      4. Draw the history with ``print_bar_chart``.

    Args:
        ds: mapping holding at least ``"combined_train"``; it is updated in
            place when samples are added.
        conf: configuration mapping; ``"num_classes"`` and ``"log_dir"`` are
            read here, and the whole mapping is forwarded to the helpers.
        pseudo_sorted: findings forwarded unchanged to ``resample_unlab``.
        datasets_bin: non-empty list of earlier bincounts; it is appended to
            in place.
        limit: forwarded to ``resample_unlab`` (default 0).

    Returns:
        Tuple ``(datasets_bin, added_samples)`` where ``datasets_bin`` is the
        same list object that was passed in and ``added_samples`` is the
        second value returned by ``resample_unlab``.
    """
    new_findings, added_samples = resample_unlab(
        pseudo_sorted, datasets_bin[-1], conf, limit=limit
    )

    # Combine with the current training set (built from clean_ds, which is not
    # augmented/repeated). The tf.data.Dataset is only built when there is
    # something to add, so an empty result never reaches from_tensor_slices.
    if len(added_samples) != 0:
        findings_tensor = tf.data.Dataset.from_tensor_slices(new_findings)
        ds["combined_train"] = ds["combined_train"].concatenate(findings_tensor)

    # Count samples in the new/combined dataset.
    datasets_bin.append(tf_bincount(ds["combined_train"], conf["num_classes"]))

    # The log directory may not exist yet (nothing in this function creates
    # it otherwise), so create it before writing the history.
    os.makedirs(conf["log_dir"], exist_ok=True)
    with open(conf["log_dir"]+"/datasets_bin.pkl", 'wb') as f:
        pickle.dump(datasets_bin, f)

    # History of class distribution
    print_bar_chart(
        data=datasets_bin,
        conf=conf,
        title=None,
        fname="bar_chart-distribution"
    )
    return datasets_bin, added_samples

In [None]:
# Merge the teacher findings into ds["combined_train"] and record the result
# in datasets_bin.
# Not idempotent: every run appends another entry to datasets_bin and, when
# samples are added, concatenates onto ds["combined_train"] again.
datasets_bin, added_samples = resample_and_combine(ds, conf, teach_pseudo_sorted, datasets_bin)

In [None]:
# Load the pickled unlabeled findings stored under the "0_student" experiment
# folder (presumably written by the student-model run — confirm).
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# point this at files you produced yourself. Also an absolute, machine-specific path.
pickle_path = "/home/henriklg/master-thesis/code/hyper-kvasir/experiments/model-size/all-b4/0_student"
# Use a context manager so the file handle is closed deterministically
# instead of being left to the garbage collector.
with open(pickle_path+"/unlab_findings.pkl", "rb") as f:
    stud_pseudo = pickle.load(f)

# custom_sort is a project helper; its ordering is not visible from this cell.
stud_pseudo_sorted = custom_sort(stud_pseudo)

In [None]:
# Merge the student findings on top of whatever ds["combined_train"] already
# holds (including anything added by the earlier teacher cell).
# Not idempotent: every run appends another entry to datasets_bin and, when
# samples are added, concatenates onto ds["combined_train"] again.
datasets_bin, added_samples = resample_and_combine(ds, conf, stud_pseudo_sorted, datasets_bin)