In [1]:
from exp.utils import *
from tqdm.notebook import tqdm
from multiprocessing import Pool

import torch
import torch.nn as NN
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor

In [2]:
# Experiment identifier (not referenced again in the visible part of the notebook).
model_name = "py_test_01"

In [3]:
# Mini-batch size used by the DataLoaders created further down.
bs = 8

In [4]:
# Label names. get_labels is not imported explicitly; presumably it comes from `from exp.utils import *`.
labels = get_labels()

In [5]:
# Train / validation / test frames. NOTE(review): reduced=True presumably selects a subset — confirm in exp.utils.
train_df, valid_df, test_df = get_dataframes(reduced=True)



In [6]:
# Label columns of the training frame as a plain array, used to derive the positive-class weights.
train_label = train_df[labels].values
# NOTE(review): pos_weights is later used as `pos_weights.cuda()`, so it is expected to be a torch tensor — confirm.
pos_weights = compute_positive_class_weigths(train_label)

In [7]:
# Evaluation pipeline: normalisation only, with the same statistics on all three channels.
test_tfs = transforms.Compose([
    transforms.Normalize(mean=[SUBSET_MEAN] * 3, std=[SUBSET_STD] * 3),
])
# Training pipeline: the same normalisation followed by a random horizontal flip.
train_tfs = transforms.Compose([
    transforms.Normalize(mean=[SUBSET_MEAN] * 3, std=[SUBSET_STD] * 3),
    transforms.RandomHorizontalFlip(p=0.5),
])

In [8]:
from torchvision.io import read_image

class CRX8_Data(Dataset):
    """Map-style dataset that serves (image, multi-hot label) pairs from a DataFrame.

    Args:
        df: frame with one row per image; must contain an "Image Index" column
            holding the file name and one column per entry in ``labels``.
        image_path: directory containing the image files (joined with ``/``,
            so a ``pathlib.Path``-like object is expected).
        labels: names of the label columns, in the order of the target vector.
        transforms: optional callable applied to the (3, H, W) float image.
    """

    def __init__(self, df, image_path, labels, transforms=None):
        self.df = df
        self.image_path = image_path
        self.len = df.shape[0]
        self.labels = labels
        self.transforms = transforms
        # NOTE(review): this adds a positional-index column to the caller's
        # frame in place (kept for backward compatibility).
        self.df["Index_2"] = list(range(self.df.shape[0]))

    def __len__(self):
        return self.len

    def __getitem__(self, idx):
        """Return ``(image, target)`` for row ``idx`` as float32 tensors."""
        img_path = self._get_image_path(idx)
        image = self._make3D(read_image(img_path))
        # Use the label names given to this dataset, not the notebook-level
        # global `labels`, so the instance is self-contained.
        label = self.df.iloc[idx, :].loc[self.labels].values
        if self.transforms:
            image = self.transforms(image)

        # `np.float` was removed in NumPy 1.24; cast straight to float32.
        return image.float(), torch.as_tensor(label.astype(np.float32))

    def _make3D(self, t):
        """Turn a (C, H, W) image tensor into a (3, H, W) float tensor in [0, 1].

        Only channel 0 is used and copied into all three output channels.
        (The previous ``np.repeat(...).view(3, H, W)`` repeated each *pixel*
        three times in flattened order, which scrambles the image instead of
        stacking three copies of it.)
        """
        gray = t[0, :, :].float() / 255.
        return gray.unsqueeze(0).repeat(3, 1, 1)

    def _get_image_path(self, idx):
        """Return the full path (as ``str``) of the image in row ``idx``."""
        return str(self.image_path / self.df.iloc[idx].loc["Image Index"])

In [9]:
# One dataset per split; only the training split gets the augmenting transforms.
train_ds, valid_ds, test_ds = (
    CRX8_Data(split_df, get_image_path(), labels, transforms=split_tfs)
    for split_df, split_tfs in (
        (train_df, train_tfs),
        (valid_df, test_tfs),
        (test_df, test_tfs),
    )
)

In [10]:
# Sanity pass: load every training sample once and remember the indices that fail.
failed_idx = []

for i in tqdm(range(len(train_ds))):
    try:
        train_ds[i]
    except Exception as err:
        # Catch Exception rather than using a bare `except:` so that
        # KeyboardInterrupt can still stop this long loop, and report the cause.
        failed_idx.append(i)
        print("Failed at obj", i, "-", repr(err))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=69219.0), HTML(value='')))




In [None]:
#failed_idx = [608, 5502, 8613]

In [None]:
# Paths of the samples recorded in failed_idx.
failed_ims = [train_ds._get_image_path(bad_idx) for bad_idx in failed_idx]
# Distinct raw tensor shapes among those images.
failed_shapes = {read_image(path).shape for path in failed_ims}

In [None]:
# Show the paths of the failing images.
failed_ims

In [None]:
# Show the distinct shapes found among them.
failed_shapes

In [None]:
# One loader per split, all with batch size `bs`; only the training loader shuffles.
train_dl, valid_dl, test_dl = (
    DataLoader(split_ds, batch_size=bs, shuffle=do_shuffle)
    for split_ds, do_shuffle in (
        (train_ds, True),
        (valid_ds, False),
        (test_ds, False),
    )
)

In [None]:
#train_ds.__getitem__(0)

In [None]:
# Multi-label loss on raw logits. pos_weight is moved to the GPU, so a CUDA device is required here.
# reduction="sum" adds up the element-wise losses instead of averaging them.
criterion = NN.BCEWithLogitsLoss(pos_weight=pos_weights.cuda(), reduction="sum")

In [None]:
# ResNet-18 backbone with pretrained weights.
import torchvision.models as models
net = models.resnet18(pretrained=True)

In [None]:
# Display the architecture.
net

In [None]:
#net.classifier = NN.Linear(1024, len(get_labels()))
# Replace the classification head with one output per label. The input width is
# read from the existing layer instead of hard-coding 512, so the cell keeps
# working if a backbone with a different feature size is used.
net.fc = NN.Linear(net.fc.in_features, len(get_labels()))
net

In [None]:
# From: https://sgugger.github.io/how-do-you-find-a-good-learning-rate.html
def find_lr(dl, optimizer, init_value = 1e-8, final_value=10., beta = 0.98, device=torch.device('cuda:0'),
            model=None, loss_fn=None):
    """Learning-rate range test: sweep the lr exponentially over one pass of ``dl``.

    The learning rate of the first parameter group starts at ``init_value`` and
    is multiplied by a constant factor after every batch so that it reaches
    ``final_value`` on the last batch. The exponentially smoothed,
    bias-corrected loss is recorded per batch; the sweep stops early once it
    exceeds four times the best value seen, or stops being finite.

    Args:
        dl: sized iterable yielding ``(inputs, labels)`` batches.
        optimizer: optimizer whose first param group's lr is swept; it must
            hold the parameters of ``model``.
        init_value: starting learning rate.
        final_value: learning rate reached on the last batch.
        beta: smoothing factor of the running loss average.
        device: device the model and the batches are moved to.
        model: network to train; defaults to the notebook-level ``net``.
        loss_fn: loss callable; defaults to the notebook-level ``criterion``.

    Returns:
        ``(log_lrs, losses)``: log10 of the learning rates and the matching
        smoothed losses, as two lists of floats.

    Note:
        The model weights and optimizer state are modified by the sweep.
    """
    import math  # local import: `math` is not imported explicitly in this notebook

    model = net if model is None else model
    loss_fn = criterion if loss_fn is None else loss_fn

    # Guard against a single-batch loader, which would divide by zero below.
    num = max(len(dl) - 1, 1)
    mult = (final_value / init_value) ** (1/num)
    lr = init_value
    optimizer.param_groups[0]['lr'] = lr
    avg_loss = 0.
    best_loss = 0.
    batch_num = 0
    losses = []
    log_lrs = []

    model.to(device)

    for inputs, labels in dl:
        batch_num += 1
        # Honour the `device` argument instead of unconditionally calling .cuda()
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_fn(outputs, labels)
        # Smoothed loss; .item() replaces `loss.data[0]`, which fails on 0-dim tensors
        avg_loss = beta * avg_loss + (1-beta) * loss.item()
        smoothed_loss = avg_loss / (1 - beta**batch_num)
        # Stop if the loss is exploding (or has become NaN/inf)
        if batch_num > 1 and (smoothed_loss > 4 * best_loss or not math.isfinite(smoothed_loss)):
            return log_lrs, losses
        # Record the best loss
        if smoothed_loss < best_loss or batch_num==1:
            best_loss = smoothed_loss
        # Store the values
        losses.append(smoothed_loss)
        log_lrs.append(math.log10(lr))
        # Do the SGD step
        loss.backward()
        optimizer.step()
        # Update the lr for the next step
        lr *= mult
        optimizer.param_groups[0]['lr'] = lr
    return log_lrs, losses

In [None]:
# From: https://github.com/dkumazaw/onecyclelr/blob/master/onecyclelr.py
from torch.optim import Optimizer


class OneCycleLR:
    """ Sets the learning rate of each parameter group by the one cycle learning rate policy
    proposed in https://arxiv.org/pdf/1708.07120.pdf.
    It is recommended that you set the max_lr to be the learning rate that achieves
    the lowest loss in the learning rate range test, and set min_lr to be 1/10 th of max_lr.
    So, the learning rate changes like min_lr -> max_lr -> min_lr -> final_lr,
    where final_lr = min_lr * reduce_factor.
    Momentum moves the opposite way during the cycle (max -> min -> max) and is
    left untouched during the annihilation phase.
    Note: Currently only supports one parameter group.
    Args:
        optimizer:             (Optimizer) against which we apply this scheduler
        num_steps:             (int) of total number of steps/iterations
        lr_range:              (tuple) of min and max values of learning rate
        momentum_range:        (tuple) of min and max values of momentum
        annihilation_frac:     (float), fraction of steps to annihilate the learning rate
        reduce_factor:         (float), denotes the factor by which we annihilate the learning rate at the end
        last_step:             (int), denotes the last step. Set to -1 to start training from the beginning
    Example:
        >>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
        >>> scheduler = OneCycleLR(optimizer, num_steps=num_steps, lr_range=(0.1, 1.))
        >>> for epoch in range(epochs):
        >>>     for step in train_dataloader:
        >>>         train(...)
        >>>         scheduler.step()
    Useful resources:
        https://towardsdatascience.com/finding-good-learning-rate-and-the-one-cycle-policy-7159fe1db5d6
        https://medium.com/vitalify-asia/whats-up-with-deep-learning-optimizers-since-adam-5c1d862b9db0
    """

    def __init__(self,
                 optimizer: Optimizer,
                 num_steps: int,
                 lr_range: tuple = (0.1, 1.),
                 momentum_range: tuple = (0.85, 0.95),
                 annihilation_frac: float = 0.1,
                 reduce_factor: float = 0.01,
                 last_step: int = -1):
        # Sanity check
        if not isinstance(optimizer, Optimizer):
            raise TypeError('{} is not an Optimizer'.format(type(optimizer).__name__))
        self.optimizer = optimizer

        self.num_steps = num_steps

        self.min_lr, self.max_lr = lr_range[0], lr_range[1]
        assert self.min_lr < self.max_lr, \
            "Argument lr_range must be (min_lr, max_lr), where min_lr < max_lr"

        self.min_momentum, self.max_momentum = momentum_range[0], momentum_range[1]
        assert self.min_momentum < self.max_momentum, \
            "Argument momentum_range must be (min_momentum, max_momentum), where min_momentum < max_momentum"

        self.num_cycle_steps = int(num_steps * (1. - annihilation_frac))  # Total number of steps in the cycle
        self.final_lr = self.min_lr * reduce_factor

        self.last_step = last_step

        # A fresh scheduler applies step 0 immediately so the optimizer starts at min_lr.
        if self.last_step == -1:
            self.step()

    def state_dict(self):
        """Returns the state of the scheduler as a :class:`dict`.
        It contains an entry for every variable in self.__dict__ which
        is not the optimizer. (Borrowed from _LRScheduler class in torch.optim.lr_scheduler.py)
        """
        return {key: value for key, value in self.__dict__.items() if key != 'optimizer'}

    def load_state_dict(self, state_dict):
        """Loads the schedulers state. (Borrowed from _LRScheduler class in torch.optim.lr_scheduler.py)
        Arguments:
            state_dict (dict): scheduler state. Should be an object returned
                from a call to :meth:`state_dict`.
        """
        self.__dict__.update(state_dict)

    def get_lr(self):
        """Current learning rate of the first parameter group."""
        return self.optimizer.param_groups[0]['lr']

    def get_momentum(self):
        """Current momentum of the first parameter group."""
        return self.optimizer.param_groups[0]['momentum']

    def step(self):
        """Conducts one step of learning rate and momentum update
        """
        current_step = self.last_step + 1
        self.last_step = current_step

        half_cycle = self.num_cycle_steps // 2

        if current_step <= half_cycle:
            # Scale up phase. With fewer than two cycle steps the ramp has zero
            # length; treat it as already at the peak instead of dividing by zero.
            scale = current_step / half_cycle if half_cycle > 0 else 1.
            lr = self.min_lr + (self.max_lr - self.min_lr) * scale
            momentum = self.max_momentum - (self.max_momentum - self.min_momentum) * scale
        elif current_step <= self.num_cycle_steps:
            # Scale down phase (denominator is > 0 whenever this branch is reached)
            scale = (current_step - half_cycle) / (self.num_cycle_steps - half_cycle)
            lr = self.max_lr - (self.max_lr - self.min_lr) * scale
            momentum = self.min_momentum + (self.max_momentum - self.min_momentum) * scale
        elif current_step <= self.num_steps:
            # Annihilation phase: only change lr
            scale = (current_step - self.num_cycle_steps) / (self.num_steps - self.num_cycle_steps)
            lr = self.min_lr - (self.min_lr - self.final_lr) * scale
            momentum = None
        else:
            # Exceeded given num_steps: do nothing
            return

        self.optimizer.param_groups[0]['lr'] = lr
        # Compare against None: a momentum of exactly 0.0 is a valid value and
        # must still be written to the optimizer.
        if momentum is not None:
            self.optimizer.param_groups[0]['momentum'] = momentum

In [None]:
#next(train_dl)

In [None]:
# Plain SGD over all parameters of `net`. The lr given here is only a placeholder:
# find_lr overwrites the lr of the first parameter group when it runs.
optimizer = torch.optim.SGD(net.parameters(),lr=1e-1)

In [None]:
# find_lr has no defaults for its first two parameters, so calling it without
# arguments raises a TypeError. Sweep over the training loader with the
# optimizer created above, then plot the smoothed loss against log10(lr),
# trimming the noisy ends of the curve.
logs,losses = find_lr(train_dl, optimizer)
plt.plot(logs[10:-5],losses[10:-5])

In [None]:
# `ds` is never defined in this notebook, so this cell fails on a fresh kernel.
# NOTE(review): the training dataset is presumably what was meant — confirm.
a, b = train_ds[3]
b

In [None]:
# Label tensor of the sample fetched above.
b

In [None]:
# Type and shape of the image part of the sample.
type(a), a.shape

In [None]:
# Summary of the image values; array_info is not defined in this notebook (presumably from exp.utils).
array_info(a, with_hist=False)

In [None]:
# Plot the sample; print_image is not defined in this notebook (presumably from exp.utils).
print_image(a, b)

In [None]:
# Scratch section: experiments on a single image file.
# NOTE(review): absolute, machine-specific path — consider building it from get_image_path().
show_image("/home/favi/.datasets/CRX8/images/00000459_057.png")

In [None]:
# Load the same file as a tensor.
bild_t = read_image("/home/favi/.datasets/CRX8/images/00000459_057.png")

In [None]:
bild_t.shape

In [None]:
# List of 14 zeros passed as the second argument to print_image below — presumably an all-negative label vector.
y= [0]*14

In [None]:
print_image(bild_t,y)

In [None]:
# NOTE(review): displays the raw tensor; consider showing only its shape/dtype.
bild_t

In [None]:
# Buffer of ones with 3 channels and the spatial size of bild_t.
d3 = torch.ones((3, bild_t.shape[1], bild_t.shape[2]))
d3.shape

In [None]:
# Copy channel 0 of bild_t into each of the three channels of a fresh zero tensor.
d2_data = bild_t[0,:,:]
d3_im = torch.zeros(3, bild_t.shape[1], bild_t.shape[2])
d3_im[0, :, :] = d2_data
d3_im[1, :, :] = d2_data
d3_im[2, :, :] = d2_data
im = d3_im

In [None]:
# The same replication written into the d3 buffer.
d3[0] = bild_t[0]
d3[1] = bild_t[0]
d3[2] = bild_t[0]

In [None]:
# Fill every channel of d3 with the transpose of channel 0 of bild_t
# (element-wise: d3[c, w, h] = bild_t[0, h, w]). A single broadcast assignment
# replaces the triple Python loop, which visited every pixel of every channel
# one element at a time.
d3[:] = bild_t[0].t()

In [None]:
# Plot the 3-channel copy built above.
print_image(im, y)

In [None]:
# NumPy copy of the image tensor.
npb = np.array(bild_t); npb.shape

In [None]:
# Zero array with 3 channels; overwritten by the next cell.
linus = np.zeros((3, npb.shape[1], npb.shape[2]));linus.shape

In [None]:
# Build the 3-channel array by collecting channel 0 three times.
linus = []
for i in range(3): linus.append(npb[0,:,:])
linus = np.array(linus)

In [None]:
# Stack three copies and drop the size-1 axes, then look at the resulting shape.
np.stack([bild_t, bild_t, bild_t]).squeeze().shape

In [None]:
np.stack([bild_t, bild_t]).squeeze().shape
#np.stack([np.stack([bild_t, bild_t]).squeeze(), bild_t]).squeeze()

In [None]:
print_image(np.stack([bild_t, bild_t, bild_t]).squeeze(), y)

In [None]:
# NOTE(review): identical to the previous cell.
print_image(np.stack([bild_t, bild_t, bild_t]).squeeze(), y)

In [None]:
# Concatenation joins along the existing first axis instead of adding a new one.
np.concatenate([bild_t, bild_t, bild_t]).shape

In [None]:
def _make3D(t):
        #w, h = t.shape[1:]
        #new_t = torch.zeros((3, w, h))
        #for i in range(3): 
        #    new_t[i] = t
        #return torch.Tensor(new_t)
        tt = np.array(t)
        new_t = np.stack([tt]*3, axis=0)
        return new_t
    
def _print_image(x):
    """Show a channel-first image of shape (C, H, W) with matplotlib."""
    # imshow expects channel-last data. reshape() only reinterprets the flat
    # buffer and scrambles the pixels of multi-channel images; moving the axis
    # keeps every pixel in place.
    plt.imshow(np.transpose(np.asarray(x), (1, 2, 0)), cmap="bone");

In [None]:
# Reload the sample image. NOTE(review): absolute, machine-specific path.
bild_t = read_image("/home/favi/.datasets/CRX8/images/00000459_057.png")
#bild_t = bild_t.squeeze()

In [None]:
# NOTE(review): builds (and displays) a nested Python list with one entry per pixel;
# the loop below computes [s]*3 and discards the result.
[[[s,s,s] for s in r] for r in bild_t]

for r in bild_t:
    for s in r:
        [s]*3

In [None]:
# NOTE(review): np.repeat without an axis repeats each element consecutively in flattened
# order, so this view is not three stacked copies of the image — verify the plot.
gr = np.repeat(bild_t, 3).view(3, 1024, 1024)
_print_image(gr)
print(gr.shape)

In [None]:
# NOTE(review): this multiplies the pixel values by 3 (it does not add channels) and
# overwrites bild_t, so re-running the cell changes the result again.
bild_t = bild_t[:,:]*3

In [None]:
bild_t.shape

In [None]:
# NOTE(review): syntax error — the first argument of np.array is missing.
test_image = np.array(,dtype="u1")

In [None]:
plt.imshow(test_image, cmap="bone");

In [None]:
# Flatten the image and lay three copies of it end to end in one buffer.
flat = np.array(bild_t.flatten())
# `flat.shape` is a tuple: `flat.shape*3` repeats the tuple (asking np.zeros for
# an N x N x N array) and tuple bounds cannot be used in a slice. Work with the
# integer element count instead.
n_flat = flat.size
arr_size = n_flat * 3
zz = np.zeros(arr_size)

for i in range(3):
    print(n_flat * i, n_flat + n_flat * i)
    # NOTE(review): the original only evaluated the slice; filling it with
    # `flat` is the presumed intent — confirm.
    zz[n_flat * i: n_flat + n_flat * i] = flat


In [None]:
# Element count of a 3 x 1024 x 1024 image.
1024*1024*3

In [None]:
flat.shape

In [None]:
#np.concatenate([bild_t]*3)
#_print_image(np.stack([bild_t]*3))
# NOTE(review): stacking the nested list [[bild_t]] adds extra leading axes; presumably
# not a shape imshow can display — verify.
plt.imshow(np.stack([[bild_t]]*3), cmap="bone");

In [None]:
# Broadcasting bild_t against a (3, 1024, 1024) zero tensor.
ttt = torch.zeros(3, 1024, 1024) + bild_t
_print_image(ttt)

In [None]:
# Compare the shapes before and after _make3D, then plot the original tensor.
tt = _make3D(bild_t)
print(bild_t.shape)
print(tt.shape)
print(tt.squeeze().shape)
#tt.reshape(1024, 1024, 3)
_print_image(bild_t)
#_print_image(tt.squeeze())

In [None]:
# Values along the first axis of tt at one pixel position.
tt[:,1024//3, 1024//3]

In [None]:
# Add a positional index column to the training frame.
# NOTE(review): CRX8_Data.__init__ already writes the same column into the frame it is given.
train_df["Index_2"] = list(range(train_df.shape[0]))

In [None]:
# File name stored in the third row.
train_df.iloc[2,:].loc["Image Index"]

In [None]:
def translate2label_(arr, label_names=None):
    """Turn one multi-hot row into a '|'-joined string of label names.

    Args:
        arr: sequence of flags; position ``i`` belongs to ``label_names[i]``.
        label_names: names matching the positions of ``arr``. Defaults to the
            notebook-level ``labels`` for backward compatibility.

    Returns:
        The names whose flag equals 1 joined by ``"|"``, or ``"No Finding"``
        when no flag is set.
    """
    names = labels if label_names is None else label_names
    pos_lbls = [names[idx] for idx, v in enumerate(arr) if v == 1]
    if len(pos_lbls) == 0: return "No Finding"
    return "|".join(pos_lbls)

def get_fast_df(train_df, valid_df, labels):
    """Build a frame with columns ``fname``, ``labels`` and ``is_valid``.

    The training and validation frames are concatenated (training rows first),
    every row's label columns are turned into a '|'-delimited string, and
    ``is_valid`` marks the rows that came from ``valid_df``. The result keeps
    the index of the concatenated frame.

    Args:
        train_df: training frame with an "Image Index" column and one column
            per label.
        valid_df: validation frame with the same columns.
        labels: label column names.
    """
    from functools import partial  # local import keeps this cell self-contained

    label_names = list(labels)
    combined_df = pd.concat([train_df, valid_df], axis=0)

    # Select the label columns in the order of `label_names` so that position i
    # of every row really belongs to label_names[i], whatever the column order
    # of the frame is.
    rows = combined_df.loc[:, label_names].values
    with Pool() as pool:
        # Pass the names explicitly so the workers use the `labels` given to
        # this function rather than a global of the same name.
        row_labels = pool.map(partial(translate2label_, label_names=label_names), rows)

    is_valid = [*[False]*train_df.shape[0], *[True]*valid_df.shape[0]]

    fast_df = {"idx": list(combined_df.index),
           "fname": combined_df.loc[:,"Image Index"],
           "labels": row_labels,
           "is_valid": is_valid}

    fast_df = pd.DataFrame(fast_df)
    fast_df.index = fast_df.idx.values
    return fast_df.drop(columns="idx")

In [None]:
# NOTE(review): this cell repeats the body of get_fast_df step by step; the
# call to get_fast_df below produces the same intermediate values.
combined_df = pd.concat([train_df, valid_df], axis=0)

# Select the label columns in the order of `labels`, which is the order
# translate2label_ assumes when mapping positions back to names.
rows = combined_df.loc[:, list(labels)].values
with Pool() as pool:
    row_labels = pool.map(translate2label_, rows)

is_valid = [*[False]*train_df.shape[0], *[True]*valid_df.shape[0]]

In [None]:
# Frame with file name, '|'-joined labels and validation flag for every image.
fast_df = get_fast_df(train_df, valid_df, labels)

In [None]:
# NOTE(review): ImageDataLoaders, Resize and aug_transforms are not imported in the visible
# cells — presumably fastai names provided by `from exp.utils import *`; confirm.
# Labels are split on '|' and rows flagged in `is_valid` form the validation set.
dls = ImageDataLoaders.from_df(fast_df, 
                               get_data_path(), 
                               folder=get_image_path().name, 
                               valid_col='is_valid', 
                               label_delim='|',
                               item_tfms=Resize(460), 
                               batch_tfms=aug_transforms(size=224)
                              )

In [None]:
dls.show_batch()

In [None]:
# ResNet-50 learner reporting multi-label accuracy at threshold 0.5 and multi-label ROC AUC.
learn = cnn_learner(dls, resnet50, metrics=[partial(accuracy_multi, thresh=0.5), RocAucMulti(sigmoid=False)])

In [None]:
learn.lr_find()

In [None]:
# Learning rate for fine-tuning — presumably read off the lr_find plot.
lr = 3e-2

In [None]:
# Fine-tune for 2 epochs with the learning rate chosen in the previous cell
# (use `lr` instead of repeating the literal so the two cannot drift apart).
learn.fine_tune(2, lr)

In [None]:
# Show a few predictions next to their targets.
learn.show_results()

In [None]:
# Loss function the learner is using.
learn.loss_func

In [None]:
# Full model architecture.
learn.model