In [1]:
import copy
import cv2
from datetime import datetime
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import re
from sklearn.model_selection import train_test_split
import time
import torch
from torch import nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torchsummary import summary
from torchvision import models, transforms
from torchvision.transforms import functional as TF

In [18]:
# copy-pasted from https://github.com/fastai/fastai/blob/master/fastai/vision/learner.py

# Cell
def _is_pool_type(l): return re.search(r'Pool[123]d$', l.__class__.__name__)

# Cell
def has_pool_type(m):
    "Tell whether `m` itself, or any module nested below it, is a pooling layer"
    if _is_pool_type(m):
        return True
    # Depth-first search through the children; `any` stops at the first pooling layer found.
    return any(has_pool_type(child) for child in m.children())

# Cell
def _get_first_layer(m):
    "Access first layer of a model"
    c,p,n = m,None,None  # child, parent, name
    for n in next(m.named_parameters())[0].split('.')[:-1]:
        p,c=c,getattr(c,n)
    return c,p,n

# Cell
def _load_pretrained_weights(new_layer, previous_layer):
    "Load pretrained weights based on number of input channels"
    n_in = getattr(new_layer, 'in_channels')
    if n_in==1:
        # we take the sum
        new_layer.weight.data = previous_layer.weight.data.sum(dim=1, keepdim=True)
    elif n_in==2:
        # we take first 2 channels + 50%
        new_layer.weight.data = previous_layer.weight.data[:,:2] * 1.5
    else:
        # keep 3 channels weights and set others to null
        new_layer.weight.data[:,:3] = previous_layer.weight.data
        new_layer.weight.data[:,3:].zero_()

# Cell
def _update_first_layer(model, n_in, pretrained):
    "Change first layer based on number of input channels"
    if n_in == 3: return
    first_layer, parent, name = _get_first_layer(model)
    assert isinstance(first_layer, nn.Conv2d), f'Change of input channels only supported with Conv2d, found {first_layer.__class__.__name__}'
    assert getattr(first_layer, 'in_channels') == 3, f'Unexpected number of input channels, found {getattr(first_layer, "in_channels")} while expecting 3'
    params = {attr:getattr(first_layer, attr) for attr in 'out_channels kernel_size stride padding dilation groups padding_mode'.split()}
    params['bias'] = getattr(first_layer, 'bias') is not None
    params['in_channels'] = n_in
    new_layer = nn.Conv2d(**params)
    if pretrained:
        _load_pretrained_weights(new_layer, first_layer)
    setattr(parent, name, new_layer)

# Cell
def create_body(arch, n_in=3, pretrained=True, cut=None):
    """Cut off the body of a typically pretrained `arch` as determined by `cut`.

    Args:
        arch: callable building the model; it is called as `arch(pretrained=pretrained)`.
        n_in: number of input channels; the first conv is replaced when this is not 3.
        pretrained: forwarded to `arch` and used to decide whether the replaced first
            conv is initialised from the original one.
        cut: `None`, an int, or a callable. With `None` the model is cut just before the
            last top-level child that is, or contains, a pooling layer. An int keeps
            `children[:cut]`. A callable receives the model and its result is returned.

    Returns:
        An `nn.Sequential` of the kept children, or whatever the `cut` callable returns.

    Raises:
        NameError: if `cut` is neither `None`, an int, nor a callable.
    """
    model = arch(pretrained=pretrained)
    _update_first_layer(model, n_in, pretrained)
    if cut is None:
        # Scan the top-level children from the end for the last pooling-bearing one.
        children = list(model.children())
        cut = next(i for i in reversed(range(len(children))) if has_pool_type(children[i]))
    if isinstance(cut, int):
        return nn.Sequential(*list(model.children())[:cut])
    if callable(cut):
        return cut(model)
    # The original raised the undefined name `NamedError`, which itself produced a
    # NameError without the intended message; raise NameError explicitly so the exception
    # type is unchanged and the message is actually shown.
    raise NameError("cut must be either integer or a function")
        
class UpconvBlock(nn.Sequential):
    """Transposed convolution (kernel 4, stride 2, padding 1, no bias) followed by batch norm.

    An in-place ReLU is placed in front of the pair unless `first=True`.
    """
    def __init__(self, inner_nc, outer_nc, first=False):
        upsample = [
            nn.ConvTranspose2d(inner_nc, outer_nc, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(outer_nc),
        ]
        activation = [] if first else [nn.ReLU(inplace=True)]
        super().__init__(*activation, *upsample)

class Upconvolution(nn.Sequential):
    "Stack of five `UpconvBlock`s with channels 2048 -> 512 -> 64 -> 16 -> 4 -> 1; only the first has no leading ReLU"
    def __init__(self):
        channels = [2048, 512, 64, 16, 4, 1]
        blocks = [
            UpconvBlock(c_in, c_out, first=(idx == 0))
            for idx, (c_in, c_out) in enumerate(zip(channels[:-1], channels[1:]))
        ]
        super().__init__(*blocks)
    
class AdaptiveConcatPool2d(nn.Module):
    "Concatenate `AdaptiveMaxPool2d` and `AdaptiveAvgPool2d` outputs along dim 1 (max first)"
    def __init__(self, size=None):
        super().__init__()
        # Any falsy `size` (None, 0) falls back to an output size of 1.
        self.size = size if size else 1
        self.ap = nn.AdaptiveAvgPool2d(self.size)
        self.mp = nn.AdaptiveMaxPool2d(self.size)

    def forward(self, x):
        pooled_max = self.mp(x)
        pooled_avg = self.ap(x)
        return torch.cat([pooled_max, pooled_avg], 1)
        
class LinBnDrop(nn.Sequential):
    "Sequence of an optional `BatchNorm1d`, an optional `Dropout`, and a `Linear` with optional `act`"
    def __init__(self, n_in, n_out, bn=True, p=0., act=None, lin_first=False):
        norm_drop = []
        if bn:
            # the norm sits on whichever side of the Linear it ends up on
            norm_drop.append(nn.BatchNorm1d(n_out if lin_first else n_in))
        if p != 0:
            norm_drop.append(nn.Dropout(p))
        # the Linear only carries a bias when there is no batch norm
        linear = [nn.Linear(n_in, n_out, bias=not bn)]
        if act is not None:
            linear.append(act)
        ordered = linear + norm_drop if lin_first else norm_drop + linear
        super().__init__(*ordered)
        
# Cell
def create_head(nf, n_out, lin_ftrs=None, first_bn=True, lin_first=False, no_pool=False):
    "Build a head mapping `nf` features to `n_out` outputs through the hidden widths in `lin_ftrs`"
    widths = [nf, 512, n_out] if lin_ftrs is None else [nf] + lin_ftrs + [n_out]
    n_blocks = len(widths) - 1
    # batch norm on the first block is optional; every later block always has it
    use_bn = [first_bn] + [True] * n_blocks
    # one shared in-place ReLU after every block except the last, which has no activation
    activations = [nn.ReLU(inplace=True)] * (n_blocks - 1) + [None]
    head = [] if no_pool else [AdaptiveConcatPool2d()]
    head.append(nn.Flatten())
    for idx in range(n_blocks):
        block = LinBnDrop(widths[idx], widths[idx + 1], bn=use_bn[idx], p=0.2,
                          act=activations[idx], lin_first=lin_first)
        # splice the block's layers into the flat list instead of nesting the block
        head.extend(block)
    if lin_first:
        head.append(nn.Linear(widths[-2], n_out))
    return nn.Sequential(*head)
        
# Cell
def create_head1(nf=2**20, n_out=1, lin_ftrs=None, first_bn=True, lin_first=False):
    """Build a pool-free head (`no_pool=True`) via `create_head`.

    Args:
        nf: number of input features after flattening.
        n_out: number of outputs.
        lin_ftrs: hidden layer widths; defaults to `[2**10, 2**5]` when `None`.
            Previously this argument was accepted but silently ignored.
        first_bn: forwarded to `create_head`.
        lin_first: forwarded to `create_head`.

    Returns:
        The `nn.Sequential` produced by `create_head`.
    """
    if lin_ftrs is None:
        lin_ftrs = [2**10, 2**5]
    return create_head(nf, n_out, lin_ftrs=lin_ftrs,
                       first_bn=first_bn, lin_first=lin_first, no_pool=True)


In [3]:
# Root folder for everything this notebook reads and writes.
home_dir = '/home/vovechkin/'

# Input data and saved models sit next to each other under the root.
data_dir, model_dir = (os.path.join(home_dir, sub) for sub in ('embedding_data', 'models'))

# Image crops and their label table both live inside the data folder.
ldr_crops_dir, labels_file = (os.path.join(data_dir, leaf) for leaf in ('crops_ldr', 'labels.csv'))

In [4]:
def gamma_correction(img, gamma=5):
    "Raise `img` to the power `1 / gamma`, clip the result to [0, 1] and return it as float32"
    exponent = 1.0 / gamma
    corrected = np.power(img, exponent)
    clipped = np.clip(corrected, 0.0, 1.0)
    return clipped.astype(np.float32)


def to_ldr(img_hdr, gamma=2.2):
    """Tone-map `img_hdr` with OpenCV's Reinhard operator and return it as uint8.

    `gamma` is the first argument given to `cv2.createTonemapReinhard`; the other three
    are fixed at 0. The tone-mapped result is multiplied by 255 before the uint8 cast.
    """
    reinhard = cv2.createTonemapReinhard(gamma, 0, 0, 0)
    mapped = reinhard.process(img_hdr)
    scaled = mapped * 255
    return scaled.astype(np.uint8)


def read_hdr(path):
    """Read an image at any bit depth and return it as a float32 array in RGB channel order.

    Args:
        path: location of the image file.

    Returns:
        The image converted from OpenCV's BGR order to RGB and cast to float32.

    Raises:
        FileNotFoundError: if `path` does not point to an existing file.
        OSError: if the file exists but OpenCV could not decode it.
    """
    hdr_img = cv2.imread(path, cv2.IMREAD_ANYDEPTH | cv2.IMREAD_COLOR)
    if hdr_img is None:
        # imread signals failure by returning None rather than raising; fail here with a
        # clear message instead of an opaque error inside cvtColor.
        if not os.path.isfile(path):
            raise FileNotFoundError(f'HDR image not found: {path}')
        raise OSError(f'HDR image could not be decoded: {path}')
    return cv2.cvtColor(hdr_img, cv2.COLOR_BGR2RGB).astype('float32')


def write_hdr(path, hdr_img):
    """Write an RGB image to `path` as float32, requesting float EXR storage.

    Args:
        path: destination file.
        hdr_img: image in RGB channel order; it is converted to BGR and cast to float32
            before being handed to OpenCV.

    Raises:
        OSError: if OpenCV reports that the file was not written.
    """
    hdr_bgr = cv2.cvtColor(hdr_img, cv2.COLOR_RGB2BGR).astype('float32')
    # imwrite parameters are a flat list of (id, value) pairs; the original passed only
    # the value `IMWRITE_EXR_TYPE_FLOAT` without its `IMWRITE_EXR_TYPE` id.
    params = [cv2.IMWRITE_EXR_TYPE, cv2.IMWRITE_EXR_TYPE_FLOAT]
    written = cv2.imwrite(path, hdr_bgr, params)
    if not written:
        # imwrite returns False instead of raising, so surface the failure explicitly.
        raise OSError(f'Failed to write HDR image to {path}')

In [5]:
def light_sources_mask(hdr_image):
    """Flag pixels where any channel exceeds the 99th percentile of all values in `hdr_image`.

    Returns a uint8 array with axis 2 reduced away: 1 where flagged, 0 elsewhere.
    """
    cutoff = np.percentile(hdr_image, 99)
    above_cutoff = hdr_image > cutoff
    # a pixel is flagged as soon as one of its channels is above the cutoff
    return above_cutoff.astype(np.uint8).max(axis=2)

In [6]:
def show_light(hdr_img):
    "Plot the tone-mapped `hdr_img` with the pixels flagged by `light_sources_mask` painted (0, 255, 255)"
    ldr = to_ldr(hdr_img.copy())

    # Expand the single-channel mask to three identical channels so it can gate the image.
    mask = light_sources_mask(hdr_img)
    mask_rgb = np.repeat(mask[:, :, np.newaxis], 3, axis=2)

    # Keep unflagged pixels as they are and overwrite flagged ones with the highlight colour.
    highlight = np.array([0, 255, 255])
    overlay = ldr * (1 - mask_rgb) + mask_rgb * highlight

    plt.figure(figsize=(15, 8))
    plt.imshow(overlay)
    plt.show()

In [7]:
def to_pil(cv2_img):
    """Convert a BGR OpenCV image to an RGB PIL image.

    The original body read a global `img` instead of the `cv2_img` argument and used
    `Image`, which this notebook never imports. The conversion now goes through
    torchvision's `to_pil_image` (already imported as `TF`).

    Args:
        cv2_img: image array in BGR channel order (assumed HxWx3 uint8, TODO confirm).

    Returns:
        The PIL image built from the channel-swapped array.
    """
    rgb = cv2.cvtColor(cv2_img, cv2.COLOR_BGR2RGB)
    return TF.to_pil_image(rgb)

def from_pil(pil_img):
    "Turn a PIL image into a NumPy array and convert it from RGB to BGR channel order"
    rgb = np.array(pil_img)
    return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)


def adjust_image_param(img, func, lower_value, higher_value):
    """Show `img` next to two versions adjusted by `func`, ordered lower / original / higher.

    The original body referenced the undefined names `original_contrast` and `show_ldr`;
    it now uses `original_image` and displays the composite with matplotlib.

    Args:
        img: BGR OpenCV image, as accepted by `to_pil`.
        func: callable invoked as `func(pil_image, value)` that returns the adjusted PIL image.
        lower_value: value passed to `func` for the left panel.
        higher_value: value passed to `func` for the right panel.
    """
    original_image = to_pil(img)

    # Each call gets its own copy so `func` cannot modify the shared original.
    lower_param_image = func(original_image.copy(), lower_value)
    higher_param_image = func(original_image.copy(), higher_value)

    panels = [from_pil(panel) for panel in (lower_param_image, original_image, higher_param_image)]
    composed = np.concatenate(panels, axis=1)

    # `from_pil` returns BGR, so convert back to RGB for matplotlib.
    plt.figure(figsize=(15, 8))
    plt.imshow(cv2.cvtColor(composed, cv2.COLOR_BGR2RGB))
    plt.show()

In [8]:
# hdr_img = read_hdr(img_path)
# # hdr_img = cv2.cvtColor(hdr_img, cv2.COLOR_BGR2RGB).astype('float32')
# # tonemap = cv2.createTonemapReinhard(2.2, 0, 0, 0)
# img = to_ldr(hdr_img.copy())

# # show_light(hdr_img)


# cropped = [crop(img.copy(), point) for point in crop_points()]
# composite = np.concatenate(cropped, axis=0)

# fig = plt.figure(figsize=(15, 15))
# plt.imshow(composite)
# plt.show()

In [9]:
# One value per line: CUDA built into this torch, cuDNN enabled, CUDA available right now.
print(
    torch.backends.cuda.is_built(),
    torch.backends.cudnn.enabled,
    torch.cuda.is_available(),
    sep='\n',
)

True
True
True


In [10]:
# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)
# get_device_name() raises when there is no CUDA device, so only ask for it on GPU;
# without this guard the CPU fallback above could never be used.
if device.type == 'cuda':
    print(torch.cuda.get_device_name(device))

cuda:0
GeForce RTX 3090


In [11]:
# device = torch.device('cpu')

In [12]:
# Load the label table from `labels_file`; the trailing bare `df` renders it as the cell output.
df = pd.read_csv(labels_file)
df

Unnamed: 0,image_name,light_percentage,brightness_mean,brightness_std
0,9C4A0003-e05009bcad_0,0.000,0.0016,0.0004
1,9C4A0003-e05009bcad_1,0.030,0.0017,0.0012
2,9C4A0003-e05009bcad_2,0.000,0.0017,0.0007
3,9C4A0003-e05009bcad_3,0.000,0.0016,0.0005
4,9C4A0003-e05009bcad_4,0.101,0.0098,0.0124
...,...,...,...,...
11160,AG8A9956-30e880bb24_0,0.258,0.0103,0.0061
11161,AG8A9956-30e880bb24_1,0.000,0.0002,0.0001
11162,AG8A9956-30e880bb24_2,0.000,0.0003,0.0001
11163,AG8A9956-30e880bb24_3,0.000,0.0003,0.0002


In [13]:
import math
from sklearn.preprocessing import MinMaxScaler

def set_light(x):
    "Placeholder that ignores `x` and returns `None`; its former body is kept commented out"
    # n, l, m, s = x
    # x[1] = int(x[1])
    return None

# Work on a copy so the label table in `df` stays exactly as loaded.
df1 = df.copy()

# Min-max scale the two brightness columns; `light_percentage` is left as is.
# NOTE(review): the scaler is fitted on the full table, before the train/validation split.
scaler = MinMaxScaler()
df1[['brightness_mean', 'brightness_std']] = scaler.fit_transform(
    df[['brightness_mean', 'brightness_std']]
)
df1

Unnamed: 0,image_name,light_percentage,brightness_mean,brightness_std
0,9C4A0003-e05009bcad_0,0.000,0.001665,0.000552
1,9C4A0003-e05009bcad_1,0.030,0.001769,0.001657
2,9C4A0003-e05009bcad_2,0.000,0.001769,0.000966
3,9C4A0003-e05009bcad_3,0.000,0.001665,0.000690
4,9C4A0003-e05009bcad_4,0.101,0.010198,0.017120
...,...,...,...,...
11160,AG8A9956-30e880bb24_0,0.258,0.010718,0.008422
11161,AG8A9956-30e880bb24_1,0.000,0.000208,0.000138
11162,AG8A9956-30e880bb24_2,0.000,0.000312,0.000138
11163,AG8A9956-30e880bb24_3,0.000,0.000312,0.000276


In [14]:
def full_image_name(name):
    "Return the path of `name` inside `ldr_crops_dir` with a `.png` extension appended"
    stem = os.path.join(ldr_crops_dir, name)
    return stem + '.png'


class EmbeddingDataset(Dataset):
    """Dataset of image crops paired with three regression targets.

    Each item is `(image, labels)`. `image` is a channel-first float tensor in RGB order,
    scaled to [0, 1], normalised per channel and moved to `device`. `labels` is a float32
    tensor holding `light_percentage`, `brightness_mean` and `brightness_std`, in that
    order; it is not moved to `device` here.

    Args:
        df: table with an `image_name` column and the three label columns.
        device: device the image tensor is moved to in `__getitem__`.
    """
    # Per-channel normalisation statistics, shared by `__getitem__` (via `self.norm`)
    # and by `show`, which undoes the normalisation for display.
    _MEANS = [0.485, 0.456, 0.406]
    _STDS = [0.229, 0.224, 0.225]

    def __init__(self, df, device='cuda:0'):
        self.paths = list(map(full_image_name, df.image_name))
        self.device = device
        self.labels = df[['light_percentage', 'brightness_mean', 'brightness_std']].to_numpy()
        self.norm = transforms.Normalize(self._MEANS, self._STDS)

    def __len__(self): return len(self.paths)

    def __getitem__(self, idx):
        path = self.paths[idx]
        img = cv2.imread(path, cv2.IMREAD_COLOR)
        if img is None:
            # imread returns None for a missing/unreadable file; name the offending path
            # instead of failing with an AttributeError on `.astype`.
            raise FileNotFoundError(f'Could not read image: {path}')
        img = cv2.cvtColor(img.astype(np.float32) / 255, cv2.COLOR_BGR2RGB)
        img = img.transpose((2, 0, 1))  # HWC -> CHW
        img = self.norm(torch.Tensor(img)).to(self.device)

        return img, torch.Tensor(self.labels[idx].astype(np.float32))

    def show(self, idx):
        "Plot item `idx` with its three label values as the title"
        x, y = self[idx]
        light_percentage, brightness_mean, brightness_std = y

        # Undo the normalisation, then round and clip so float error cannot wrap around
        # in the uint8 cast.
        img = x.cpu().numpy().transpose((1, 2, 0)) * np.array(self._STDS) + np.array(self._MEANS)
        img = np.clip(np.rint(img * 255), 0, 255).astype(np.uint8)
        plt.imshow(img)

        # `int(light_percentage)` truncated fractional labels such as 0.258 to 0,
        # so print the value with decimals instead.
        plt.title(f'{light_percentage:.3f} {brightness_mean:.4f} {brightness_std:.4f}')

In [15]:
# df = pd.read_csv(labels_file)
# Hold out 15% of the scaled label table for validation, with a fixed seed.
train, test = train_test_split(df1, test_size=0.15, shuffle=True, random_state=42)

train_ds = EmbeddingDataset(train, device=device)
test_ds = EmbeddingDataset(test, device=device)

# Everything downstream is keyed by phase name: 'train' or 'val'.
datasets = {'train': train_ds, 'val': test_ds}
dataset_sizes = {phase: len(ds) for phase, ds in datasets.items()}
dataloaders = {phase: DataLoader(ds, batch_size=4, shuffle=True)
               for phase, ds in datasets.items()}
dataset_sizes

{'train': 9490, 'val': 1675}

In [21]:
class EmbeddingModel(nn.Module):
    """Encoder, upconvolution stack and three regression heads.

    The encoder comes from `create_body(arch, n_in=n_in)`, its output goes through
    `Upconvolution`, and the result is fed to three separate heads built by
    `create_head1`.

    Args:
        arch: architecture constructor handed to `create_body`.
        n_in: number of input channels handed to `create_body`.
    """
    def __init__(self, arch, n_in=4):
        super().__init__()
        self.encoder = create_body(arch, n_in=n_in)
        self.upconv = Upconvolution()
        self.fc1 = create_head1()
        self.fc2 = create_head1()
        self.fc3 = create_head1()

    def forward(self, x):
        """Return the three head outputs concatenated along dim 1.

        Column order: light_percentage, brightness_mean, brightness_std.
        """
        x = self.encoder(x)
        x = self.upconv(x)
        light_percentage = self.fc1(x)
        brightness_mean = self.fc2(x)
        # The original reused `self.fc2` here, so `fc3` was never used and the last
        # two output columns were always identical.
        brightness_std = self.fc3(x)

        return torch.cat((light_percentage, brightness_mean, brightness_std), 1)

In [22]:
# Build the model on a resnet101 body with the class default `n_in=4`, then print its module tree.
md = EmbeddingModel(models.resnet101)
print(md)

EmbeddingModel(
  (encoder): Sequential(
    (0): Conv2d(4, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (0): Conv

In [23]:
# Per-layer output shapes and parameter counts for a 4-channel 1024x1024 input; `.cuda()` moves `md` to the GPU first.
summary(md.cuda(), (4, 1024, 1024))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 512, 512]          12,544
       BatchNorm2d-2         [-1, 64, 512, 512]             128
              ReLU-3         [-1, 64, 512, 512]               0
         MaxPool2d-4         [-1, 64, 256, 256]               0
            Conv2d-5         [-1, 64, 256, 256]           4,096
       BatchNorm2d-6         [-1, 64, 256, 256]             128
              ReLU-7         [-1, 64, 256, 256]               0
            Conv2d-8         [-1, 64, 256, 256]          36,864
       BatchNorm2d-9         [-1, 64, 256, 256]             128
             ReLU-10         [-1, 64, 256, 256]               0
           Conv2d-11        [-1, 256, 256, 256]          16,384
      BatchNorm2d-12        [-1, 256, 256, 256]             512
           Conv2d-13        [-1, 256, 256, 256]          16,384
      BatchNorm2d-14        [-1, 256, 2

In [17]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train `model`, validating after every epoch, and return it with the best weights loaded.

    Each epoch runs a 'train' pass followed by a 'val' pass. The function relies on three
    module-level names: `dataloaders` (one loader per phase), `dataset_sizes` (sample
    count per phase, used to average the loss) and `device` (where batches are moved).

    Args:
        model: network to train; its weights are updated in place.
        criterion: loss called as `criterion(outputs, labels)`.
        optimizer: optimizer stepped once per training batch.
        scheduler: learning-rate scheduler stepped once per epoch, after the train pass.
        num_epochs: number of epochs to run.

    Returns:
        `model`, loaded with the weights from the epoch with the lowest validation loss
        (the initial weights if no epoch produced a finite, lower loss).
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    # Start from infinity so the first finite validation loss always counts as an
    # improvement, whatever the scale of the loss.
    best_loss = float('inf')

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; gradients are only tracked during the train phase
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # statistics: weight the batch loss by batch size so that dividing by the
                # dataset size below gives a per-sample average
                running_loss += loss.item() * inputs.size(0)

            if is_train:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]

            print('{} Loss: {:.4f}'.format(phase, epoch_loss))

            # keep a deep copy of the weights with the best validation loss so far
            if phase == 'val' and epoch_loss < best_loss:
                best_loss = epoch_loss
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    # `{:4f}` in the original was a typo for `{:.4f}` (it set a width, not a precision).
    print('Best val loss: {:.4f}'.format(best_loss))

    model.load_state_dict(best_model_wts)
    return model

In [18]:
# `EmbeddingDataset` reads images with cv2.IMREAD_COLOR and so yields 3-channel tensors.
# Build the encoder for 3 input channels; with the class default of n_in=4 the first
# convolution would reject those batches.
model_ft = EmbeddingModel(models.resnet101, n_in=3)
model_ft = model_ft.to(device)

criterion = nn.MSELoss()

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.2 every 5 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=5, gamma=0.2)

In [19]:
# NOTE(review): the saved output of this cell is headed `MultiTaskModel(`, while `model_ft` is built from `EmbeddingModel` — the output looks stale; re-run to refresh it.
print(model_ft)

MultiTaskModel(
  (encoder): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (0): Conv

In [None]:
# Create the output folder before training so a missing directory cannot make
# torch.save fail after all epochs have already run.
os.makedirs(model_dir, exist_ok=True)

model = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                    num_epochs=5)
model_path = os.path.join(model_dir, 'res101_model_v1024')
# Only the state dict (weights) is saved, not the full module object.
torch.save(model.state_dict(), model_path)

Epoch 0/4
----------


In [None]:
# Print the model returned by `train_model`.
print(model)