In [27]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np

import os 
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor
import cv2
import pickle as pkl

In [28]:
# Use the first CUDA GPU when one is visible to PyTorch, otherwise the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [29]:
class LazyLoadDataset(Dataset):
    """Dataset that reads every sample from disk at access time.

    Files read by this class, relative to ``path``:

        <split>/X/<sample>/rgb/0.png, 1.png, 2.png
        <split>/X/<sample>/depth.npy
        <split>/X/<sample>/field_id.pkl
        train/Y/<sample>.npy            (training split only)

    where ``<split>`` is ``train`` or ``test``.

    Args:
        path: Root directory. Sub-paths are built by plain string
            concatenation, so it must end with a path separator.
        train: If True, read the ``train/`` split and also return targets.
        transform: Optional callable applied to each of the three RGB images.
    """

    def __init__(self, path, train=True, transform=None):
        self.transform = transform
        self.train = train
        split_root = path + ("train/" if train else "test/")
        self.pathX = split_root + "X/"
        if train:
            self.pathY = split_root + "Y/"
        # os.listdir returns entries in arbitrary order; sorting makes the
        # index -> sample mapping reproducible across runs and machines.
        self.data = sorted(os.listdir(self.pathX))

    def _read_rgb(self, sample_dir, view):
        """Read ``rgb/<view>.png`` from ``sample_dir`` and apply the transform."""
        image_path = sample_dir + "/rgb/{}.png".format(view)
        # cv2.imread signals failure by returning None instead of raising.
        # Per the OpenCV docs the channels come back in BGR order; no
        # conversion is applied here.
        image = cv2.imread(image_path)
        if image is None:
            raise FileNotFoundError("Could not read image: " + image_path)
        if self.transform is not None:
            image = self.transform(image)
        return image

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            Training split: ``((img0, img1, img2, depth / 1000, field_id), Y * 1000)``.
            Test split: ``(img0, img1, img2, depth / 1000, field_id)``.

        Raises:
            IndexError: If ``idx`` is out of range.
            FileNotFoundError: If one of the RGB images cannot be read.
        """
        name = self.data[idx]
        sample_dir = self.pathX + name

        # X: three RGB views, the depth array and the field id.
        img0 = self._read_rgb(sample_dir, 0)
        img1 = self._read_rgb(sample_dir, 1)
        img2 = self._read_rgb(sample_dir, 2)
        depth = np.load(sample_dir + "/depth.npy")
        # NOTE: pickle.load can execute arbitrary code; only use trusted data.
        with open(sample_dir + "/field_id.pkl", "rb") as handle:
            field_id = pkl.load(handle)

        features = (img0, img1, img2, depth / 1000, field_id)
        if not self.train:
            return features

        # Y: targets are rescaled by 1000 for training.
        Y = np.load(self.pathY + name + ".npy")
        return features, Y * 1000

    def __len__(self):
        """Number of sample directories found under the split's ``X/`` folder."""
        return len(self.data)

In [31]:
def get_min_and_max_depth(data):
    """Return the smallest and largest depth value over all samples.

    Args:
        data: Iterable yielding ``((img0, img1, img2, depth, field_id), target)``
            pairs; ``depth`` must be accepted by ``np.min`` / ``np.max``.

    Returns:
        ``(min_depth, max_depth)`` as Python floats.

    Raises:
        ValueError: If ``data`` yields no samples.
    """
    # Start from +/- infinity so the result is correct for any value range
    # (a fixed starting guess would silently clip out-of-range data).
    min_depth, max_depth = float("inf"), float("-inf")
    seen_any = False
    for (_, _, _, depth, _), _ in data:
        seen_any = True
        min_depth = min(min_depth, float(np.min(depth)))
        max_depth = max(max_depth, float(np.max(depth)))
    if not seen_any:
        raise ValueError("cannot compute depth range of an empty dataset")
    return min_depth, max_depth

def get_mean_and_std(data):
    """Compute per-channel mean and std of the RGB images in ``data``.

    For every sample the three images are reshaped to ``(3, -1)`` and
    concatenated, and a per-channel mean and std are taken over that sample.
    The returned values are the averages of those per-sample statistics
    (so the std is a mean of per-sample stds, not a pooled std).

    Args:
        data: Iterable yielding ``((img0, img1, img2, depth, field_id), target)``
            pairs whose images are tensors reshapeable to ``(3, -1)``.

    Returns:
        ``(means, stds)``: two tensors of shape ``(3,)``.

    Raises:
        ValueError: If ``data`` yields no samples.
    """
    means = torch.zeros(3)
    stds = torch.zeros(3)
    n_samples = 0
    for (img0, img1, img2, _, _), _ in data:
        pixels = torch.cat([img.reshape(3, -1) for img in (img0, img1, img2)], dim=1)
        means += pixels.mean(dim=1)
        stds += pixels.std(dim=1)
        # Count what was actually iterated instead of reaching into a
        # dataset-specific attribute, so any iterable of samples works.
        n_samples += 1
    if n_samples == 0:
        raise ValueError("cannot compute statistics of an empty dataset")
    return means / n_samples, stds / n_samples

In [None]:
# Re-create the device handle and load the training split with only ToTensor
# applied (no normalization), so dataset statistics can be computed from it.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
alldata = LazyLoadDataset(
    "/kaggle/input/lazydata/lazydata/",
    train=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

In [32]:
# Per-channel RGB statistics and the global depth range of the training split;
# both are reused later for normalization.
means, stds = get_mean_and_std(alldata)
min_depth, max_depth = get_min_and_max_depth(alldata)

In [33]:
# Show the RGB channel statistics, then the depth range.
print(means, stds)
print(min_depth, max_depth)

tensor([0.4851, 0.4623, 0.4356]) tensor([0.2226, 0.2206, 0.2359])
0.0 65.535


In [34]:
# Training-time preprocessing: convert to a tensor, then normalize with the
# dataset statistics held in `means` / `stds`.
# Augmentations currently disabled: RandomInvert(),
# RandomAdjustSharpness(sharpness_factor=4), RandomAutocontrast(),
# RandomSolarize(threshold=192.0), RandomPosterize(bits=2),
# ColorJitter(brightness=1.0, contrast=1.0, hue=0.5).
transforms_train = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=means, std=stds),
    ]
)

In [35]:
# Training split with normalization applied, served in shuffled batches of 32.
train_dataset = LazyLoadDataset(
    "/kaggle/input/lazydata/lazydata/", train=True, transform=transforms_train
)
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=32)


In [36]:
import torch
import torch.nn as nn

try:
    from torch.hub import load_state_dict_from_url
except ImportError:
    from torch.utils.model_zoo import load_url as load_state_dict_from_url

def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """Build a bias-free 3x3 ``nn.Conv2d`` whose padding equals its dilation."""
    conv_options = dict(
        kernel_size=3,
        stride=stride,
        padding=dilation,
        dilation=dilation,
        groups=groups,
        bias=False,
    )
    return nn.Conv2d(in_planes, out_planes, **conv_options)


def conv1x1(in_planes, out_planes, stride=1):
    """Build a bias-free 1x1 ``nn.Conv2d`` (pointwise convolution)."""
    pointwise = nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=1,
        stride=stride,
        bias=False,
    )
    return pointwise


class BasicBlock(nn.Module):
    """Two-convolution residual block (3x3 -> 3x3) with an identity shortcut.

    Args:
        inplanes: Number of input channels.
        planes: Number of output channels of both convolutions.
        stride: Stride of the first convolution.
        downsample: Optional module applied to the input to produce the
            shortcut; ``None`` means the input itself is the shortcut.
        groups: Must be 1.
        base_width: Must be 64.
        dilation: Must be 1.
        norm_layer: Normalization layer factory; defaults to ``nn.BatchNorm2d``.

    Raises:
        ValueError: If ``groups != 1`` or ``base_width != 64``.
        NotImplementedError: If ``dilation > 1``.
    """

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super().__init__()
        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
        if not (groups == 1 and base_width == 64):
            raise ValueError('BasicBlock only supports groups=1 and base_width=64')
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")

        # When stride != 1, conv1 and the downsample module both reduce the
        # spatial size so the two branches stay shape-compatible.
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = make_norm(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = make_norm(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Apply the residual branch, add the shortcut, then the final ReLU."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        shortcut = x
        if self.downsample is not None:
            shortcut = self.downsample(x)

        out += shortcut
        return self.relu(out)


class Bottleneck(nn.Module):
    """Three-convolution residual block (1x1 -> 3x3 -> 1x1) with a shortcut.

    The stride is applied at the 3x3 convolution (``conv2``), as torchvision
    does; the original paper (https://arxiv.org/abs/1512.03385) places it at
    the first 1x1 convolution. This variant is known as ResNet V1.5 and
    improves accuracy according to
    https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.

    Args:
        inplanes: Number of input channels.
        planes: Base channel count; the block outputs ``planes * expansion``.
        stride: Stride of the 3x3 convolution.
        downsample: Optional module applied to the input to produce the
            shortcut; ``None`` means the input itself is the shortcut.
        groups: Groups of the 3x3 convolution.
        base_width: Scales the inner width as ``planes * base_width / 64``.
        dilation: Dilation of the 3x3 convolution.
        norm_layer: Normalization layer factory; defaults to ``nn.BatchNorm2d``.
    """

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super().__init__()
        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
        inner = int(planes * (base_width / 64.)) * groups
        outer = planes * self.expansion

        # When stride != 1, conv2 and the downsample module both reduce the
        # spatial size so the two branches stay shape-compatible.
        self.conv1 = conv1x1(inplanes, inner)
        self.bn1 = make_norm(inner)
        self.conv2 = conv3x3(inner, inner, stride, groups, dilation)
        self.bn2 = make_norm(inner)
        self.conv3 = conv1x1(inner, outer)
        self.bn3 = make_norm(outer)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Apply the residual branch, add the shortcut, then the final ReLU."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        shortcut = x
        if self.downsample is not None:
            shortcut = self.downsample(x)

        out += shortcut
        return self.relu(out)


class ResNet(nn.Module):
    """ResNet-style network with a 12-channel input stem and a linear head.

    Args:
        block: Residual block class (``BasicBlock`` or ``Bottleneck``); must
            expose an ``expansion`` class attribute.
        layers: Four integers, the number of blocks in each of the four stages.
        num_classes: Output size of the final linear layer.
        zero_init_residual: If True, zero the weight of the last norm layer
            in every residual block.
        groups: Forwarded to every block.
        width_per_group: Forwarded to every block as ``base_width``.
        replace_stride_with_dilation: ``None`` or three booleans; a True entry
            makes the matching stage (2, 3, 4) use dilation instead of its
            stride-2 downsampling.
        norm_layer: Normalization layer factory; defaults to ``nn.BatchNorm2d``.

    Raises:
        ValueError: If ``replace_stride_with_dilation`` does not have length 3.
    """

    def __init__(self, block, layers, num_classes=12, zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
                 norm_layer=None):
        super().__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # One flag per strided stage: swap its 2x2 stride for dilation?
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        dilate_stage2, dilate_stage3, dilate_stage4 = replace_stride_with_dilation
        self.groups = groups
        self.base_width = width_per_group

        # Stem: 7x7 stride-2 convolution over 12 input channels, then max-pool.
        self.conv1 = nn.Conv2d(
            12, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False
        )
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Four residual stages; stages 2-4 halve the resolution unless dilated.
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dilate=dilate_stage2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilate=dilate_stage3)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2, dilate=dilate_stage4)

        # Head: global average pool followed by a linear layer.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)

        # Zero-initialize the last norm layer of each residual branch so every
        # block starts out as an identity mapping. This improves the model by
        # 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for module in self.modules():
                if isinstance(module, Bottleneck):
                    nn.init.constant_(module.bn3.weight, 0)
                elif isinstance(module, BasicBlock):
                    nn.init.constant_(module.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        """Build one stage of ``blocks`` residual blocks.

        Only the first block receives ``stride`` and a downsample shortcut;
        ``self.inplanes`` (and ``self.dilation`` when ``dilate`` is set) are
        updated as a side effect.
        """
        norm_layer = self._norm_layer
        out_planes = planes * block.expansion
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1

        # A projection shortcut is needed whenever the first block changes the
        # spatial size or the channel count.
        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, out_planes, stride),
                norm_layer(out_planes),
            )

        stage = [
            block(self.inplanes, planes, stride, downsample, self.groups,
                  self.base_width, previous_dilation, norm_layer)
        ]
        self.inplanes = out_planes
        stage.extend(
            block(self.inplanes, planes, groups=self.groups,
                  base_width=self.base_width, dilation=self.dilation,
                  norm_layer=norm_layer)
            for _ in range(1, blocks)
        )
        return nn.Sequential(*stage)

    def _forward_impl(self, x):
        """Stem -> four residual stages -> pooled, flattened linear head."""
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        x = torch.flatten(self.avgpool(x), 1)
        return self.fc(x)

    def forward(self, x):
        """Run the network on ``x``; delegates to ``_forward_impl``."""
        return self._forward_impl(x)

    def freeze_parameters(self, freeze_layer4=False, train_fc=False):
        """Disable gradients for selected parts of the network and report counts.

        ``conv1``, ``bn1``, ``layer1``, ``layer2`` and ``fc`` are always
        frozen first; ``layer3`` is left untouched.

        Args:
            freeze_layer4: Also freeze ``layer4``.
            train_fc: Re-enable gradients for ``fc`` after freezing.
        """
        to_freeze = [self.conv1, self.bn1, self.layer1, self.layer2, self.fc]
        if freeze_layer4:
            to_freeze.append(self.layer4)
        for module in to_freeze:
            for param in module.parameters():
                param.requires_grad = False

        if train_fc:
            for param in self.fc.parameters():
                param.requires_grad = True

        # Counts parameter tensors, not individual scalar weights.
        all_params = list(self.parameters())
        counter = sum(1 for param in all_params if param.requires_grad)
        print('n of req grad params: {}, n of total parameters: {}'.format(counter, len(all_params)))

def _resnet(block, layers, pretrained, progress, **kwargs):
    """Construct a ``ResNet`` from a block type and per-stage block counts.

    ``pretrained`` and ``progress`` are accepted but not used: no weights are
    loaded here. Remaining keyword arguments go to ``ResNet``.
    """
    return ResNet(block, layers, **kwargs)

def resnet18(pretrained=False, progress=True, **kwargs):
    """Build a ResNet-18: ``BasicBlock`` with stage depths ``[2, 2, 2, 2]``."""
    stage_depths = [2, 2, 2, 2]
    return _resnet(BasicBlock, stage_depths, pretrained, progress, **kwargs)

def resnet34(pretrained=False, progress=True, **kwargs):
    """Build a ResNet-34: ``BasicBlock`` with stage depths ``[3, 4, 6, 3]``."""
    stage_depths = [3, 4, 6, 3]
    return _resnet(BasicBlock, stage_depths, pretrained, progress, **kwargs)

def resnet50(pretrained=False, progress=True, **kwargs):
    """Build a ResNet-50: ``Bottleneck`` with stage depths ``[3, 4, 6, 3]``.

    Unlike the other factories in this file, the head defaults to 365
    outputs. Pass ``num_classes=...`` to override it; this used to raise a
    ``TypeError`` because the value was hard-coded after ``**kwargs``.

    ``pretrained`` and ``progress`` are forwarded to ``_resnet``, which does
    not load any weights.
    """
    kwargs.setdefault("num_classes", 365)
    return _resnet(Bottleneck, [3, 4, 6, 3], pretrained, progress, **kwargs)

def resnet101(pretrained=False, progress=True, **kwargs):
    """Build a ResNet-101: ``Bottleneck`` with stage depths ``[3, 4, 23, 3]``."""
    stage_depths = [3, 4, 23, 3]
    return _resnet(Bottleneck, stage_depths, pretrained, progress, **kwargs)

def resnet152(pretrained=False, progress=True, **kwargs):
    """Build a ResNet-152: ``Bottleneck`` with stage depths ``[3, 8, 36, 3]``."""
    stage_depths = [3, 8, 36, 3]
    return _resnet(Bottleneck, stage_depths, pretrained, progress, **kwargs)



In [37]:
class RMSELoss(nn.Module):
    """Root-mean-square error: ``sqrt(MSE(yhat, y) + eps)``.

    Args:
        eps: Small constant added to the MSE inside the square root
            (presumably to keep the gradient finite when the MSE is zero).
    """

    def __init__(self, eps=1e-6):
        super().__init__()
        self.mse = nn.MSELoss()
        self.eps = eps

    def forward(self, yhat, y):
        """Return the scalar RMSE between prediction ``yhat`` and target ``y``."""
        mean_squared_error = self.mse(yhat, y)
        return torch.sqrt(mean_squared_error + self.eps)

In [38]:
# Model under training. Alternatives tried: resnet101(), resnet50().
# NOTE: the recorded training run in this notebook ended with a CUDA
# out-of-memory error using this model and batch_size=32.
model = resnet152()
model = model.to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)
# Learning-rate scheduling is currently disabled:
# lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

In [55]:
def train(dataloader, model, optimizer):
    """Run one training epoch over ``dataloader``.

    Each batch is expected to be ``((img0, img1, img2, depth, field_id), target)``.
    The depth is min-max scaled with the module-level ``min_depth`` and
    ``max_depth``, concatenated with the three images along dim 1, and fed
    to ``model`` on the module-level ``device``. The loss is ``RMSELoss``.

    Args:
        dataloader: Iterable of batches; ``dataloader.dataset`` must support ``len``.
        model: Network to train (already on ``device``).
        optimizer: Optimizer over ``model``'s parameters.
    """
    size = len(dataloader.dataset)
    model.train()
    # The loss module holds no per-batch state, so build it once.
    loss_fn = RMSELoss()

    for batch_index, (data, target) in enumerate(dataloader):
        img0, img1, img2, depth, field_id = data
        depth = torch.from_numpy(np.array(depth))
        depth = (depth - min_depth) / (max_depth - min_depth)

        RGBD = torch.cat((img0, img1, img2, depth), dim=1)
        RGBD = RGBD.type(torch.float).to(device)
        target = target.type(torch.float).to(device)

        pred = model(RGBD)
        loss = loss_fn(pred, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch_index % 20 == 0:
            loss_value = loss.item()
            processed = batch_index * len(RGBD)
            # Report against the real dataset size instead of a fixed number.
            print(f"loss: {loss_value:>3f} now processing:{processed} /{size} ")
            

In [56]:
# Train for 10 epochs, printing the epoch index before each pass.
# NOTE: the recorded output below stops at epoch 0 with a CUDA out-of-memory error.
for epoch in range(10):
    print(epoch)
    train(train_dataloader, model, optimizer)

0


RuntimeError: CUDA out of memory. Tried to allocate 14.00 MiB (GPU 0; 14.76 GiB total capacity; 13.15 GiB already allocated; 3.75 MiB free; 13.66 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [41]:
# Test split (no targets), normalized with the same statistics as training.
test_dataset = LazyLoadDataset(
    "/kaggle/input/lazydata/lazydata/",
    train=False,
    transform=transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize(mean=means, std=stds)]
    ),
)
test_dataloader = DataLoader(test_dataset, batch_size=3)

In [52]:
import pickle
import pandas as pd

# Run the trained model over the test split and write the predictions to a CSV.
outfile = 'submission.csv'

titles = ['ID', 'FINGER_POS_1', 'FINGER_POS_2', 'FINGER_POS_3', 'FINGER_POS_4', 'FINGER_POS_5', 'FINGER_POS_6',
         'FINGER_POS_7', 'FINGER_POS_8', 'FINGER_POS_9', 'FINGER_POS_10', 'FINGER_POS_11', 'FINGER_POS_12']
preds = []
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
indices = []
model.eval()

# Inference only: no_grad avoids building autograd graphs and saves memory.
with torch.no_grad():
    for img0, img1, img2, depth, index in test_dataloader:
        depth = torch.from_numpy(np.array(depth))
        # Same min-max depth scaling as used during training.
        depth = (depth - min_depth) / (max_depth - min_depth)
        data = torch.cat((img0, img1, img2, depth), dim=1).type(torch.float)
        # Send the batch to the device the model lives on, not a fixed 'cuda'.
        output = model(data.to(device))
        for i in range(output.shape[0]):
            preds.append(output[i].cpu().numpy())
            indices.append(index[i])

# Targets were multiplied by 1000 when loaded for training, so divide the
# predictions by 1000 to undo that scaling.
df = pd.concat([pd.DataFrame(indices), pd.DataFrame.from_records(preds) / 1000], axis=1)
df.columns = titles
# to_csv opens and closes the file itself; no separate file handle is needed.
df.to_csv(outfile, index=False)
print("Written to csv file {}".format(outfile))

Written to csv file submission.csv
