In [7]:
import zipfile
import torch
import torch.nn as nn
from torch.utils import data
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from skimage import io, transform
import numpy as np
import pandas as pd
import matplotlib.pyplot as ply
import os
import imageio
from PIL import Image
import glob

% matplotlib inline

In [2]:
#https://www.kaggle.com/skainkaryam/basic-data-visualization-using-pytorch-dataset
class TGSSaltDataset(data.Dataset):
    """Map-style dataset that pairs each image with its mask.

    Files are looked up as ``<root_path>/images/<id>.png`` and
    ``<root_path>/masks/<id>.png`` for every id in ``file_list``.
    """

    def __init__(self, root_path, file_list):
        """Remember the data root and the list of sample ids."""
        self.file_list = file_list
        self.root_path = root_path

    def __len__(self):
        """Return how many sample ids the dataset holds."""
        return len(self.file_list)

    def __getitem__(self, index):
        """Return ``(image, mask)`` as ``uint8`` arrays for ``index``.

        An index outside ``0 .. len-1`` (negative ones included) is not an
        error: a random valid position is drawn and served instead.
        """
        if index not in range(len(self.file_list)):
            # Out-of-range request: retry with a random valid index.
            return self[np.random.randint(0, len(self))]

        sample_name = self.file_list[index] + ".png"
        image_path = os.path.join(self.root_path, "images", sample_name)
        mask_path = os.path.join(self.root_path, "masks", sample_name)

        image = np.array(imageio.imread(image_path), dtype=np.uint8)
        mask = np.array(imageio.imread(mask_path), dtype=np.uint8)
        return image, mask

In [3]:
# Read the training metadata table; its 'id' column supplies the sample ids used below.
df_depth = pd.read_csv('./data/train.csv')

In [4]:
# Root folder of the training set; TGSSaltDataset looks for images/ and masks/ beneath it.
train_path = "./data/train/"
# Sample ids taken from the 'id' column, as a plain Python list.
train_file_list = list(df_depth['id'].values)

In [5]:
# Sanity check: how many training ids were read.
len(train_file_list)

4000

In [42]:
# Dataset over the training ids; image/mask files are only read when an item is requested.
train_data = TGSSaltDataset(train_path, train_file_list)

In [61]:
# Echo the training root for reference.
train_path

'./data/train/'

In [None]:
# NOTE(review): in the visible notebook `image_path` is only a local variable inside
# TGSSaltDataset.__getitem__, so this cell would raise NameError on a fresh kernel
# (it also carries no execution count). Candidate for removal.
image_path

In [65]:
# Read one training image directly to inspect its raw pixel layout.
img = np.array(imageio.imread('./data/train/images/000e218f21.png'), dtype=np.uint8)

In [71]:
# Top-left pixel. The three equal values in the output suggest the channels are
# duplicates (grayscale stored as RGB) - TODO confirm on more pixels.
img[0][0]

array([131, 131, 131], dtype=uint8)

In [75]:
# NOTE(review): `mean_img` is first assigned in a later cell, so this cell depends on
# out-of-order execution and fails under Restart & Run All.
mean_img[0][1]

120.991

In [96]:
def load_img_to_df(img_path, normalize=False, mean_img=None):
    """Load every ``*.png`` file in a folder into a DataFrame indexed by image id.

    Parameters
    ----------
    img_path : str
        Folder that is searched (non-recursively) for ``*.png`` files.
    normalize : bool, optional
        When true, ``mean_img`` is subtracted from every channel of each image.
    mean_img : numpy.ndarray, optional
        2-D per-pixel mean; required when ``normalize`` is true.  It is
        broadcast over a trailing channel axis, so images are expected to be
        3-D (rows, cols, channels) in that case.

    Returns
    -------
    pandas.DataFrame
        One row per file, indexed by ``img_id`` (the file name up to its first
        dot), with the pixel data as a float array in column ``img``.  Rows
        are in sorted path order.

    Raises
    ------
    ValueError
        If ``normalize`` is true but ``mean_img`` is not given.
    """
    if normalize and mean_img is None:
        raise ValueError("mean_img must be provided when normalize=True")

    images = []
    # Sort so that row order does not depend on directory listing order.
    for filename in sorted(glob.glob(os.path.join(img_path, '*.png'))):
        # os.path.basename understands the platform's path separator(s);
        # splitting on '\\' only extracted the id from Windows-style paths.
        img_id = os.path.basename(filename).split('.')[0]
        # The np.float alias was removed from NumPy; builtin float is the same dtype.
        img = np.array(imageio.imread(filename), dtype=float)
        if normalize:
            # Broadcast the 2-D mean over the channel axis.
            img -= mean_img[:, :, None]
        images.append([img_id, img])
    return pd.DataFrame(images, columns=['img_id', 'img']).set_index('img_id')

In [97]:
def load_img_to_np(img_path):
    """Load every ``*.png`` file in a folder into one stacked ``uint8`` array.

    Parameters
    ----------
    img_path : str
        Folder that is searched (non-recursively) for ``*.png`` files.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array whose first axis indexes the files in sorted path
        order; for equally shaped images the shape is
        ``(n_files,) + image_shape``.  An empty folder yields an empty array.
    """
    # Sort so that the image order does not depend on directory listing order.
    filenames = sorted(glob.glob(os.path.join(img_path, '*.png')))
    images = [np.array(imageio.imread(filename), dtype=np.uint8) for filename in filenames]
    # np.array on a list of equally shaped arrays stacks them along a new axis 0.
    return np.array(images, dtype=np.uint8)

In [98]:
# Raw training images stacked into one uint8 array (used next to compute the mean image).
np_train = load_img_to_np('./data/train/images')

In [99]:
# Per-pixel mean of channel 0 across all training images. Averaging over axis 0
# keeps the images' own spatial shape instead of relying on a hard-coded 101x101.
mean_img = np_train[:, :, :, 0].mean(axis=0)

In [100]:
# Training images as mean-subtracted float arrays, one DataFrame row per image id.
df_train = load_img_to_df('./data/train/images', normalize=True, mean_img=mean_img)

In [101]:
# Attach each image's mask; column assignment from a Series aligns on the img_id index.
# `load_img` is not defined anywhere in the visible notebook - `load_img_to_df` is the
# loader that returns an 'img' column indexed by img_id. Masks are loaded un-normalized.
df_train['mask'] = load_img_to_df('./data/train/masks/')['img']

In [106]:
# Test images, centred with the mean image computed from the *training* images.
df_test = load_img_to_df('./data/test/images', normalize=True, mean_img=mean_img)

In [108]:
# Stack the per-row image arrays of df_test into a single array (new leading axis).
np_test = np.array(df_test['img'].tolist(), ndmin=1)

In [114]:
# Overall mean of channel 1 across the mean-subtracted test images.
np_test[:,:,:,1].mean()

-0.6330799240815169

In [115]:
# Rebind np_train: from here on it holds the mean-subtracted float images stacked
# from df_train, no longer the raw uint8 array loaded earlier.
np_train = np.array(df_train['img'].tolist(), ndmin=1)

In [118]:
# Check the stacked layout - presumably (n_images, rows, cols, channels); confirm against the loader.
np_train.shape

(4000, 101, 101, 3)

In [126]:
# Leftover interactive help lookup (IPython `?` syntax); can be dropped from a finished notebook.
nn.Conv3d?

Init signature: nn.Conv3d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True)
Docstring:
Applies a 3D convolution over an input signal composed of several input
planes.

In the simplest case, the output value of the layer with input size :math:`(N, C_{in}, D, H, W)`
and output :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})` can be precisely described as:

.. math::

    \begin{equation*}
    \text{out}(N_i, C_{out_j}) = \text{bias}(C_{out_j}) +
                            \sum_{k = 0}^{C_{in} - 1} \text{weight}(C_{out_j}, k) \star \text{input}(N_i, k)
    \end{equation*},

where :math:`\star` is the valid 3D 

In [121]:
    >>> m = nn.Conv1d(16, 33, 3, stride=2)
    >>> input = torch.randn(20, 16, 16)
    >>> output = m(input)

In [124]:
# Shape of the random batch fed to the Conv1d example.
input.shape

torch.Size([20, 16, 16])

In [123]:
# With kernel size 3 and stride 2 the length-16 input shrinks to floor((16 - 3) / 2) + 1 = 7.
output.shape

torch.Size([20, 33, 7])