In [1]:
import os
from glob import glob
import sys
sys.path.insert(0, '../core')

from load.load_data import dirtyMNISTDataset

In [5]:
import torch
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import DataLoader
from skimage import io

In [15]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [3]:
dataset = dirtyMNISTDataset(mode='train')
dataloader = DataLoader(dataset=dataset, batch_size=)

In [36]:
class VanillaCNN(nn.Module):
    def __init__(self, input_size=(1, 256, 256), kernel_size=3, channel_dims=[32, 64], hidden_dims=(256, 128), n_classes=26, batchnorm=False):
        super(VanillaCNN, self).__init__()
        self.input_size = input_size
        self.kernel_size = kernel_size
        self.channel_dims = channel_dims
        self.hidden_dims = hidden_dims
        self.n_classes = n_classes
        self.batchnorm = batchnorm

        self.layers = []
        prev_channel_dim = self.input_size[0]
        for channel_dim in self.channel_dims:
            self.layers.append(
                nn.Conv2d(
                    in_channels=prev_channel_dim,
                    out_channels=channel_dim,
                    kernel_size=self.kernel_size,
                    stride=1,
                    padding=self.kernel_size // 2
                    )
                    )
            if self.batchnorm:
                self.layers.append(nn.BatchNorm2d(num_features=channel_dim))

            self.layers.append(nn.ReLU(inplace=True))
            self.layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            self.layers.append(nn.Dropout2d(p=.5))
            prev_channel_dim = channel_dim

        self.layers.append(nn.Flatten())
        prev_hidden_dim = prev_channel_dim * (self.input_size[1]) // (2**len(self.channel_dims)) * (self.input_size[2] // (2**len(self.channel_dims)))
        for hidden_dim in self.hidden_dims:
            self.layers.append(nn.Linear(in_features=prev_hidden_dim, out_features=hidden_dim, bias=True))
            self.layers.append(nn.ReLU(inplace=True))
            prev_hidden_dim = hidden_dim

        self.layers.append(nn.Linear(in_features=prev_hidden_dim, out_features=self.n_classes, bias=True))

        self.network = nn.Sequential()
        for layer_idx, layer in enumerate(self.layers):
            layer_name = f'{type(layer).__name__.lower()}_{layer_idx}'
            self.network.add_module(name=layer_name, module=layer)
        self.init_param()

    def init_param(self):
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight)
                nn.init.zeros_(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.Linear):
                nn.init.kaiming_normal_(module.weight)
                nn.init.zeros_(module.bias)
    
    def forward(self, X):
        return self.network.forward(X)

In [37]:
model = VanillaCNN(input_size=(1,256,256)).to(device)

ModuleAttributeError: 'VanillaCNN' object has no attribute 'ksize'

In [32]:
model.forward(X=torch.Tensor(dataset[0]['image']).to(device))

RuntimeError: Expected 4-dimensional input for 4-dimensional weight [32, 1, 3, 3], but got 2-dimensional input of size [256, 256] instead

In [19]:
dataset[0]

{'image': array([[ 26, 196,   1, ...,  74, 154,  20],
        [176, 223,  15, ...,  61, 104, 123],
        [114, 131, 239, ...,  79, 199,  27],
        ...,
        [198,   1, 159, ..., 254, 106, 212],
        [147, 236, 140, ..., 215,  43, 253],
        [192,  45, 154, ..., 170, 157, 233]], dtype=uint8),
 'label': array([1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0,
        0, 1, 1, 1])}

In [11]:
img_path = glob('../data/dirty_mnist/train/*')[0]

In [13]:
io.imread(img_path).shape

(256, 256)