In [2]:
import sys
from os.path import join
import warnings
warnings.simplefilter('ignore')

import numpy as np
import torch
from tqdm import tqdm

from coreml.config import Config
from coreml.data.dataloader import get_dataloader
from coreml.utils.io import read_yml
from coreml.decomposition import PCA
from coreml.sklearn import SVM

In [3]:
# Build the experiment configuration from the pca-svm YAML file of the
# melanoma-classification competition.
config = Config(
    'competitions/2020/melanoma-classification/configs/pca-svm.yml'
)

In [4]:
# Data loader + dataset for the 'train' split, batched with the size taken
# from the model config. Shuffling and drop_last are turned off
# (presumably so every sample is yielded once, in a fixed order - confirm
# against get_dataloader).
dataloader, dataset = get_dataloader(
    config.data,
    'train',
    config.model['batch_size'],
    num_workers=10,
    shuffle=False,
    drop_last=False,
)

[33m=> Loading dataset version file: [siim-isic-melanoma, v1.0-200x200, train][0m


Loading items: 100%|██████████| 26377/26377 [00:00<00:00, 455349.81it/s]


In [5]:
# Walk the train loader once (with a progress bar) and keep every batch's
# 'signals' and 'labels' entries so they can be concatenated afterwards.
train_signals, train_labels = [], []
iterator = tqdm(dataloader)

for batch in iterator:
    train_signals += [batch['signals']]
    train_labels += [batch['labels']]

100%|██████████| 207/207 [00:47<00:00,  4.37it/s]


In [6]:
# Join the per-batch signal tensors into a single tensor along axis 0.
# The name is rebound from a list to a tensor, so a second run of this cell
# would hand torch.cat a Tensor and raise TypeError; the guard makes the cell
# safe to re-run.
if not isinstance(train_signals, torch.Tensor):
    train_signals = torch.cat(train_signals, dim=0)

In [7]:
# Join the per-batch label tensors into a single tensor along axis 0.
# The name is rebound from a list to a tensor, so a second run of this cell
# would hand torch.cat a Tensor and raise TypeError; the guard makes the cell
# safe to re-run.
if not isinstance(train_labels, torch.Tensor):
    train_labels = torch.cat(train_labels, dim=0)

In [8]:
# Show the sizes of the concatenated train signals and labels side by side.
(train_signals.size(), train_labels.size())

(torch.Size([26377, 3, 200, 200]), torch.Size([26377]))

In [9]:
# Data loader + dataset for the 'val' split, batched with the size taken
# from the model config. Shuffling and drop_last are turned off
# (presumably so every sample is yielded once, in a fixed order - confirm
# against get_dataloader). Rebinds `dataloader` and `dataset`.
dataloader, dataset = get_dataloader(
    config.data,
    'val',
    config.model['batch_size'],
    num_workers=10,
    shuffle=False,
    drop_last=False,
)

[33m=> Loading dataset version file: [siim-isic-melanoma, v1.0-200x200, val][0m


Loading items: 100%|██████████| 6749/6749 [00:00<00:00, 446664.42it/s]


In [10]:
# Walk the val loader once (with a progress bar) and keep every batch's
# 'signals' and 'labels' entries so they can be concatenated afterwards.
val_signals, val_labels = [], []
iterator = tqdm(dataloader)

for batch in iterator:
    val_signals += [batch['signals']]
    val_labels += [batch['labels']]

100%|██████████| 53/53 [00:12<00:00,  4.26it/s]


In [11]:
# Join the per-batch signal tensors into a single tensor along axis 0.
# The name is rebound from a list to a tensor, so a second run of this cell
# would hand torch.cat a Tensor and raise TypeError; the guard makes the cell
# safe to re-run.
if not isinstance(val_signals, torch.Tensor):
    val_signals = torch.cat(val_signals, dim=0)

In [12]:
# Join the per-batch label tensors into a single tensor along axis 0.
# The name is rebound from a list to a tensor, so a second run of this cell
# would hand torch.cat a Tensor and raise TypeError; the guard makes the cell
# safe to re-run.
if not isinstance(val_labels, torch.Tensor):
    val_labels = torch.cat(val_labels, dim=0)

In [13]:
# Show the sizes of the concatenated val signals and labels side by side.
(val_signals.size(), val_labels.size())

(torch.Size([6749, 3, 200, 200]), torch.Size([6749]))

In [14]:
# Data loader + dataset for the 'test' split, batched with the size taken
# from the model config. Shuffling and drop_last are turned off
# (presumably so every sample is yielded once, in a fixed order - confirm
# against get_dataloader). Rebinds `dataloader` and `dataset`.
dataloader, dataset = get_dataloader(
    config.data,
    'test',
    config.model['batch_size'],
    num_workers=10,
    shuffle=False,
    drop_last=False,
)

[33m=> Loading dataset version file: [siim-isic-melanoma, v1.0-200x200, test][0m


Loading items: 100%|██████████| 10982/10982 [00:00<00:00, 445873.43it/s]


In [15]:
# Walk the test loader once (with a progress bar) and keep every batch's
# 'signals' entry; only signals are collected for this split.
test_signals = []
iterator = tqdm(dataloader)

for batch in iterator:
    test_signals += [batch['signals']]

100%|██████████| 86/86 [00:19<00:00,  4.39it/s]


In [None]:
# Join the per-batch signal tensors into a single tensor along axis 0.
# The name is rebound from a list to a tensor, so a second run of this cell
# would hand torch.cat a Tensor and raise TypeError; the guard makes the cell
# safe to re-run.
if not isinstance(test_signals, torch.Tensor):
    test_signals = torch.cat(test_signals, dim=0)

In [None]:
# Show the size of the concatenated test signals.
test_signals.size()

In [None]:
# Flatten every sample to a single feature vector: the leading (sample) axis
# is kept and all remaining axes are collapsed into one.
train_signals = train_signals.reshape(train_signals.size(0), -1)
val_signals = val_signals.reshape(val_signals.size(0), -1)
test_signals = test_signals.reshape(test_signals.size(0), -1)

In [None]:
# Stack train, val and test signals (in that order) along the first axis
# into one tensor.
all_signals = torch.cat((train_signals, val_signals, test_signals), 0)

In [None]:
# Show the size of the combined train + val + test tensor.
all_signals.size()