In [1]:
#%env CUDA_VISIBLE_DEVICES=0

In [2]:
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import pytorch_lightning as pl
from PIL import Image
from pytorch_lightning.callbacks import LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

from era_data import TabletPeriodDataset, get_IDS
from era_model import SimpleCNN

In [3]:
# Use the GPU when PyTorch reports CUDA as available, otherwise fall back to the CPU.
# The bare expression on the last line displays the chosen device as the cell output.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

# Hyperparameters

In [4]:
# Run configuration: everything a reader might want to tune lives in this cell.
SEED = 0  # global RNG seed so weight init, dropout and shuffling are repeatable
LR = 5e-5
EPOCHS = 8
BATCH_SIZE = 16
SUFFIX = '-vanillaCNN'
DATE = datetime.now().strftime("%B%d")  # month name + day, e.g. "March29" (no year)
RUN_NAME_SUFFIX = '-preprocessed-masked' # ''
IMG_DIR = 'output/images_preprocessed'

# Seed the Python, NumPy and PyTorch RNGs (and DataLoader workers) before any
# model or loader is created; without this every run trains differently.
pl.seed_everything(SEED, workers=True)

# All sample IDs found under IMG_DIR; the count is baked into the run name below.
IDS = get_IDS(IMG_DIR=IMG_DIR)
print(len(IDS))

# Unique run identifier, reused for the TensorBoard version and the saved ID lists.
VERSION_NAME = f'period_clf_bs{BATCH_SIZE}_lr{LR}_{EPOCHS}epochs{SUFFIX}-{len(IDS)}_samples{RUN_NAME_SUFFIX}-{DATE}_1000test'
VERSION_NAME

94936


'period_clf_bs16_lr5e-05_8epochs-vanillaCNN-94936_samples-preprocessed-masked-March29_1000test'

# Load data

In [5]:
#! du -h {IMG_DIR}

In [6]:
# Hold out 1000 IDs as the test set. random_state=0 fixes the shuffle so the
# same IDs are selected on every run.
train_ids, test_ids = train_test_split(IDS, test_size=1000, random_state=0)
len(train_ids), len(test_ids)

(93936, 1000)

In [7]:
# Carve a 1000-sample validation set out of the non-test pool.
#
# The pool is re-derived from IDS instead of being read from `train_ids`,
# because this cell rebinds `train_ids`: reading and rebinding the same name
# would silently drop another 1000 training samples every time the cell is
# re-run. The first split is deterministic (random_state=0), so on a fresh
# top-to-bottom run the resulting train/val IDs are exactly the same as before.
trainval_ids = train_test_split(IDS, test_size=1000, random_state=0)[0]
train_ids, val_ids = train_test_split(trainval_ids, test_size=1000, random_state=0)
len(train_ids), len(val_ids)

(92936, 1000)

In [8]:
# Build one dataset per split with identical settings; only the ID list differs.
# The generator is consumed in order, so the datasets are constructed as
# train -> val -> test.
ds_train, ds_val, ds_test = (
    TabletPeriodDataset(IDS=split_ids, IMG_DIR=IMG_DIR, mask=True)
    for split_ids in (train_ids, val_ids, test_ids)
)

Filtering 94936 IDS down to provided 92936...
Filtering 94936 IDS down to provided 1000...
Filtering 94936 IDS down to provided 1000...


In [9]:
def collate_fn(batch):
    """Collate dataset samples into an (images, labels) pair of tensors.

    Each sample is an indexable record whose element 1 is a NumPy image array
    and whose element 2 is its label; any other elements are ignored.

    Args:
        batch: sequence of samples. All image arrays must share one shape,
            since they are stacked into a single tensor.

    Returns:
        A tuple ``(data, labels)``: ``data`` has shape
        ``(len(batch), 1, *image_shape)`` (a singleton axis is inserted after
        the batch axis) and ``labels`` is a 1-D tensor with one entry per sample.
    """
    images = [torch.from_numpy(record[1]) for record in batch]
    targets = [record[2] for record in batch]

    # Stack to (B, *image_shape), then insert the singleton axis at position 1.
    data = torch.stack(images).unsqueeze(1)
    labels = torch.tensor(targets)
    return data, labels

In [10]:
# Settings shared by every loader; only shuffling differs between the splits.
_loader_kwargs = dict(batch_size=BATCH_SIZE, collate_fn=collate_fn, num_workers=4)

# Shuffle the training data only; validation and test keep a fixed order.
dl_train = DataLoader(ds_train, shuffle=True, **_loader_kwargs)
dl_val = DataLoader(ds_val, shuffle=False, **_loader_kwargs)
dl_test = DataLoader(ds_test, shuffle=False, **_loader_kwargs)

In [11]:
# save model IDs so we can keep track of what data it was trained on
ids_out_dir = Path('output/clf_ids')
# to_csv does not create missing parent directories, so make sure the target
# folder exists; otherwise a fresh checkout fails here.
ids_out_dir.mkdir(parents=True, exist_ok=True)

# One headerless, index-free CSV per split, named after the run.
for split_name, split_ids in (('train', train_ids), ('val', val_ids), ('test', test_ids)):
    pd.Series(split_ids).to_csv(
        ids_out_dir / f'period-{split_name}-{VERSION_NAME}.csv',
        index=False,
        header=False,
    )

In [12]:
# Number of output classes for the classifier.
# NOTE(review): the "+ 2" is not explained here — presumably it reserves labels
# that are not listed in PERIOD_INDICES (e.g. "other"/"unknown"); confirm
# against era_data before changing it.
num_classes = len(TabletPeriodDataset.PERIOD_INDICES) + 2
num_classes

24

In [13]:
# Instantiate the CNN from era_model, sized for `num_classes` classes.
# NOTE(review): LR from the hyperparameter cell is not passed to the model in
# the cells visible here (it only appears in VERSION_NAME) — confirm that
# SimpleCNN's optimizer really uses the learning rate recorded in the run name.
model = SimpleCNN(num_classes=num_classes)

In [14]:
# TensorBoard logger: with these arguments the run is written under
# ./lightning_logs/<VERSION_NAME>/. Uses the directly imported class, matching
# the import cell, instead of reaching through `pl.loggers`.
logger = TensorBoardLogger(
    save_dir='.',
    name='lightning_logs',
    version=VERSION_NAME
)
# Log the learning rate at every optimizer step.
lr_monitor = LearningRateMonitor(logging_interval='step')

trainer = pl.Trainer(
    max_epochs=EPOCHS,
    # Follow the device detected earlier instead of hard-coding 'gpu', which
    # raises on machines without CUDA; on a GPU machine this is unchanged.
    accelerator='gpu' if device == 'cuda' else 'cpu',
    devices='auto',
    callbacks=[lr_monitor],
    logger=logger
)

  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [15]:
# Echo the run name, which is also the TensorBoard `version` used by the logger.
print('Logs to:', VERSION_NAME)

Logs to: period_clf_bs16_lr5e-05_8epochs-vanillaCNN-94936_samples-preprocessed-masked-March29_1000test


In [16]:
# Train for the configured number of epochs, validating on dl_val along the way.
trainer.fit(
    model,
    train_dataloaders=dl_train,
    val_dataloaders=dl_val,
)

  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name    | Type        | Params
-----------------------------------------
0  | conv1   | Conv2d      | 320   
1  | bn1     | BatchNorm2d | 64    
2  | conv2   | Conv2d      | 18.5 K
3  | bn2     | BatchNorm2d | 128   
4  | conv3   | Conv2d      | 73.9 K
5  | bn3     | BatchNorm2d | 256   
6  | conv4   | Conv2d      | 295 K 
7  | bn4     | BatchNorm2d | 512   
8  | pool    | MaxPool2d   | 0     
9  | dropout | Dropout     | 0     
10 | fc1     | Linear      | 268 M 
11 | fc2     | Linear      | 24.6 K
-----------------------------------------
268 M     Trainable params
0         Non-trainable params
268 M     Total params
1,075.400 Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=8` reached.
