In [1]:
%env CUDA_VISIBLE_DEVICES=0

env: CUDA_VISIBLE_DEVICES=0


In [2]:
import torch

In [3]:
# Default to the CPU and switch to CUDA only when torch reports a usable GPU.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'
device

'cuda'

# Hyperparameters

In [4]:
# Learning rate, handed to EraClassifier(LR=...) when the model is created.
LR = 5e-5
# Number of training epochs, handed to pl.Trainer as max_epochs.
EPOCHS = 5
# Samples per batch for both the train and test DataLoaders.
BATCH_SIZE = 16
# Free-form tag embedded in VERSION_NAME; in the visible cells it is only used for naming the run.
SUFFIX = '-resnet50'

# Load data

In [5]:
# Alternative image directory, currently disabled:
# IMG_DIR = 'output/images'
# Tag appended to the end of VERSION_NAME; the alternative value is '' (empty).
RUN_NAME_SUFFIX = '-preprocessed2' # ''
# Image directory passed to get_IDS and TabletPeriodDataset below.
IMG_DIR = 'output/images_preprocessed'

In [6]:
#! du -h {IMG_DIR}

In [7]:
from era_data import TabletPeriodDataset, get_IDS
from collections import Counter
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
from matplotlib import pyplot as plt
import pandas as pd

In [8]:
# get_IDS is project code (era_data); presumably it lists the sample IDs found under IMG_DIR — verify there.
IDS = get_IDS(IMG_DIR=IMG_DIR)
# Display how many IDs were returned.
len(IDS)

97640

In [9]:
# Run identifier built from the hyperparameters, the number of IDs and the run suffix.
# It is reused as the TensorBoard logger version and in the file names of the saved ID lists,
# so changing its format changes where logs and ID files end up.
VERSION_NAME = f'period_clf_bs{BATCH_SIZE}_lr{LR}_{EPOCHS}epochs{SUFFIX}-{len(IDS)}_samples{RUN_NAME_SUFFIX}'
VERSION_NAME

'period_clf_bs16_lr5e-05_5epochs-resnet50-97640_samples-preprocessed2'

In [10]:
# Hold out exactly 500 IDs (an integer test_size is an absolute count); random_state=0 keeps
# the split the same on every run. The held-out IDs are later used as the validation set in trainer.fit.
train_ids, test_ids = train_test_split(IDS, test_size=500, random_state=0)
# Display the size of each split.
len(train_ids), len(test_ids)

(97140, 500)

In [11]:
# Build both splits from the same image directory. IMG_DIR is passed explicitly to each
# dataset so that the held-out split is read from the same (preprocessed) images the model
# is trained on, rather than from whatever default directory TabletPeriodDataset uses.
ds_train = TabletPeriodDataset(IDS=train_ids, IMG_DIR=IMG_DIR)
ds_test = TabletPeriodDataset(IDS=test_ids, IMG_DIR=IMG_DIR)

Filtering 97640 IDS down to provided 97140...
Filtering 97640 IDS down to provided 500...


In [12]:
import numpy as np
from PIL import Image

def collate_fn(batch):
    """Turn a list of (image, label) samples into one image tensor and one label tensor.

    The first element of every sample is converted to a PIL image and resized with
    nearest-neighbour resampling to size (178, 218) (PIL reads this as (width, height)).
    The resized images are stacked along a new leading batch dimension. The second
    element of every sample is gathered into a single label tensor.

    Returns:
        A tuple (images, labels).
    """
    target_size = (178, 218)

    resized_images = []
    for sample in batch:
        pil_image = Image.fromarray(sample[0]).resize(target_size, Image.NEAREST)
        resized_images.append(torch.from_numpy(np.array(pil_image)))

    label_tensor = torch.from_numpy(np.array([sample[1] for sample in batch]))

    # Stacking on dim 0 yields the same tensor as unsqueeze(0) on each image followed by cat.
    image_tensor = torch.stack(resized_images, dim=0)

    return image_tensor, label_tensor

In [13]:
# Training batches are drawn in shuffled order; the held-out split keeps a fixed order.
# Both loaders resize and batch samples through collate_fn.
dl_train = DataLoader(
    ds_train,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
    collate_fn=collate_fn,
)
dl_test = DataLoader(
    ds_test,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
    collate_fn=collate_fn,
)

In [14]:
# Save the train/test IDs so we can keep track of what data the model was trained on.
from pathlib import Path

clf_ids_dir = Path('output/clf_ids')
# to_csv does not create missing parent directories, so make sure the target exists
# (otherwise a fresh checkout fails here with an OSError).
clf_ids_dir.mkdir(parents=True, exist_ok=True)

for split_name, split_ids in (('train', train_ids), ('test', test_ids)):
    # One ID per line: no index column and no header row.
    pd.Series(split_ids).to_csv(
        clf_ids_dir / f'period-{split_name}-{VERSION_NAME}.csv',
        index=False,
        header=False,
    )

# Create Model

In [15]:
from era_model import EraClassifier # also used for periods
from torchinfo import summary

In [16]:
# Number of output classes for the classifier head.
# NOTE(review): the reason for the "+ 2" is not visible in this notebook (possibly extra
# catch-all classes beyond PERIOD_INDICES) — confirm against era_data before changing it.
num_classes = len(TabletPeriodDataset.PERIOD_INDICES) + 2
num_classes

24

In [17]:
# EraClassifier is shared with the era task; here it is configured for the period classes.
model = EraClassifier(LR=LR, num_classes=num_classes)



In [18]:
# Print the layer-by-layer summary for a (BATCH_SIZE, 512, 512) input.
# NOTE(review): collate_fn resizes every image to PIL size (178, 218), so the batches seen
# during training are not 512x512; the per-layer output shapes below describe this dummy
# input only — confirm whether the summary should use the training size instead.
summary(model, input_size=(BATCH_SIZE, 512, 512))

Layer (type:depth-idx)                        Output Shape              Param #
EraClassifier                                 [16, 24]                  --
├─Conv2d: 1-1                                 [16, 3, 512, 512]         6
├─ResNet: 1-2                                 [16, 24]                  --
│    └─Conv2d: 2-1                            [16, 64, 256, 256]        9,408
│    └─BatchNorm2d: 2-2                       [16, 64, 256, 256]        128
│    └─ReLU: 2-3                              [16, 64, 256, 256]        --
│    └─MaxPool2d: 2-4                         [16, 64, 128, 128]        --
│    └─Sequential: 2-5                        [16, 256, 128, 128]       --
│    │    └─Bottleneck: 3-1                   [16, 256, 128, 128]       75,008
│    │    └─Bottleneck: 3-2                   [16, 256, 128, 128]       70,400
│    │    └─Bottleneck: 3-3                   [16, 256, 128, 128]       70,400
│    └─Sequential: 2-6                        [16, 512, 64, 64]         --
│    

# Train Model

In [19]:
# import warnings
import pytorch_lightning as pl
from pytorch_lightning.callbacks import LearningRateMonitor

In [21]:
# Callback that records the learning rate to the logger at every training step.
lr_monitor = LearningRateMonitor(logging_interval='step')

In [22]:
# TensorBoard logger; with these arguments the run is written under
# ./lightning_logs/<VERSION_NAME>, so each hyperparameter combination gets its own folder.
logger = pl.loggers.TensorBoardLogger(
    save_dir='.',
    name='lightning_logs',
    version=VERSION_NAME
)

In [23]:
# Trainer for EPOCHS epochs, logging to TensorBoard and tracking the learning rate.
trainer = pl.Trainer(
    max_epochs=EPOCHS,
    # Follow the `device` detected at the top of the notebook instead of hard-coding 'gpu',
    # so the notebook still runs (slowly) on a machine without CUDA. On a CUDA machine this
    # evaluates to 'gpu', exactly as before.
    accelerator='gpu' if device == 'cuda' else 'cpu',
    devices='auto',
    callbacks=[lr_monitor],
    logger=logger
)

  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [24]:
# Echo the run name so the matching TensorBoard version folder is easy to find.
print('Logs to:', VERSION_NAME)

Logs to: period_clf_bs16_lr5e-05_5epochs-resnet50-97640_samples-preprocessed2


In [25]:
# Train on dl_train. dl_test is passed as the validation dataloader, so the 500 held-out
# samples are used for validation during training (they are not an untouched test set).
trainer.fit(model, dl_train, dl_test)

  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type               | Params
------------------------------------------------------
0 | gray_to_triple | Conv2d             | 6     
1 | core           | ResNet             | 23.6 M
2 | objective      | CrossEntropyLoss   | 0     
3 | train_acc      | MulticlassAccuracy | 0     
4 | val_acc        | MulticlassAccuracy | 0     
------------------------------------------------------
23.6 M    Trainable params
0         Non-trainable params
23.6 M    Total params
94.229    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=5` reached.
