In [1]:
import os
from datetime import datetime

import h5py
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn
from torch import optim
from torch.utils.data import Subset, DataLoader, random_split, SubsetRandomSampler
from torch.utils.data import Dataset
from torch.utils.tensorboard import SummaryWriter
from torchinfo import summary
from torchvision import models
from torchvision import transforms

from baseline_model import simpleCNN
from data_setup import ImageFolder, create_validation, create_dataloaders
from engine import train_step, test_step, train, create_writer

In [2]:
# Build the training and testing dataloaders from 'master.hdf5' with a batch
# size of 32; train_prop=0.8 is presumably the fraction of samples assigned to
# the training split -- confirm against data_setup.create_dataloaders.
#
# os.cpu_count() is documented to return None when the CPU count cannot be
# determined, so fall back to 0 instead of forwarding None as a worker count.
# NOTE(review): 0 is assumed to mean "load in the main process", as it does for
# torch.utils.data.DataLoader -- confirm create_dataloaders forwards it as-is.
training_dataloader, testing_dataloader = create_dataloaders(
    training_dir='master.hdf5',
    batch_size=32,
    num_workers=os.cpu_count() or 0,
    train_prop=0.8,
)

In [3]:
# Pick the compute device: use the GPU when CUDA is available, otherwise
# fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [4]:
# Display the selected device string to confirm whether CUDA was picked up.
device

'cuda'

In [5]:
# Instantiate the baseline CNN that the rest of the notebook trains.
# NOTE(review): input_shape=1 presumably is the number of input channels (the
# summary cell feeds single-channel 820x820 images) and output_shape=2 the
# number of predicted values -- confirm in baseline_model.simpleCNN.
baseline_model = simpleCNN(input_shape=1, output_shape=2)

In [6]:
# Print a layer-by-layer summary of the model (input/output sizes, parameter
# counts and trainability) for an input of size (32, 1, 820, 820).
# `summary` is torchinfo's summary function, imported in the notebook's
# import cell rather than here so all dependencies are declared in one place.
summary(
    model=baseline_model,
    input_size=(32, 1, 820, 820),  # the keyword is "input_size", not "input_shape"
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"],
)

Layer (type (var_name))                  Input Shape          Output Shape         Param #              Trainable
simpleCNN (simpleCNN)                    [32, 1, 820, 820]    [32, 2, 1]           --                   True
├─Conv2d (conv1)                         [32, 1, 820, 820]    [32, 32, 820, 820]   320                  True
├─ReLU (relu)                            [32, 32, 820, 820]   [32, 32, 820, 820]   --                   --
├─MaxPool2d (pool)                       [32, 32, 820, 820]   [32, 32, 410, 410]   --                   --
├─Conv2d (conv2)                         [32, 32, 410, 410]   [32, 64, 410, 410]   18,496               True
├─ReLU (relu)                            [32, 64, 410, 410]   [32, 64, 410, 410]   --                   --
├─MaxPool2d (pool)                       [32, 64, 410, 410]   [32, 64, 205, 205]   --                   --
├─Conv2d (conv3)                         [32, 64, 205, 205]   [32, 128, 205, 205]  73,856               True
├─ReLU (relu)         

In [7]:
# Loss, metric and optimizer setup.
# MSE is the loss passed to train() as loss_fn; L1 loss is passed alongside it
# as a secondary metric so both are tracked per epoch.
loss_fn = nn.MSELoss()
l1_loss = nn.L1Loss()

# `optim` (torch.optim) is imported in the notebook's import cell.
optimizer = optim.Adam(baseline_model.parameters(), lr=0.001)

# Create the summary writer instance for TensorBoard.
# NOTE(review): "expirement_name" is misspelled, but it is the keyword this
# call passes to engine.create_writer; fixing the spelling here would also
# require renaming the parameter in engine.py, so it is left unchanged.
# NOTE(review): extra='five_epochs' is a free-text label -- keep it in sync
# with the `epochs` value passed to train().
writer = create_writer(
    expirement_name='first_log',
    model_name='baseline_model',
    extra='five_epochs',
)

In [8]:
# Train the baseline model for 5 epochs on the selected device, optimising
# loss_fn (MSE) and tracking l1_loss as the additional metric; the writer is
# handed to train() for TensorBoard logging.
# The value returned by train() is kept in model_results for inspection.
# (The redundant `from torch import nn` that used to sit here was dropped:
# `nn` is already imported in the import cell and is not used in this cell.)
model_results = train(
    model=baseline_model,
    train_dataloader=training_dataloader,
    test_dataloader=testing_dataloader,
    loss_fn=loss_fn,
    other_metric=l1_loss,
    device=device,
    epochs=5,
    optimizer=optimizer,
    writer=writer,
)

  0%|          | 0/5 [00:00<?, ?it/s]

On batch number 1
On batch number 101
On batch number 201
EPOCH: 1 | TRAIN LOSS: 14319.839 | TRAIN METRIC: 47.954 | TEST LOSS: 31.154 | TEST METRIC: 4.611
On batch number 1
On batch number 101
On batch number 201
EPOCH: 2 | TRAIN LOSS: 46.387 | TRAIN METRIC: 5.533 | TEST LOSS: 31.358 | TEST METRIC: 4.892
On batch number 1
On batch number 101
On batch number 201
EPOCH: 3 | TRAIN LOSS: 34.369 | TRAIN METRIC: 4.676 | TEST LOSS: 9.817 | TEST METRIC: 2.562
On batch number 1
On batch number 101
On batch number 201
EPOCH: 4 | TRAIN LOSS: 29.317 | TRAIN METRIC: 4.504 | TEST LOSS: 11.230 | TEST METRIC: 2.835
On batch number 1
On batch number 101
On batch number 201
EPOCH: 5 | TRAIN LOSS: 21.196 | TRAIN METRIC: 3.644 | TEST LOSS: 12.964 | TEST METRIC: 3.092


In [9]:
# Display the results object returned by train() (per-epoch loss and metric
# values for the training and testing sets).
model_results

{'train_loss': [14319.839179508388,
  46.38660030812025,
  34.3689235933125,
  29.31707430165261,
  21.195635460317135],
 'train_metric': [47.95411783643067,
  5.5327390637248755,
  4.6764541156589985,
  4.504121230915189,
  3.6436168551445007],
 'test_loss': [31.153695285320282,
  31.35831931233406,
  9.816630616784096,
  11.230083957314491,
  12.963814869523048],
 'test_metric': [4.611179582774639,
  4.892353817820549,
  2.5618740543723106,
  2.8352025374770164,
  3.0919458121061325]}

In [18]:
%tensorboard --logdir runs --port 6006

/bin/bash: lsof: command not found
