In [9]:
import os
import sys

import time
import utils
import glob

import math
import numpy as np

from typing import Optional

import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
import torch.optim.lr_scheduler as lr

from torchvision import transforms
from torchvision.datasets import MNIST

import torchmetrics as tm


import pytorch_lightning as pl

from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning import Trainer, seed_everything


### Fixing the seed for reproducibility

In [10]:
# Single seed value for the whole notebook; the call's return value (the seed)
# is left as the last expression so it is displayed as the cell output.
seed = 121
pl.seed_everything(seed, workers=True)

Global seed set to 121


121

### Config setup

In [11]:
# Root directory of the data; DataModule joins it with the
# "training/training/..." and "test_images/test_images/" sub-paths.
data_dir = "./cil_data/"

In [12]:
# Samples per batch; used as the default `batch_size` argument of DataModule.
batch_size = 16

# Presumably the optimizer learning rate for the model — TODO confirm (no use visible in these cells).
learning_rate = 0.005

# Presumably the number of DataLoader worker processes — TODO confirm against the loaders.
data_workers = 8

# Fraction of the training images that DataModule.setup puts in the train split;
# the remainder becomes the validation split.
train_eval_ratio = 0.9

### Data Transformations and Loading

In [17]:
from cil_data import data

In [30]:
class DataModule(pl.LightningDataModule):
    """Lightning data module for the road-segmentation data under ``data_dir``.

    ``setup`` builds three ``data.RoadSegmentationDataset`` objects (training
    images, training ground truth, test images) and splits the training images
    into train/validation subsets according to ``train_eval_ratio``.

    The ground-truth dataset is split with the *same* indices as the images and
    exposed as ``dataset_train_truth`` / ``dataset_val_truth`` so that image
    ``i`` of a subset lines up with mask ``i`` of the matching truth subset.
    NOTE(review): this assumes both datasets enumerate their files in the same
    order — confirm against ``RoadSegmentationDataset``.

    Args:
        data_dir: Root directory containing the ``training`` and
            ``test_images`` folders.
        batch_size: Number of samples per batch for every loader.
        num_workers: Worker processes used by every ``DataLoader``.
        split_seed: Seed of the private generator used for the train/val
            split, so repeated ``setup`` calls yield the same split.
    """

    def __init__(
        self,
        data_dir: str = data_dir,
        batch_size: int = batch_size,
        num_workers: int = data_workers,
        split_seed: int = seed,
    ):
        super().__init__()
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.split_seed = split_seed

        self.data_transform = transforms.Compose([
            data.ToTensor()
        ])

    def setup(self, stage: Optional[str] = None):
        """Create the datasets and the aligned train/validation split.

        Args:
            stage: Accepted for Lightning compatibility; all datasets are
                built regardless of its value.

        Raises:
            ValueError: If the image and ground-truth datasets differ in size,
                in which case index-aligned splitting is impossible.
        """
        self.dataset_train_val = data.RoadSegmentationDataset(
            os.path.join(self.data_dir, "training/training/images/"),
            transform=self.data_transform
        )

        self.dataset_train_val_truth = data.RoadSegmentationDataset(
            os.path.join(self.data_dir, "training/training/groundtruth/"),
            transform=self.data_transform
        )

        self.dataset_test = data.RoadSegmentationDataset(
            os.path.join(self.data_dir, "test_images/test_images/"),
            transform=self.data_transform
        )

        dataset_size = len(self.dataset_train_val)
        truth_size = len(self.dataset_train_val_truth)
        if truth_size != dataset_size:
            raise ValueError(
                f"Found {dataset_size} training images but {truth_size} "
                "ground-truth masks; cannot build an aligned train/val split."
            )
        train_split_size = int(dataset_size * train_eval_ratio)

        # A dedicated, freshly seeded generator keeps the split identical on
        # every setup() call instead of depending on how far the global RNG
        # has advanced (setup may run once per stage).
        split_generator = torch.Generator().manual_seed(self.split_seed)
        self.dataset_train, self.dataset_val = random_split(
            self.dataset_train_val,
            [train_split_size, dataset_size - train_split_size],
            generator=split_generator,
        )

        # Reuse the sampled indices so masks are split together with images.
        self.dataset_train_truth = torch.utils.data.Subset(
            self.dataset_train_val_truth, self.dataset_train.indices
        )
        self.dataset_val_truth = torch.utils.data.Subset(
            self.dataset_train_val_truth, self.dataset_val.indices
        )

    def train_dataloader(self):
        """Return the shuffled loader over the training image subset."""
        return DataLoader(
            self.dataset_train,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=self.num_workers,
        )

    def val_dataloader(self):
        """Return the loader over the validation image subset."""
        return DataLoader(
            self.dataset_val,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
        )

    def test_dataloader(self):
        """Return the loader over the test images."""
        return DataLoader(
            self.dataset_test,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
        )

In [32]:
# Instantiate the data module for the configured directory and build its
# datasets right away so they can be inspected before training.
d_module = DataModule(data_dir=data_dir)
d_module.setup()

### Train model

In [12]:
# Stop training once the monitored 'val_loss' has not decreased for 15
# consecutive checks. `verbose` is a boolean switch, so pass True rather
# than the integer 2.
early_stop_callback = EarlyStopping(
    monitor='val_loss',
    patience=15,
    verbose=True,
    mode='min'
)

In [13]:
# Trainer configured for a quick smoke test: with fast_dev_run=True it runs
# the train, val and test loops on a single batch each.
trainer = Trainer(
    callbacks=[early_stop_callback],
    auto_scale_batch_size='binsearch',
    stochastic_weight_avg=True,
    progress_bar_refresh_rate=10,
    fast_dev_run=True,
    deterministic=True,
    # Options currently left disabled:
    #   gpus=-1, auto_select_gpus=True, auto_lr_find=True, benchmark=True
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
Running in fast_dev_run mode: will run a full train, val and test loop using 1 batch(es).


In [14]:
# Runs the Trainer's tuning step on the model object `system`.
# NOTE(review): `system` is not assigned in any cell shown here, so this call
# raises NameError on a fresh kernel — the model must be defined before this cell.
trainer.tune(system)

NameError: name 'system' is not defined

In [None]:
# Trains the model object `system` with the Trainer configured above.
# NOTE(review): `system` is not defined in the visible cells, and `d_module` is
# not passed here — confirm whether `system` provides its own dataloaders.
trainer.fit(system)