In [11]:
!pip install --quiet wandb
!pip install --quiet dtlpy
!pip install --quiet nvidia-dali-cuda110
!pip install --quiet lru-dict
!pip install --quiet efficientnet_pytorch
!pip install --quiet pytorch_lightning

## Training Classifier on Heavy Dirt and Clean Images

In [12]:
import wandb
import dtlpy as dataloop
import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger

# ResNet here is ResNet-18 (the 18-layer residual network, as opposed to the 50-layer ResNet-50)
from models.resnet import ResNet

# These libraries hold dataset information
from dataset import DataloopDataset, DataloopDatasetDirectory
# This is the Trainer Library
from training_wheels import TrainingWheels

# Library for altering "raw" images: supports random rotation, flipping, and adding "noise"
from augmentation import Augmentation

from datetime import datetime
from PIL import Image
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, random_split
import os

In [3]:
# Credentials must not be stored in the notebook. Reuse WANDB_API_KEY when it is
# already exported in the environment; otherwise ask for it interactively so the
# value is never written to the .ipynb file.
# NOTE(review): an API key was previously committed on this line - revoke/rotate it in W&B.
from getpass import getpass

if not os.environ.get('WANDB_API_KEY'):
    os.environ['WANDB_API_KEY'] = getpass("Weights & Biases API key: ")

# Directory wandb uses for its locally generated run files.
os.environ['WANDB_DIR'] = '/home/ec2-user/SageMaker/Temp_File_Folder'

In [4]:
# Run configuration for the notebook. Written with dict(...) so every option
# reads as a keyword; the keys keep their insertion order.
hparams = dict(
    dataset_dir="temp",                  # passed to DataloopDataset as dataset_dir
    model="resnet18",                    # not read by the cells visible here
    use_wandb=True,                      # when falsy, no WandbLogger is created
    enable_center_cropping=False,        # forwarded to Augmentation
    batch_size=16,                       # forwarded to TrainingWheels
    num_gpus=1,                          # drives the Trainer's accelerator and devices
    downscaling_width=768,               # forwarded to Augmentation
    downscaling_height=768,              # forwarded to Augmentation
    max_epochs=50,                       # forwarded to pl.Trainer
    accelerator=None,                    # not read by the cells visible here
    devices=None,                        # not read by the cells visible here
    use_dali=False,                      # not read by the cells visible here
    center_crop=448,                     # forwarded to Augmentation
    enable_horizontal_mirroring=True,    # forwarded to Augmentation
    random_rotation_angle=15,            # forwarded to Augmentation
    noise_amount=0,                      # forwarded to Augmentation
    resume_from_checkpoint=None,         # used as ckpt_path in trainer.fit
    enable_image_logging=True,           # forwarded to TrainingWheels
    lr=0.00001,                          # forwarded to TrainingWheels
)

In [14]:
# Connection details for the Dataloop project and dataset used for training.
# NOTE(review): the email is personal data and the password is blank here -
# consider reading both from the environment instead of storing them in the notebook.
data_info = dict(
    project="Car Condition Classification",
    dataset="all_portrait",
    email="ashley.lawrencehuizenga@coxautoinc.com",
    out_dir="dataloop/",
    password="",
)

In [6]:
# Timestamp captured once, when this cell runs; dt_string is later used as the
# W&B run name (month/day/year hour:minute).
now = datetime.now()
dt_string = f"{now:%m/%d/%Y %H:%M}"

In [7]:
dataset_dir = hparams['dataset_dir']

# Build the augmentation object. Every Augmentation keyword has the same name as
# its hparams key, so the arguments are pulled straight from hparams (same order
# as the keyword list below).
augmentation = Augmentation(**{
    option: hparams[option]
    for option in (
        'enable_center_cropping',
        'enable_horizontal_mirroring',
        'random_rotation_angle',
        'noise_amount',
        'downscaling_width',
        'downscaling_height',
        'center_crop',
    )
})

In [16]:
# Create the Dataloop-backed dataset. The credential and project fields are taken
# from data_info under their own names; the augmentation built above is attached.
dataset = DataloopDataset(
    dataset_dir=dataset_dir,
    **{field: data_info[field] for field in ('email', 'password', 'project', 'dataset')},
    augmentation=augmentation,
)

Iterate Pages: 100%|██████████| 66/66 [00:54<00:00,  1.21it/s]
Download Items: 100%|██████████| 1/1 [00:00<00:00,  2.35it/s]
Download Items: 100%|██████████| 1/1 [00:00<00:00,  5.51it/s]
Download Items: 100%|██████████| 1/1 [00:00<00:00,  5.87it/s]
Download Items: 100%|██████████| 1/1 [00:00<00:00,  3.93it/s]
Download Items: 100%|██████████| 1/1 [00:00<00:00,  4.08it/s]
Download Items: 100%|██████████| 1/1 [00:00<00:00,  4.11it/s]
Download Items: 100%|██████████| 1/1 [00:00<00:00,  4.00it/s]
Download Items: 100%|██████████| 1/1 [00:00<00:00,  4.23it/s]
Download Items: 100%|██████████| 1/1 [00:00<00:00,  4.08it/s]
Download Items: 100%|██████████| 1/1 [00:00<00:00,  4.11it/s]
Download Items: 100%|██████████| 1/1 [00:00<00:00,  4.01it/s]
Download Items: 100%|██████████| 1/1 [00:00<00:00,  4.00it/s]
Download Items: 100%|██████████| 1/1 [00:00<00:00,  3.98it/s]
Download Items: 100%|██████████| 1/1 [00:00<00:00,  3.71it/s]
Download Items: 100%|██████████| 1/1 [00:00<00:00,  4.15it/s]
Downloa

In [17]:
batch_size = hparams['batch_size']

# Log to Weights & Biases only when enabled in hparams; otherwise pass no logger.
logger = (
    WandbLogger(project="car-condition-classifier", log_model='all', name=dt_string)
    if hparams['use_wandb']
    else None
)
model = ResNet()

# Always give the Trainer an explicit accelerator name. Recent PyTorch Lightning
# releases reject `accelerator=None`, so the no-GPU path must say "cpu".
accelerator = "gpu" if hparams['num_gpus'] > 0 else "cpu"



In [18]:
# Bundle model, dataset and optimisation settings into the object handed to trainer.fit.
# NOTE(review): validation_set_size=0.2 presumably holds out 20% of the data - confirm.
# NOTE(review): `augmentation` is also given to the dataset - confirm it is not applied twice.
training_wheels = TrainingWheels(
    model=model,
    dataset=dataset,
    validation_set_size=0.2,
    batch_size=batch_size,
    augmentation=augmentation,
    lr=hparams['lr'],
    enable_image_logging=hparams['enable_image_logging'],
)

# Lightning trainer; at least one device is always requested, even when num_gpus is 0.
trainer = pl.Trainer(
    max_epochs=int(hparams['max_epochs']),
    logger=logger,
    accelerator=accelerator,
    devices=max(hparams['num_gpus'], 1),
)

/home/ec2-user/anaconda3/envs/pytorch_p310/lib/python3.10/site-packages/pytorch_lightning/utilities/parsing.py:198: Attribute 'model' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['model'])`.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [19]:
# Start training; ckpt_path is None unless hparams['resume_from_checkpoint'] is set.
trainer.fit(
    model=training_wheels,
    ckpt_path=hparams['resume_from_checkpoint'],
)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mashley-lawrencehuizenga[0m ([33mfyusion[0m). Use [1m`wandb login --relogin`[0m to force relogin
  from IPython.core.display import HTML, display  # type: ignore


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type   | Params
---------------------------------
0 | model | ResNet | 11.2 M
---------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.710    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/ec2-user/anaconda3/envs/pytorch_p310/lib/python3.10/site-packages/pytorch_lightning/utilities/data.py:77: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 16. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

/home/ec2-user/anaconda3/envs/pytorch_p310/lib/python3.10/site-packages/pytorch_lightning/utilities/data.py:77: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 2. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=50` reached.


In [20]:
# Close the W&B run started by the training above so its data finishes uploading.
wandb.finish()

  from IPython.core.display import display


VBox(children=(Label(value='6462.162 MB of 6462.162 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))



0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,█▅▃▃▂▁▁▂▁▁▄▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_acc,▅▅▇▇▇▇▇▇▇▇▇█▇▇▇▇▆█▇▇██▁▇█▇██▇████▇██▆███
val_loss,▂▂▁▂▁▁▁▂▁▂▁▁▂▂▂▂▄▂▂▂▂▂█▃▂▃▂▂▃▃▃▂▂▄▃▃▄▂▂▃

0,1
epoch,49.0
train_loss,0.00632
trainer/global_step,18049.0
val_acc,0.92372
val_loss,0.34487


### Testing Code Updates

In [8]:
# Rebuild `dataset` from the local 'dataloop' directory instead of going through DataloopDataset.
# NOTE(review): presumably these are the files already downloaded from Dataloop - confirm.
dataset = DataloopDatasetDirectory(dataset_dir='dataloop', augmentation=augmentation)

In [9]:
batch_size = hparams['batch_size']

# Log to Weights & Biases only when enabled in hparams; otherwise pass no logger.
# NOTE(review): dt_string was computed once earlier, so this run reuses that run name.
logger = (
    WandbLogger(project="car-condition-classifier", log_model='all', name=dt_string)
    if hparams['use_wandb']
    else None
)
model = ResNet()

# Always give the Trainer an explicit accelerator name. Recent PyTorch Lightning
# releases reject `accelerator=None`, so the no-GPU path must say "cpu".
accelerator = "gpu" if hparams['num_gpus'] > 0 else "cpu"

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /home/ec2-user/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 378MB/s]


In [10]:
# Bundle the fresh model and the directory-backed dataset into the object handed to trainer.fit.
# NOTE(review): validation_set_size=0.2 presumably holds out 20% of the data - confirm.
training_wheels = TrainingWheels(
    model=model,
    dataset=dataset,
    validation_set_size=0.2,
    batch_size=batch_size,
    augmentation=augmentation,
    lr=hparams['lr'],
    enable_image_logging=hparams['enable_image_logging'],
)

# Lightning trainer; at least one device is always requested, even when num_gpus is 0.
trainer = pl.Trainer(
    max_epochs=int(hparams['max_epochs']),
    logger=logger,
    accelerator=accelerator,
    devices=max(hparams['num_gpus'], 1),
)

/home/ec2-user/anaconda3/envs/pytorch_p310/lib/python3.10/site-packages/pytorch_lightning/utilities/parsing.py:198: Attribute 'model' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['model'])`.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [None]:
# Start training; ckpt_path is None unless hparams['resume_from_checkpoint'] is set.
trainer.fit(
    model=training_wheels,
    ckpt_path=hparams['resume_from_checkpoint'],
)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mashley-lawrencehuizenga[0m ([33mfyusion[0m). Use [1m`wandb login --relogin`[0m to force relogin
  from IPython.core.display import HTML, display  # type: ignore


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type   | Params
---------------------------------
0 | model | ResNet | 11.2 M
---------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.710    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/ec2-user/anaconda3/envs/pytorch_p310/lib/python3.10/site-packages/pytorch_lightning/utilities/data.py:77: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 8. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

In [None]:
# Close the W&B run started by the training above so its data finishes uploading.
wandb.finish()