In [1]:
# from google.colab import drive
# drive.mount('/content/drive')

In [2]:
# # Install required libs
# !pip install git+https://github.com/qubvel/segmentation_models.pytorch
# !pip install albumentations==0.4.6

In [3]:
# !sudo rm -rf /content/seg_recyclables
# !git clone https://github.com/finani/seg_recyclables.git

# import sys
# sys.path.append('/content/seg_recyclables')

In [4]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

print('')

from psutil import virtual_memory
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

if ram_gb < 20:
  print('Not using a high-RAM runtime')
else:
  print('You are using a high-RAM runtime!')

print('')

import os
import torch
print('Pytorch version: {}'.format(torch.__version__))
print('Is GPU available: {}'.format(torch.cuda.is_available()))
if torch.cuda.is_available():
  print(torch.cuda.get_device_name(0))
  print('The number of GPUs available: {}'.format(torch.cuda.device_count())) # Tesla P100-PCIE-16GB
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

print('CPU count: {}'.format(os.cpu_count()))  # 8

cuda = torch.version.cuda
cudnn = torch.backends.cudnn.version()
cudnn_major = cudnn // 1000
cudnn = cudnn % 1000
cudnn_minor = cudnn // 100
cudnn_patch = cudnn % 100
print('Cuda version: {}'.format(cuda)) # 11.1
print('Cudnn version: {}.{}.{}'.format(cudnn_major, cudnn_minor, cudnn_patch)) # 8.0.5


Tue Dec 28 23:52:39 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.86       Driver Version: 470.86       CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:01:00.0  On |                  N/A |
| N/A   44C    P5    18W /  N/A |   6129MiB /  7982MiB |     20%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# Train Binary Segmentation
Classes =  ['Background', 'UNKNOWN', 'General trash', 'Paper', 'Paper pack', 'Metal', 'Glass', 'Plastic', 'Styrofoam', 'Plastic bag', 'Battery', 'Clothing']

## Configuration

In [5]:
import os
import sys

import torch
import numpy as np

import segmentation_models_pytorch as smp
from torch.optim import lr_scheduler

import Utils
from ModelManager import ModelManager
from DataManager import DataManager, CustomAugmentation
from TrainManager import TrainManager
from InferManager import InferManager
from LossManager import DiceLoss
from LearningRateManager import CustomCosineAnnealingWarmUpRestarts

# Fix the random seed through the project helper so repeated runs start from
# the same seed value.
Utils.fix_random_seed(random_seed=21)

# Project layout: the dataset is read from <project_dir>/input and checkpoints
# are written to <project_dir>/saved/tm_test.
project_dir = '/home/weebee/recyclables/baseline'
dataset_dir = os.path.join(project_dir, 'input')
save_dir = os.path.join(project_dir, 'saved/tm_test')
if not os.path.isdir(dataset_dir):
    sys.exit('check dataset path!!')
# makedirs also creates the intermediate 'saved' directory (os.mkdir would
# raise FileNotFoundError if it is missing), and exist_ok=True makes the cell
# safe to re-run.
os.makedirs(save_dir, exist_ok=True)


## Train each class

In [6]:
# Train one binary segmentation model (Background vs. one target class) per
# entry of `class_names_to_train`.
#
# Only 'Clothing' is enabled here. To train every class after the first entry
# of the class list, use: class_names_to_train = Utils.get_classes()[1:]
class_names_to_train = ['Clothing']

for class_name in class_names_to_train:
    # Set Configures
    # target_classes = Utils.get_classes()
    target_classes = ['Background', class_name]

    # Checkpoint name derived from the target class names (lower-cased),
    # e.g. 'best_model_target_clothing.pt'.
    best_model_file_name = (
        'best_model_target_'
        + '_'.join(v.lower() for v in target_classes[1:])
        + '.pt'
    )

    # NOTE(review): 'run_name' and 'note' still mention OnecycleLR, but the
    # scheduler actually used below is CustomCosineAnnealingWarmUpRestarts.
    # Update the labels if that is not intentional.
    config_dict = {
        'project_name': 'test',
        'run_name': '[TM2_Mv2_OnecycleLR] ' + class_name,
        'network': 'DeepLabV3Plus',
        'encoder': 'resnet101',
        'encoder_weights': 'imagenet',
        'target_classes': target_classes,
        'activation': None,
        'multi_gpu': False,
        'num_epochs': 30,
        'batch_size': 4,
        'learning_rate_0': 1e-4,
        'number_worker': 8,
        'val_every': 1,
        'note': 'test train OnecycleLR'
    }

    # Make Model: one output channel per entry in target_classes.
    model_manager = ModelManager()
    model = model_manager.make_deeplabv3plus_model(
        encoder=config_dict['encoder'],
        encoder_weights=config_dict['encoder_weights'],
        class_number=len(target_classes),
        activation=config_dict['activation'],
        multi_gpu=config_dict['multi_gpu']
    )

    # Load Dataset
    data_manager = DataManager(dataset_path=dataset_dir)
    data_manager.assignDataLoaders(
        batch_size=config_dict['batch_size'],
        shuffle=True,
        number_worker=config_dict['number_worker'],
        drop_last=True,
        transform=CustomAugmentation.to_tensor_transform(),
        # transform=CustomAugmentation.medium_transform(),
        # transform=CustomAugmentation.medium_transform_v2(),
        target_segmentation=True,
        target_classes=target_classes
    )

    # # path of saved best model
    # model_path = os.path.join('/home/weebee/recyclables/baseline/saved/server_11_e50_train_1/', best_model_file_name)

    # # load the saved best model
    # checkpoint = torch.load(model_path, map_location=Utils.get_device())
    # state_dict = checkpoint.state_dict()
    # model.load_state_dict(state_dict)

    # Loss: alternatives that were tried are kept below for reference.
    criterion = smp.utils.losses.CrossEntropyLoss()
    # criterion = smp.utils.losses.BCEWithLogitsLoss()
    # criterion = tgm.losses.DiceLoss()
    # criterion = DiceLoss()
    # criterion = smp.utils.losses.JaccardLoss()

    # Single parameter group covering the whole model, starting at
    # learning_rate_0.
    optimizer = torch.optim.Adam(
        [dict(params=model.parameters(),
                lr=config_dict['learning_rate_0']
                ),
            ])

    # Learning-rate schedule. In the recorded run the logged lr ramps from
    # 1e-4 to 0.1 over the first 3 epochs, decays back to 1e-4 by epoch 10,
    # and the next cycle peaks at 0.05.
    # NOTE: this assignment rebinds the name `lr_scheduler`, which the import
    # cell also binds to the torch.optim.lr_scheduler module; the name is kept
    # because later cells may rely on it.
    # lr_scheduler = None
    # lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(
    #     optimizer, max_lr=0.01, steps_per_epoch=10, epochs=config_dict['num_epochs'], anneal_strategy='cos'
    # )
    # lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
    #     optimizer, T_0=1, T_mult=2, eta_min=5e-5,
    # )
    lr_scheduler = CustomCosineAnnealingWarmUpRestarts(
        optimizer, T_0=10, T_mult=1, eta_max=0.1,  T_up=3, gamma=0.5
    )

    # Run Train
    train_manager = TrainManager()
    train_manager.run_train(
        model=model,
        config_dict=config_dict,
        data_loader=data_manager.train_data_loader,
        val_loader=data_manager.val_data_loader,
        criterion=criterion,
        optimizer=optimizer,
        lr_scheduler=lr_scheduler,
        save_dir=save_dir,
        file_name=best_model_file_name,
        # file_name='best_model_1.pt',
        target_only_p=1.0 # -> 1 set = 4 batch_size
    )

loading annotations into memory...
Done (t=2.28s)
creating index...
index created!
loading annotations into memory...


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Done (t=0.75s)
creating index...
index created!

Start training..
num_epochs: 30
save_dir: /home/weebee/recyclables/baseline/saved/tm_test
file_name: best_model_target_clothing.pt
val_every: 1



wandb: Currently logged in as: inhwan-wee (use `wandb login --relogin` to force relogin)





Epoch: 0/30, Time: 2021-12-28 23:52:49, lr: 0.0001: 100%|██████████| 654/654 [00:51<00:00, 12.70it/s, loss=0.172, mean_loss=0.366, train_count=93]



Start validation #1


Epoch: 1: 100%|██████████| 163/163 [00:24<00:00,  6.59it/s, mIoU_all=0.552, mIoU_batch=0.5]



Validation #1  Average Loss: 0.2079, mIoU_all: 0.5519
Best performance at epoch or X0th epoch: 1
Save model in /home/weebee/recyclables/baseline/saved/tm_test



Epoch: 1/30, Time: 2021-12-28 23:54:06, lr: 0.033400000000000006: 100%|██████████| 654/654 [00:51<00:00, 12.72it/s, loss=0.0474, mean_loss=0.253, train_count=89]



Start validation #2


Epoch: 2: 100%|██████████| 163/163 [00:25<00:00,  6.29it/s, mIoU_all=0.939, mIoU_batch=1]



Validation #2  Average Loss: 0.0439, mIoU_all: 0.9393
Best performance at epoch or X0th epoch: 2
Save model in /home/weebee/recyclables/baseline/saved/tm_test



Epoch: 2/30, Time: 2021-12-28 23:55:24, lr: 0.06670000000000001: 100%|██████████| 654/654 [00:54<00:00, 11.90it/s, loss=0.185, mean_loss=0.171, train_count=93]



Start validation #3


Epoch: 3: 100%|██████████| 163/163 [00:26<00:00,  6.26it/s, mIoU_all=0.945, mIoU_batch=1]



Validation #3  Average Loss: 0.0648, mIoU_all: 0.9454



Epoch: 3/30, Time: 2021-12-28 23:56:46, lr: 0.1: 100%|██████████| 654/654 [00:52<00:00, 12.44it/s, loss=0.0983, mean_loss=0.169, train_count=90]



Start validation #4


Epoch: 4: 100%|██████████| 163/163 [00:25<00:00,  6.44it/s, mIoU_all=0.939, mIoU_batch=1]



Validation #4  Average Loss: 0.0959, mIoU_all: 0.9393



Epoch: 4/30, Time: 2021-12-28 23:58:04, lr: 0.09505339495172584: 100%|██████████| 654/654 [00:51<00:00, 12.65it/s, loss=0.0987, mean_loss=0.164, train_count=90]



Start validation #5


Epoch: 5: 100%|██████████| 163/163 [00:26<00:00,  6.16it/s, mIoU_all=0.942, mIoU_batch=0.486]



Validation #5  Average Loss: 0.0700, mIoU_all: 0.9424



Epoch: 5/30, Time: 2021-12-28 23:59:22, lr: 0.08119331560284375: 100%|██████████| 654/654 [00:55<00:00, 11.86it/s, loss=0.265, mean_loss=0.162, train_count=92]



Start validation #6


Epoch: 6: 100%|██████████| 163/163 [00:25<00:00,  6.32it/s, mIoU_all=0.942, mIoU_batch=1]



Validation #6  Average Loss: 0.0380, mIoU_all: 0.9424
Best performance at epoch or X0th epoch: 6
Save model in /home/weebee/recyclables/baseline/saved/tm_test



Epoch: 6/30, Time: 2021-12-29 00:00:44, lr: 0.06116492065111791: 100%|██████████| 654/654 [00:50<00:00, 12.86it/s, loss=0.0488, mean_loss=0.153, train_count=92]



Start validation #7


Epoch: 7: 100%|██████████| 163/163 [00:24<00:00,  6.57it/s, mIoU_all=0.942, mIoU_batch=1]



Validation #7  Average Loss: 0.0602, mIoU_all: 0.9424



Epoch: 7/30, Time: 2021-12-29 00:02:00, lr: 0.0389350793488821: 100%|██████████| 654/654 [00:49<00:00, 13.22it/s, loss=0.0356, mean_loss=0.154, train_count=89]



Start validation #8


Epoch: 8: 100%|██████████| 163/163 [00:24<00:00,  6.62it/s, mIoU_all=0.939, mIoU_batch=1]



Validation #8  Average Loss: 0.0474, mIoU_all: 0.9393



Epoch: 8/30, Time: 2021-12-29 00:03:14, lr: 0.018906684397156263: 100%|██████████| 654/654 [00:50<00:00, 12.99it/s, loss=0.301, mean_loss=0.15, train_count=90]



Start validation #9


Epoch: 9: 100%|██████████| 163/163 [00:25<00:00,  6.48it/s, mIoU_all=0.939, mIoU_batch=1]



Validation #9  Average Loss: 0.0455, mIoU_all: 0.9393



Epoch: 9/30, Time: 2021-12-29 00:04:30, lr: 0.005046605048274169: 100%|██████████| 654/654 [00:51<00:00, 12.60it/s, loss=0.207, mean_loss=0.148, train_count=90]



Start validation #10


Epoch: 10: 100%|██████████| 163/163 [00:26<00:00,  6.18it/s, mIoU_all=0.949, mIoU_batch=1]



Validation #10  Average Loss: 0.0506, mIoU_all: 0.9485



Epoch: 10/30, Time: 2021-12-29 00:05:49, lr: 0.0001: 100%|██████████| 654/654 [00:53<00:00, 12.13it/s, loss=0.0422, mean_loss=0.142, train_count=92]



Start validation #11


Epoch: 11: 100%|██████████| 163/163 [00:26<00:00,  6.18it/s, mIoU_all=0.949, mIoU_batch=1]



Validation #11  Average Loss: 0.0526, mIoU_all: 0.9486
Best performance at epoch or X0th epoch: 11
Save model in /home/weebee/recyclables/baseline/saved/tm_test



Epoch: 11/30, Time: 2021-12-29 00:07:10, lr: 0.016733333333333333: 100%|██████████| 654/654 [00:54<00:00, 12.11it/s, loss=0.376, mean_loss=0.145, train_count=94]



Start validation #12


Epoch: 12: 100%|██████████| 163/163 [00:25<00:00,  6.46it/s, mIoU_all=0.942, mIoU_batch=1]



Validation #12  Average Loss: 0.0451, mIoU_all: 0.9424
Best performance at epoch or X0th epoch: 12
Save model in /home/weebee/recyclables/baseline/saved/tm_test



Epoch: 12/30, Time: 2021-12-29 00:08:30, lr: 0.03336666666666667: 100%|██████████| 654/654 [00:49<00:00, 13.24it/s, loss=0.0353, mean_loss=0.156, train_count=88]



Start validation #13


Epoch: 13: 100%|██████████| 163/163 [00:25<00:00,  6.49it/s, mIoU_all=0.945, mIoU_batch=1]



Validation #13  Average Loss: 0.0502, mIoU_all: 0.9454



Epoch: 13/30, Time: 2021-12-29 00:09:45, lr: 0.05: 100%|██████████| 654/654 [00:50<00:00, 12.88it/s, loss=0.241, mean_loss=0.156, train_count=87]



Start validation #14


Epoch: 14: 100%|██████████| 163/163 [00:25<00:00,  6.30it/s, mIoU_all=0.942, mIoU_batch=1]



Validation #14  Average Loss: 0.0427, mIoU_all: 0.9424
Best performance at epoch or X0th epoch: 14
Save model in /home/weebee/recyclables/baseline/saved/tm_test



Epoch: 14/30, Time: 2021-12-29 00:11:02, lr: 0.047529173254165356: 100%|██████████| 654/654 [00:52<00:00, 12.46it/s, loss=0.0566, mean_loss=0.154, train_count=92]



Start validation #15


Epoch: 15: 100%|██████████| 163/163 [00:24<00:00,  6.60it/s, mIoU_all=0.943, mIoU_batch=1]



Validation #15  Average Loss: 0.0605, mIoU_all: 0.9427



Epoch: 15/30, Time: 2021-12-29 00:12:19, lr: 0.040606070556375405: 100%|██████████| 654/654 [00:50<00:00, 12.92it/s, loss=0.497, mean_loss=0.155, train_count=91]



Start validation #16


Epoch: 16: 100%|██████████| 163/163 [00:24<00:00,  6.75it/s, mIoU_all=0.945, mIoU_batch=1]



Validation #16  Average Loss: 0.0480, mIoU_all: 0.9454



Epoch: 16/30, Time: 2021-12-29 00:13:34, lr: 0.03060189730221004: 100%|██████████| 654/654 [00:49<00:00, 13.20it/s, loss=0.146, mean_loss=0.153, train_count=91]



Start validation #17


Epoch: 17: 100%|██████████| 163/163 [00:24<00:00,  6.62it/s, mIoU_all=0.945, mIoU_batch=1]



Validation #17  Average Loss: 0.0663, mIoU_all: 0.9454



Epoch: 17/30, Time: 2021-12-29 00:14:49, lr: 0.019498102697789958: 100%|██████████| 654/654 [00:51<00:00, 12.64it/s, loss=0.0739, mean_loss=0.151, train_count=93]



Start validation #18


Epoch: 18: 100%|██████████| 163/163 [00:24<00:00,  6.76it/s, mIoU_all=0.949, mIoU_batch=1]



Validation #18  Average Loss: 0.0635, mIoU_all: 0.9485



Epoch: 18/30, Time: 2021-12-29 00:16:05, lr: 0.009493929443624599: 100%|██████████| 654/654 [00:50<00:00, 12.99it/s, loss=0.493, mean_loss=0.153, train_count=90]



Start validation #19


Epoch: 19: 100%|██████████| 163/163 [00:25<00:00,  6.37it/s, mIoU_all=0.942, mIoU_batch=1]



Validation #19  Average Loss: 0.0539, mIoU_all: 0.9424



Epoch: 19/30, Time: 2021-12-29 00:17:21, lr: 0.002570826745834645: 100%|██████████| 654/654 [00:51<00:00, 12.82it/s, loss=0.183, mean_loss=0.153, train_count=89]



Start validation #20


Epoch: 20: 100%|██████████| 163/163 [00:24<00:00,  6.62it/s, mIoU_all=0.945, mIoU_batch=0.486]



Validation #20  Average Loss: 0.0542, mIoU_all: 0.9454



Epoch: 20/30, Time: 2021-12-29 00:18:37, lr: 0.0001: 100%|██████████| 654/654 [00:49<00:00, 13.09it/s, loss=0.416, mean_loss=0.153, train_count=89]



Start validation #21


Epoch: 21: 100%|██████████| 163/163 [00:25<00:00,  6.44it/s, mIoU_all=0.942, mIoU_batch=1]



Validation #21  Average Loss: 0.0538, mIoU_all: 0.9424
Best performance at epoch or X0th epoch: 21
Save model in /home/weebee/recyclables/baseline/saved/tm_test



Epoch: 21/30, Time: 2021-12-29 00:19:53, lr: 0.0084: 100%|██████████| 654/654 [00:49<00:00, 13.14it/s, loss=0.0719, mean_loss=0.152, train_count=90]



Start validation #22


Epoch: 22: 100%|██████████| 163/163 [00:24<00:00,  6.75it/s, mIoU_all=0.939, mIoU_batch=1]



Validation #22  Average Loss: 0.0532, mIoU_all: 0.9393
Best performance at epoch or X0th epoch: 22
Save model in /home/weebee/recyclables/baseline/saved/tm_test



Epoch: 22/30, Time: 2021-12-29 00:21:08, lr: 0.0167: 100%|██████████| 654/654 [00:50<00:00, 13.02it/s, loss=0.041, mean_loss=0.15, train_count=92]



Start validation #23


Epoch: 23: 100%|██████████| 163/163 [00:24<00:00,  6.61it/s, mIoU_all=0.939, mIoU_batch=1]



Validation #23  Average Loss: 0.0537, mIoU_all: 0.9393



Epoch: 23/30, Time: 2021-12-29 00:22:23, lr: 0.025: 100%|██████████| 654/654 [00:51<00:00, 12.79it/s, loss=0.304, mean_loss=0.152, train_count=91]



Start validation #24


Epoch: 24: 100%|██████████| 163/163 [00:25<00:00,  6.35it/s, mIoU_all=0.942, mIoU_batch=0.486]



Validation #24  Average Loss: 0.0602, mIoU_all: 0.9424



Epoch: 24/30, Time: 2021-12-29 00:23:40, lr: 0.02376706240538512: 100%|██████████| 654/654 [00:47<00:00, 13.82it/s, loss=0.0556, mean_loss=0.156, train_count=89]



Start validation #25


Epoch: 25: 100%|██████████| 163/163 [00:23<00:00,  6.81it/s, mIoU_all=0.939, mIoU_batch=1]



Validation #25  Average Loss: 0.0567, mIoU_all: 0.9393



Epoch: 25/30, Time: 2021-12-29 00:24:52, lr: 0.020312448033141233: 100%|██████████| 654/654 [00:47<00:00, 13.86it/s, loss=0.0566, mean_loss=0.156, train_count=88]



Start validation #26


Epoch: 26: 100%|██████████| 163/163 [00:23<00:00,  6.81it/s, mIoU_all=0.942, mIoU_batch=1]



Validation #26  Average Loss: 0.0498, mIoU_all: 0.9424
Best performance at epoch or X0th epoch: 26
Save model in /home/weebee/recyclables/baseline/saved/tm_test



Epoch: 26/30, Time: 2021-12-29 00:26:03, lr: 0.015320385627756113: 100%|██████████| 654/654 [00:48<00:00, 13.51it/s, loss=0.0353, mean_loss=0.154, train_count=88]



Start validation #27


Epoch: 27: 100%|██████████| 163/163 [00:25<00:00,  6.35it/s, mIoU_all=0.942, mIoU_batch=1]



Validation #27  Average Loss: 0.0467, mIoU_all: 0.9424
Best performance at epoch or X0th epoch: 27
Save model in /home/weebee/recyclables/baseline/saved/tm_test



Epoch: 27/30, Time: 2021-12-29 00:27:18, lr: 0.009779614372243887: 100%|██████████| 654/654 [00:52<00:00, 12.38it/s, loss=0.0979, mean_loss=0.151, train_count=91]



Start validation #28


Epoch: 28: 100%|██████████| 163/163 [00:24<00:00,  6.65it/s, mIoU_all=0.942, mIoU_batch=1]



Validation #28  Average Loss: 0.0538, mIoU_all: 0.9424



Epoch: 28/30, Time: 2021-12-29 00:28:36, lr: 0.0047875519668587685: 100%|██████████| 654/654 [00:48<00:00, 13.43it/s, loss=0.0479, mean_loss=0.147, train_count=91]



Start validation #29


Epoch: 29: 100%|██████████| 163/163 [00:24<00:00,  6.68it/s, mIoU_all=0.939, mIoU_batch=1]



Validation #29  Average Loss: 0.0524, mIoU_all: 0.9393



Epoch: 29/30, Time: 2021-12-29 00:29:49, lr: 0.0013329375946148831: 100%|██████████| 654/654 [00:50<00:00, 12.96it/s, loss=0.0428, mean_loss=0.145, train_count=91]



Start validation #30


Epoch: 30: 100%|██████████| 163/163 [00:24<00:00,  6.59it/s, mIoU_all=0.942, mIoU_batch=1]



Validation #30  Average Loss: 0.0533, mIoU_all: 0.9424


wandb: Network error (ConnectionError), entering retry loop.
