In [1]:
# from google.colab import drive
# drive.mount('/content/drive')

In [2]:
# # Install required libs
# !pip install git+https://github.com/qubvel/segmentation_models.pytorch
# !pip install albumentations==0.4.6

In [3]:
# !sudo rm -rf /content/seg_recyclables
# !git clone https://github.com/finani/seg_recyclables.git

# import sys
# sys.path.append('/content/seg_recyclables')

In [4]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

print('')

from psutil import virtual_memory

# Total physical memory in decimal gigabytes (bytes / 1e9). Note that this is
# the machine total, even though the message below words it as "available".
ram_gb = virtual_memory().total / 1e9
ram_message = 'Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb)
print(ram_message)

# 20 GB is the cut-off this notebook uses to call a runtime "high-RAM".
is_high_ram = ram_gb >= 20
print('You are using a high-RAM runtime!' if is_high_ram else 'Not using a high-RAM runtime')

print('')

import os
import torch

# Report the PyTorch build and the accelerator(s) visible to it.
print('Pytorch version: {}'.format(torch.__version__))
print('Is GPU available: {}'.format(torch.cuda.is_available()))
if torch.cuda.is_available():
  print(torch.cuda.get_device_name(0)) # e.g. Tesla P100-PCIE-16GB
  print('The number of GPUs available: {}'.format(torch.cuda.device_count()))
# Device string for the rest of the notebook: GPU when present, otherwise CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

print('CPU count: {}'.format(os.cpu_count()))  # e.g. 8

cuda = torch.version.cuda
print('Cuda version: {}'.format(cuda)) # e.g. 11.1

# torch.backends.cudnn.version() returns None when cuDNN is not available
# (for example on a CPU-only build), so guard before doing integer arithmetic.
cudnn = torch.backends.cudnn.version()
if cudnn is None:
  cudnn_major = cudnn_minor = cudnn_patch = None
  print('Cudnn version: not available')
else:
  # The packed integer is major*1000 + minor*100 + patch up to cuDNN 8
  # (e.g. 8005 -> 8.0.5) and major*10000 + minor*100 + patch from cuDNN 9
  # (e.g. 90100 -> 9.1.0). Values >= 10000 can only come from the newer scheme.
  major_scale = 10000 if cudnn >= 10000 else 1000
  cudnn_major = cudnn // major_scale
  cudnn_minor = (cudnn % major_scale) // 100
  cudnn_patch = cudnn % 100
  print('Cudnn version: {}.{}.{}'.format(cudnn_major, cudnn_minor, cudnn_patch)) # e.g. 8.0.5


Tue Dec 28 19:37:23 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.86       Driver Version: 470.86       CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:01:00.0  On |                  N/A |
| N/A   61C    P8    19W /  N/A |    747MiB /  7982MiB |     24%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# Train Binary Segmentation
Classes =  ['Background', 'UNKNOWN', 'General trash', 'Paper', 'Paper pack', 'Metal', 'Glass', 'Plastic', 'Styrofoam', 'Plastic bag', 'Battery', 'Clothing']

## Configuration

In [5]:
import os
import sys

import torch
import numpy as np

import segmentation_models_pytorch as smp
from torch.optim import lr_scheduler

import Utils
from ModelManager import ModelManager
from DataManager import DataManager, CustomAugmentation
from TrainManager import TrainManager
from InferManager import InferManager
from LossManager import DiceLoss
from LearningRateManager import CustomCosineAnnealingWarmUpRestarts

# Fix the random seed through the project helper so runs are repeatable.
Utils.fix_random_seed(random_seed=21)

# Project layout: the dataset is read from <project_dir>/input and checkpoints
# are written to <project_dir>/saved/tm_test.
project_dir = '/home/weebee/recyclables/baseline'
dataset_dir = os.path.join(project_dir, 'input')
save_dir = os.path.join(project_dir, 'saved/tm_test')

# The dataset must already exist; stop here instead of failing later in loading.
if not os.path.isdir(dataset_dir):
    sys.exit('check dataset path!!')

# makedirs (not mkdir): also creates the intermediate 'saved' directory on a
# fresh checkout, and is a no-op when the directory already exists.
os.makedirs(save_dir, exist_ok=True)


## Train each class

In [6]:
# Classes to train in this run. Each entry produces one binary segmentation
# model ('Background' vs. that class). Only 'Clothing' is trained here, which
# is what the previous loop did in effect: it overwrote the loop variable with
# 'Clothing' and broke out after one iteration. To train every class the
# original loop iterated over, use: train_class_names = Utils.get_classes()[1:]
train_class_names = ['Clothing']

for class_name in train_class_names:
    # Set Configures
    # target_classes = Utils.get_classes()
    target_classes = ['Background', class_name]

    # Suffix shared by checkpoint file names, e.g. 'clothing'.
    target_suffix = '_'.join([v.lower() for v in target_classes[1:]])

    config_dict = {
        'project_name': 'test',
        'run_name': '[TM2_Mv2] ' + class_name,
        'network': 'DeepLabV3Plus',
        'encoder': 'resnet101',
        'encoder_weights': 'imagenet',
        'target_classes': target_classes,
        'activation': None,
        'multi_gpu': False,
        'num_epochs': 50,
        'batch_size': 4,
        'learning_rate_0': 1e-4,
        'number_worker': 8,
        'val_every': 1,
        'note': 'test train'
    }

    # Make Model
    model_manager = ModelManager()
    model = model_manager.make_deeplabv3plus_model(
        encoder=config_dict['encoder'],
        encoder_weights=config_dict['encoder_weights'],
        class_number=len(target_classes),
        activation=config_dict['activation'],
        multi_gpu=config_dict['multi_gpu']
    )

    # Load Dataset
    data_manager = DataManager(dataset_path=dataset_dir)
    data_manager.assignDataLoaders(
        batch_size=config_dict['batch_size'],
        shuffle=True,
        number_worker=config_dict['number_worker'],
        drop_last=True,
        # transform=CustomAugmentation.to_tensor_transform(),
        # transform=CustomAugmentation.medium_transform(),
        transform=CustomAugmentation.medium_transform_v2(),
        target_segmentation=True,
        target_classes=target_classes
    )

    # # path of saved best model
    # model_path = os.path.join('/home/weebee/recyclables/baseline/saved/server_11_e50_train_1/', 'best_model_target_' + target_suffix + '.pt')

    # # load the saved best model
    # checkpoint = torch.load(model_path, map_location=Utils.get_device())
    # state_dict = checkpoint.state_dict()
    # model.load_state_dict(state_dict)

    # Loss: cross-entropy is active; the alternatives are kept for experiments.
    criterion = smp.utils.losses.CrossEntropyLoss()
    # criterion = smp.utils.losses.BCEWithLogitsLoss()
    # criterion = tgm.losses.DiceLoss()
    # criterion = DiceLoss()
    # criterion = smp.utils.losses.JaccardLoss()

    optimizer = torch.optim.Adam(
        [dict(params=model.parameters(),
                lr=config_dict['learning_rate_0']
                ),
            ])

    # No LR schedule by default. The local is named `scheduler` (not
    # `lr_scheduler`) so it does not overwrite the `torch.optim.lr_scheduler`
    # module imported into the notebook namespace.
    scheduler = None
    # scheduler = torch.optim.lr_scheduler.OneCycleLR(
    #     optimizer, max_lr=0.01, steps_per_epoch=10, epochs=config_dict['num_epochs'], anneal_strategy='cos'
    # )
    # scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
    #     optimizer, T_0=1, T_mult=2, eta_min=5e-5,
    # )
    # scheduler = CustomCosineAnnealingWarmUpRestarts(
    #     optimizer, T_0=20, T_mult=1, eta_max=0.1,  T_up=2, gamma=0.5
    # )

    # Run Train
    train_manager = TrainManager()
    train_manager.run_train(
        model=model,
        config_dict=config_dict,
        data_loader=data_manager.train_data_loader,
        val_loader=data_manager.val_data_loader,
        criterion=criterion,
        optimizer=optimizer,
        lr_scheduler=scheduler,
        save_dir=save_dir,
        file_name='best_model_target_' + target_suffix + '.pt',
        # file_name='best_model_1.pt',
        target_only_p=1.0 # -> 1 set = 4 batch_size
    )

loading annotations into memory...
Done (t=2.31s)
creating index...
index created!
loading annotations into memory...


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Done (t=0.75s)
creating index...
index created!

Start training..
num_epochs: 50
save_dir: /home/weebee/recyclables/baseline/saved/tm_test
file_name: best_model_target_clothing.pt
val_every: 1



wandb: Currently logged in as: inhwan-wee (use `wandb login --relogin` to force relogin)





Epoch: 0/50, Time: 2021-12-28 19:37:34, lr: 0.0001: 100%|██████████| 654/654 [00:49<00:00, 13.34it/s, loss=0.357, mean_loss=0.451, train_count=67]



Start validation #1


Epoch: 1: 100%|██████████| 163/163 [00:24<00:00,  6.58it/s, mIoU_all=0.921, mIoU_batch=1]



Validation #1  Average Loss: 0.1896, mIoU_all: 0.9211
Best performance at epoch or X0th epoch: 1
Save model in /home/weebee/recyclables/baseline/saved/tm_test



Epoch: 1/50, Time: 2021-12-28 19:38:48, lr: 0.0001: 100%|██████████| 654/654 [00:49<00:00, 13.20it/s, loss=0.261, mean_loss=0.3, train_count=56]



Start validation #2


Epoch: 2: 100%|██████████| 163/163 [00:25<00:00,  6.51it/s, mIoU_all=0.603, mIoU_batch=0.497]



Validation #2  Average Loss: 0.1934, mIoU_all: 0.6027



Epoch: 2/50, Time: 2021-12-28 19:40:03, lr: 0.0001: 100%|██████████| 654/654 [00:46<00:00, 14.10it/s, loss=0.111, mean_loss=0.241, train_count=58]



Start validation #3


Epoch: 3: 100%|██████████| 163/163 [00:25<00:00,  6.42it/s, mIoU_all=0.6, mIoU_batch=0.5]



Validation #3  Average Loss: 0.0975, mIoU_all: 0.5997
Best performance at epoch or X0th epoch: 3
Save model in /home/weebee/recyclables/baseline/saved/tm_test



Epoch: 3/50, Time: 2021-12-28 19:41:15, lr: 0.0001: 100%|██████████| 654/654 [00:48<00:00, 13.50it/s, loss=0.136, mean_loss=0.255, train_count=57]



Start validation #4


Epoch: 4: 100%|██████████| 163/163 [00:25<00:00,  6.34it/s, mIoU_all=0.531, mIoU_batch=0.49]



Validation #4  Average Loss: 0.1857, mIoU_all: 0.5312



Epoch: 4/50, Time: 2021-12-28 19:42:30, lr: 0.0001: 100%|██████████| 654/654 [00:49<00:00, 13.20it/s, loss=0.102, mean_loss=0.243, train_count=59]



Start validation #5


Epoch: 5: 100%|██████████| 163/163 [00:25<00:00,  6.50it/s, mIoU_all=0.856, mIoU_batch=1]



Validation #5  Average Loss: 0.0901, mIoU_all: 0.8559
Best performance at epoch or X0th epoch: 5
Save model in /home/weebee/recyclables/baseline/saved/tm_test



Epoch: 5/50, Time: 2021-12-28 19:43:45, lr: 0.0001: 100%|██████████| 654/654 [00:49<00:00, 13.19it/s, loss=0.209, mean_loss=0.215, train_count=65]



Start validation #6


Epoch: 6: 100%|██████████| 163/163 [00:24<00:00,  6.57it/s, mIoU_all=0.593, mIoU_batch=0.452]



Validation #6  Average Loss: 0.1984, mIoU_all: 0.5933



Epoch: 6/50, Time: 2021-12-28 19:45:00, lr: 0.0001: 100%|██████████| 654/654 [00:47<00:00, 13.68it/s, loss=0.0929, mean_loss=0.202, train_count=59]



Start validation #7


Epoch: 7: 100%|██████████| 163/163 [00:26<00:00,  6.26it/s, mIoU_all=0.958, mIoU_batch=1]



Validation #7  Average Loss: 0.0725, mIoU_all: 0.9583
Best performance at epoch or X0th epoch: 7
Save model in /home/weebee/recyclables/baseline/saved/tm_test



Epoch: 7/50, Time: 2021-12-28 19:46:14, lr: 0.0001: 100%|██████████| 654/654 [00:52<00:00, 12.53it/s, loss=0.081, mean_loss=0.233, train_count=67]



Start validation #8


Epoch: 8: 100%|██████████| 163/163 [00:24<00:00,  6.68it/s, mIoU_all=0.948, mIoU_batch=1]



Validation #8  Average Loss: 0.0838, mIoU_all: 0.9477



Epoch: 8/50, Time: 2021-12-28 19:47:31, lr: 0.0001: 100%|██████████| 654/654 [00:49<00:00, 13.34it/s, loss=0.0805, mean_loss=0.216, train_count=59]



Start validation #9


Epoch: 9: 100%|██████████| 163/163 [00:25<00:00,  6.39it/s, mIoU_all=0.906, mIoU_batch=1]



Validation #9  Average Loss: 0.0775, mIoU_all: 0.9058



Epoch: 9/50, Time: 2021-12-28 19:48:46, lr: 0.0001: 100%|██████████| 654/654 [00:53<00:00, 12.25it/s, loss=0.0809, mean_loss=0.196, train_count=64]



Start validation #10


Epoch: 10: 100%|██████████| 163/163 [00:26<00:00,  6.11it/s, mIoU_all=0.875, mIoU_batch=1]



Validation #10  Average Loss: 0.0674, mIoU_all: 0.8748
Best performance at epoch or X0th epoch: 10
Save model in /home/weebee/recyclables/baseline/saved/tm_test



Epoch: 10/50, Time: 2021-12-28 19:50:06, lr: 0.0001: 100%|██████████| 654/654 [00:48<00:00, 13.38it/s, loss=0.265, mean_loss=0.212, train_count=56]



Start validation #11


Epoch: 11: 100%|██████████| 163/163 [00:24<00:00,  6.60it/s, mIoU_all=0.863, mIoU_batch=1]



Validation #11  Average Loss: 0.0584, mIoU_all: 0.8634
Best performance at epoch or X0th epoch: 11
Save model in /home/weebee/recyclables/baseline/saved/tm_test



Epoch: 11/50, Time: 2021-12-28 19:51:21, lr: 0.0001: 100%|██████████| 654/654 [00:52<00:00, 12.44it/s, loss=0.156, mean_loss=0.226, train_count=62]



Start validation #12


Epoch: 12: 100%|██████████| 163/163 [00:26<00:00,  6.19it/s, mIoU_all=0.649, mIoU_batch=1]



Validation #12  Average Loss: 0.1018, mIoU_all: 0.6491



Epoch: 12/50, Time: 2021-12-28 19:52:40, lr: 0.0001: 100%|██████████| 654/654 [00:52<00:00, 12.48it/s, loss=0.0876, mean_loss=0.223, train_count=67]



Start validation #13


Epoch: 13: 100%|██████████| 163/163 [00:24<00:00,  6.63it/s, mIoU_all=0.732, mIoU_batch=0.492]



Validation #13  Average Loss: 0.0925, mIoU_all: 0.7325



Epoch: 13/50, Time: 2021-12-28 19:53:57, lr: 0.0001: 100%|██████████| 654/654 [00:49<00:00, 13.08it/s, loss=0.0764, mean_loss=0.227, train_count=59]



Start validation #14


Epoch: 14: 100%|██████████| 163/163 [00:25<00:00,  6.46it/s, mIoU_all=0.88, mIoU_batch=1]



Validation #14  Average Loss: 0.0909, mIoU_all: 0.8798



Epoch: 14/50, Time: 2021-12-28 19:55:13, lr: 0.0001: 100%|██████████| 654/654 [00:51<00:00, 12.60it/s, loss=0.0885, mean_loss=0.241, train_count=66]



Start validation #15


Epoch: 15: 100%|██████████| 163/163 [00:25<00:00,  6.49it/s, mIoU_all=0.796, mIoU_batch=1]



Validation #15  Average Loss: 0.0882, mIoU_all: 0.7965



Epoch: 15/50, Time: 2021-12-28 19:56:30, lr: 0.0001: 100%|██████████| 654/654 [00:51<00:00, 12.81it/s, loss=0.12, mean_loss=0.203, train_count=70]



Start validation #16


Epoch: 16: 100%|██████████| 163/163 [00:26<00:00,  6.26it/s, mIoU_all=0.827, mIoU_batch=1]



Validation #16  Average Loss: 0.0669, mIoU_all: 0.8270



Epoch: 16/50, Time: 2021-12-28 19:57:47, lr: 0.0001: 100%|██████████| 654/654 [00:51<00:00, 12.59it/s, loss=0.26, mean_loss=0.211, train_count=68]



Start validation #17


Epoch: 17: 100%|██████████| 163/163 [00:25<00:00,  6.35it/s, mIoU_all=0.64, mIoU_batch=0.434]



Validation #17  Average Loss: 0.1469, mIoU_all: 0.6402



Epoch: 17/50, Time: 2021-12-28 19:59:05, lr: 0.0001: 100%|██████████| 654/654 [00:49<00:00, 13.09it/s, loss=0.0841, mean_loss=0.225, train_count=60]



Start validation #18


Epoch: 18: 100%|██████████| 163/163 [00:24<00:00,  6.54it/s, mIoU_all=0.938, mIoU_batch=0.499]



Validation #18  Average Loss: 0.0645, mIoU_all: 0.9383



Epoch: 18/50, Time: 2021-12-28 20:00:20, lr: 0.0001: 100%|██████████| 654/654 [00:52<00:00, 12.43it/s, loss=0.131, mean_loss=0.198, train_count=70]



Start validation #19


Epoch: 19: 100%|██████████| 163/163 [00:24<00:00,  6.56it/s, mIoU_all=0.857, mIoU_batch=1]



Validation #19  Average Loss: 0.0769, mIoU_all: 0.8570



Epoch: 19/50, Time: 2021-12-28 20:01:38, lr: 0.0001: 100%|██████████| 654/654 [00:50<00:00, 12.99it/s, loss=0.0595, mean_loss=0.2, train_count=55]



Start validation #20


Epoch: 20: 100%|██████████| 163/163 [00:26<00:00,  6.18it/s, mIoU_all=0.677, mIoU_batch=0.498]



Validation #20  Average Loss: 0.0872, mIoU_all: 0.6774



Epoch: 20/50, Time: 2021-12-28 20:02:55, lr: 0.0001: 100%|██████████| 654/654 [00:53<00:00, 12.29it/s, loss=0.224, mean_loss=0.198, train_count=64]



Start validation #21


Epoch: 21: 100%|██████████| 163/163 [00:25<00:00,  6.46it/s, mIoU_all=0.587, mIoU_batch=1]



Validation #21  Average Loss: 0.1205, mIoU_all: 0.5866
Best performance at epoch or X0th epoch: 21
Save model in /home/weebee/recyclables/baseline/saved/tm_test



Epoch: 21/50, Time: 2021-12-28 20:04:14, lr: 0.0001: 100%|██████████| 654/654 [00:49<00:00, 13.34it/s, loss=0.0813, mean_loss=0.177, train_count=58]



Start validation #22


Epoch: 22: 100%|██████████| 163/163 [00:25<00:00,  6.49it/s, mIoU_all=0.83, mIoU_batch=0.5]



Validation #22  Average Loss: 0.0673, mIoU_all: 0.8300
Best performance at epoch or X0th epoch: 22
Save model in /home/weebee/recyclables/baseline/saved/tm_test



Epoch: 22/50, Time: 2021-12-28 20:05:29, lr: 0.0001: 100%|██████████| 654/654 [00:48<00:00, 13.45it/s, loss=0.0888, mean_loss=0.197, train_count=60]



Start validation #23


Epoch: 23: 100%|██████████| 163/163 [00:25<00:00,  6.42it/s, mIoU_all=0.764, mIoU_batch=1]



Validation #23  Average Loss: 0.0711, mIoU_all: 0.7636



Epoch: 23/50, Time: 2021-12-28 20:06:43, lr: 0.0001: 100%|██████████| 654/654 [00:50<00:00, 12.96it/s, loss=0.308, mean_loss=0.188, train_count=60]



Start validation #24


Epoch: 24: 100%|██████████| 163/163 [00:24<00:00,  6.56it/s, mIoU_all=0.933, mIoU_batch=1]



Validation #24  Average Loss: 0.0478, mIoU_all: 0.9331
Best performance at epoch or X0th epoch: 24
Save model in /home/weebee/recyclables/baseline/saved/tm_test



Epoch: 24/50, Time: 2021-12-28 20:07:59, lr: 0.0001: 100%|██████████| 654/654 [00:53<00:00, 12.14it/s, loss=0.0913, mean_loss=0.21, train_count=67]



Start validation #25


Epoch: 25: 100%|██████████| 163/163 [00:25<00:00,  6.38it/s, mIoU_all=0.673, mIoU_batch=1]



Validation #25  Average Loss: 0.0904, mIoU_all: 0.6733



Epoch: 25/50, Time: 2021-12-28 20:09:19, lr: 0.0001: 100%|██████████| 654/654 [00:50<00:00, 12.89it/s, loss=0.376, mean_loss=0.2, train_count=57]



Start validation #26


Epoch: 26: 100%|██████████| 163/163 [00:25<00:00,  6.31it/s, mIoU_all=0.551, mIoU_batch=0.496]



Validation #26  Average Loss: 0.1449, mIoU_all: 0.5507



Epoch: 26/50, Time: 2021-12-28 20:10:36, lr: 0.0001: 100%|██████████| 654/654 [00:54<00:00, 11.89it/s, loss=0.0803, mean_loss=0.19, train_count=69]



Start validation #27


Epoch: 27: 100%|██████████| 163/163 [00:25<00:00,  6.40it/s, mIoU_all=0.567, mIoU_batch=0.49]



Validation #27  Average Loss: 0.1074, mIoU_all: 0.5674



Epoch: 27/50, Time: 2021-12-28 20:11:56, lr: 0.0001: 100%|██████████| 654/654 [00:49<00:00, 13.28it/s, loss=0.0634, mean_loss=0.195, train_count=59]



Start validation #28


Epoch: 28: 100%|██████████| 163/163 [00:25<00:00,  6.36it/s, mIoU_all=0.685, mIoU_batch=1]



Validation #28  Average Loss: 0.0917, mIoU_all: 0.6849



Epoch: 28/50, Time: 2021-12-28 20:13:12, lr: 0.0001: 100%|██████████| 654/654 [00:52<00:00, 12.45it/s, loss=0.199, mean_loss=0.194, train_count=60]



Start validation #29


Epoch: 29: 100%|██████████| 163/163 [00:25<00:00,  6.41it/s, mIoU_all=0.62, mIoU_batch=0.497]



Validation #29  Average Loss: 0.1327, mIoU_all: 0.6202



Epoch: 29/50, Time: 2021-12-28 20:14:30, lr: 0.0001: 100%|██████████| 654/654 [00:49<00:00, 13.35it/s, loss=0.0588, mean_loss=0.206, train_count=57]



Start validation #30


Epoch: 30: 100%|██████████| 163/163 [00:25<00:00,  6.40it/s, mIoU_all=0.77, mIoU_batch=1]



Validation #30  Average Loss: 0.0870, mIoU_all: 0.7700



Epoch: 30/50, Time: 2021-12-28 20:15:45, lr: 0.0001: 100%|██████████| 654/654 [00:51<00:00, 12.68it/s, loss=0.109, mean_loss=0.244, train_count=68]



Start validation #31


Epoch: 31: 100%|██████████| 163/163 [00:25<00:00,  6.41it/s, mIoU_all=0.6, mIoU_batch=0.341]



Validation #31  Average Loss: 0.2348, mIoU_all: 0.5999
Best performance at epoch or X0th epoch: 31
Save model in /home/weebee/recyclables/baseline/saved/tm_test



Epoch: 31/50, Time: 2021-12-28 20:17:02, lr: 0.0001:  12%|█▏        | 77/654 [00:08<01:04,  8.91it/s, loss=0.218, mean_loss=0.143, train_count=10]


KeyboardInterrupt: 