In [None]:
# Colab only: mount Google Drive at /content/drive/ so the dataset and output
# folders referenced later (under /content/drive/MyDrive/...) are reachable.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
!pip install segmentation_models_pytorch 
!pip install torchmetrics

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting segmentation_models_pytorch
  Downloading segmentation_models_pytorch-0.3.2-py3-none-any.whl (106 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.7/106.7 KB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting timm==0.6.12
  Downloading timm-0.6.12-py3-none-any.whl (549 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m549.1/549.1 KB[0m [31m37.4 MB/s[0m eta [36m0:00:00[0m
Collecting efficientnet-pytorch==0.7.1
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pretrainedmodels==0.7.4
  Downloading pretrainedmodels-0.7.4.tar.gz (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58.8 KB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting munch
  Downloading munch-2.5.0-py2.py3

In [None]:
from collections import namedtuple
import os
import numpy as np
import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.io as tv
import torchvision.transforms as transf
import torch.utils.data
from torch.utils.data import DataLoader
from matplotlib import pyplot as plt
from torch.optim import Adam
from tqdm import tqdm
import segmentation_models_pytorch as smp
import torchmetrics
from torch.optim.lr_scheduler import ReduceLROnPlateau

In [None]:
# Reproducibility: seed the PyTorch RNGs once and, when a GPU is present,
# force cuDNN to use deterministic kernels without auto-tuning.
SEED = 100
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

#################################################################################################################################
# Label namedtuple
# Taken from the dataset's official GitHub: https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/helpers/labels.py
#################################################################################################################################
# Record type describing one Cityscapes label. Field order matters: the rows of
# the label table are built positionally.
_LABEL_FIELDS = (
    # Unique, human-readable identifier of the label, e.g. 'car', 'person'.
    'name',

    # Integer ID used to encode this label in the ground-truth images.
    # -1 means the label has no ID and is ignored when ground-truth images are
    # created (e.g. license plate). These IDs are the ones the evaluation
    # server expects, so they must not be modified.
    'id',

    # Training ID, free to be changed to suit the method; ground-truth images
    # with train IDs are then created with the tools in the 'preparation'
    # folder. Results must still be validated/submitted with the regular IDs.
    # Several labels may share one trainId, in which case they collapse into a
    # single class; the inverse mapping uses the first label defined with that
    # trainId (e.g. all void-type classes mapped to one ID). Max value is 255.
    'trainId',

    # Name of the category this label belongs to.
    'category',

    # ID of that category; used to create category-level ground-truth images.
    'categoryId',

    # Whether the label distinguishes between single instances.
    'hasInstances',

    # Whether pixels with this ground-truth class are ignored during evaluation.
    'ignoreInEval',

    # Colour of the label.
    'color',
)

Label = namedtuple('Label', _LABEL_FIELDS)

# Table of all Cityscapes labels, one Label record per row (columns as in the header row below).
# In this table every label with ignoreInEval=True carries trainId 0, and the remaining
# labels carry trainIds 1..19, i.e. 20 distinct training IDs in total.
# NOTE(review): the table is not referenced by the code visible in this notebook; presumably
# the label masks on disk were generated from these trainIds — confirm.
labels = [
    #       name                     id    trainId   category            catId     hasInstances   ignoreInEval   color
    Label(  'unlabeled'            ,  0 ,      0 , 'void'            , 0       , False        , True         , (  0,  0,  0) ),
    Label(  'ego vehicle'          ,  1 ,      0 , 'void'            , 0       , False        , True         , (  0,  0,  0) ),
    Label(  'rectification border' ,  2 ,      0 , 'void'            , 0       , False        , True         , (  0,  0,  0) ),
    Label(  'out of roi'           ,  3 ,      0 , 'void'            , 0       , False        , True         , (  0,  0,  0) ),
    Label(  'static'               ,  4 ,      0 , 'void'            , 0       , False        , True         , (  0,  0,  0) ),
    Label(  'dynamic'              ,  5 ,      0 , 'void'            , 0       , False        , True         , (111, 74,  0) ),
    Label(  'ground'               ,  6 ,      0 , 'void'            , 0       , False        , True         , ( 81,  0, 81) ),
    Label(  'road'                 ,  7 ,      1 , 'flat'            , 1       , False        , False        , (128, 64,128) ),
    Label(  'sidewalk'             ,  8 ,      2 , 'flat'            , 1       , False        , False        , (244, 35,232) ),
    Label(  'parking'              ,  9 ,      0 , 'flat'            , 1       , False        , True         , (250,170,160) ),
    Label(  'rail track'           , 10 ,      0 , 'flat'            , 1       , False        , True         , (230,150,140) ),
    Label(  'building'             , 11 ,      3 , 'construction'    , 2       , False        , False        , ( 70, 70, 70) ),
    Label(  'wall'                 , 12 ,      4 , 'construction'    , 2       , False        , False        , (102,102,156) ),
    Label(  'fence'                , 13 ,      5 , 'construction'    , 2       , False        , False        , (190,153,153) ),
    Label(  'guard rail'           , 14 ,      0 , 'construction'    , 2       , False        , True         , (180,165,180) ),
    Label(  'bridge'               , 15 ,      0 , 'construction'    , 2       , False        , True         , (150,100,100) ),
    Label(  'tunnel'               , 16 ,      0 , 'construction'    , 2       , False        , True         , (150,120, 90) ),
    Label(  'pole'                 , 17 ,      6 , 'object'          , 3       , False        , False        , (153,153,153) ),
    Label(  'polegroup'            , 18 ,      0 , 'object'          , 3       , False        , True         , (153,153,153) ),
    Label(  'traffic light'        , 19 ,      7 , 'object'          , 3       , False        , False        , (250,170, 30) ),
    Label(  'traffic sign'         , 20 ,      8 , 'object'          , 3       , False        , False        , (220,220,  0) ),
    Label(  'vegetation'           , 21 ,      9 , 'nature'          , 4       , False        , False        , (107,142, 35) ),
    Label(  'terrain'              , 22 ,     10 , 'nature'          , 4       , False        , False        , (152,251,152) ),
    Label(  'sky'                  , 23 ,     11 , 'sky'             , 5       , False        , False        , ( 70,130,180) ),
    Label(  'person'               , 24 ,     12 , 'human'           , 6       , True         , False        , (220, 20, 60) ),
    Label(  'rider'                , 25 ,     13 , 'human'           , 6       , True         , False        , (255,  0,  0) ),
    Label(  'car'                  , 26 ,     14 , 'vehicle'         , 7       , True         , False        , (  0,  0,142) ),
    Label(  'truck'                , 27 ,     15 , 'vehicle'         , 7       , True         , False        , (  0,  0, 70) ),
    Label(  'bus'                  , 28 ,     16 , 'vehicle'         , 7       , True         , False        , (  0, 60,100) ),
    Label(  'caravan'              , 29 ,      0 , 'vehicle'         , 7       , True         , True         , (  0,  0, 90) ),
    Label(  'trailer'              , 30 ,      0 , 'vehicle'         , 7       , True         , True         , (  0,  0,110) ),
    Label(  'train'                , 31 ,     17 , 'vehicle'         , 7       , True         , False        , (  0, 80,100) ),
    Label(  'motorcycle'           , 32 ,     18 , 'vehicle'         , 7       , True         , False        , (  0,  0,230) ),
    Label(  'bicycle'              , 33 ,     19 , 'vehicle'         , 7       , True         , False        , (119, 11, 32) ),
    Label(  'license plate'        , -1 ,      0 , 'vehicle'         , 7       , False        , True         , (  0,  0,142) ),
]

#################################################################################################################################
# Define custom Train and Validation set classes
# This assumes that you have downloaded the Cityscapes dataset and placed the masks with your training IDs in the meta folder
# Method inspired from: https://github.com/fregu856/deeplabv3/blob/master/datasets.py
#################################################################################################################################
# City sub-folders (each with a trailing "/") that make up every split.
train_dirs = ["jena/", "zurich/", "weimar/", "ulm/", "tubingen/", "stuttgart/",
              "strasbourg/", "monchengladbach/", "krefeld/", "hanover/",
              "hamburg/", "erfurt/", "dusseldorf/", "darmstadt/", "cologne/",
              "bremen/"]
val_dirs = ["frankfurt/", "munster/", "lindau/"]
test_dirs = ["bochum/", "aachen/"]

# Dataset root on the mounted Drive. An absolute path is used (Drive is mounted
# at /content/drive/) so the lookup does not depend on the notebook's current
# working directory, and so it matches the absolute output folder used for
# checkpoints.
cityscapes_data_path = '/content/drive/MyDrive/Cityscapes_data'
# Folder holding the label masks; derived from the data root to keep the two in sync.
cityscapes_meta_path = cityscapes_data_path + '/meta'

class DatasetTrain(torch.utils.data.Dataset):
  """Cityscapes training split as (image, label mask) pairs resized to 256x512.

  Images are read from ``<cityscapes_data_path>/leftImg8bit/train/<city>/`` for
  every city in ``train_dirs``; the matching mask is expected at
  ``<cityscapes_meta_path>/label_imgs/<img_id>.png``.

  Args:
    cityscapes_data_path: root folder of the Cityscapes download.
    cityscapes_meta_path: folder containing the ``label_imgs`` directory.
  """

  _IMG_SUFFIX = "_leftImg8bit.png"
  _SIZE = (256, 512)

  def __init__(self, cityscapes_data_path, cityscapes_meta_path):
    self.img_dir = cityscapes_data_path + "/leftImg8bit/train/"
    self.label_dir = cityscapes_meta_path + "/label_imgs/"

    # Built once instead of on every __getitem__ call.
    self.img_resize = transf.Resize(self._SIZE, antialias=True)
    # Masks hold class IDs, not intensities: bilinear/antialiased resizing would
    # blend neighbouring IDs into unrelated classes, so use nearest-neighbour.
    self.label_resize = transf.Resize(self._SIZE, interpolation=transf.InterpolationMode.NEAREST)

    self.examples = []

    for train_dir in train_dirs:
      train_img_dir_path = self.img_dir + train_dir

      # os.listdir order is arbitrary; sort so the example order is reproducible.
      for file_name in sorted(os.listdir(train_img_dir_path)):
        # Skip anything that is not a left-camera image (stray files, folders).
        if not file_name.endswith(self._IMG_SUFFIX):
          continue
        img_id = file_name[:-len(self._IMG_SUFFIX)]

        self.examples.append({
          "img_path": train_img_dir_path + file_name,
          "label_img_path": self.label_dir + img_id + ".png",
          "img_id": img_id,
        })

    self.num_examples = len(self.examples)

  def __len__(self):
    """Number of (image, mask) pairs found at construction time."""
    return self.num_examples

  def __getitem__(self, index):
    """Load and resize one example.

    Returns:
      (img, label_img): both float32 tensors; ``img`` is divided by 255,
      ``label_img`` keeps the raw values stored in the mask PNG.
    """
    example = self.examples[index]

    img = tv.read_image(example["img_path"])
    img = self.img_resize(img)
    img = img.to(torch.float32) / 255.0

    label_img = tv.read_image(example["label_img_path"])
    label_img = self.label_resize(label_img)
    label_img = label_img.to(torch.float32)

    return (img, label_img)

class DatasetVal(torch.utils.data.Dataset):
  """Cityscapes validation split as (image, label mask) pairs resized to 256x512.

  Images are read from ``<cityscapes_data_path>/leftImg8bit/val/<city>/`` for
  every city in ``val_dirs``; the matching mask is expected at
  ``<cityscapes_meta_path>/label_imgs/<img_id>.png``.

  Args:
    cityscapes_data_path: root folder of the Cityscapes download.
    cityscapes_meta_path: folder containing the ``label_imgs`` directory.
  """

  _IMG_SUFFIX = "_leftImg8bit.png"
  _SIZE = (256, 512)

  def __init__(self, cityscapes_data_path, cityscapes_meta_path):
    self.img_dir = cityscapes_data_path + "/leftImg8bit/val/"
    self.label_dir = cityscapes_meta_path + "/label_imgs/"

    # Built once instead of on every __getitem__ call.
    self.img_resize = transf.Resize(self._SIZE, antialias=True)
    # Masks hold class IDs, not intensities: bilinear/antialiased resizing would
    # blend neighbouring IDs into unrelated classes, so use nearest-neighbour.
    self.label_resize = transf.Resize(self._SIZE, interpolation=transf.InterpolationMode.NEAREST)

    self.examples = []

    for val_dir in val_dirs:
      val_img_dir_path = self.img_dir + val_dir

      # os.listdir order is arbitrary; sort so the example order is reproducible.
      for file_name in sorted(os.listdir(val_img_dir_path)):
        # Skip anything that is not a left-camera image (stray files, folders).
        if not file_name.endswith(self._IMG_SUFFIX):
          continue
        img_id = file_name[:-len(self._IMG_SUFFIX)]

        self.examples.append({
          "img_path": val_img_dir_path + file_name,
          "label_img_path": self.label_dir + img_id + ".png",
          "img_id": img_id,
        })

    self.num_examples = len(self.examples)

  def __len__(self):
    """Number of (image, mask) pairs found at construction time."""
    return self.num_examples

  def __getitem__(self, index):
    """Load and resize one example.

    Returns:
      (img, label_img): both float32 tensors; ``img`` is divided by 255,
      ``label_img`` keeps the raw values stored in the mask PNG.
    """
    example = self.examples[index]

    img = tv.read_image(example["img_path"])
    img = self.img_resize(img)
    img = img.to(torch.float32) / 255.0

    label_img = tv.read_image(example["label_img_path"])
    label_img = self.label_resize(label_img)
    label_img = label_img.to(torch.float32)

    return (img, label_img)

# Build both splits from the same data/meta roots.
train_dataset = DatasetTrain(cityscapes_data_path, cityscapes_meta_path)
val_dataset = DatasetVal(cityscapes_data_path, cityscapes_meta_path)

#################################################################################################################################
# Define UNET-CBAM
#################################################################################################################################
# CBAM inspired by the official CBAM GitHub: https://github.com/Jongchan/attention-module
class Flatten(nn.Module):
  """Collapse every dimension after the first into a single one."""

  def forward(self, x):
    batch_size = x.size(0)
    return x.view(batch_size, -1)

class ChannelGate(nn.Module):
  """Channel attention: rescale each channel by a learned gate in (0, 1).

  The gate is the sigmoid of a shared two-layer MLP applied to both the
  average-pooled and the max-pooled per-channel descriptor, summed.

  Args:
    gate_channels: number of channels of the incoming feature map.
    reduction_ratio: divisor for the hidden width of the MLP.
  """

  def __init__(self, gate_channels, reduction_ratio=16):
    super().__init__()
    self.gate_channels = gate_channels
    hidden_channels = gate_channels // reduction_ratio
    self.mlp = nn.Sequential(
      Flatten(),
      nn.Linear(gate_channels, hidden_channels),
      nn.ReLU(),
      nn.Linear(hidden_channels, gate_channels),
    )

  def forward(self, x):
    # Pool over the full spatial extent so each channel yields one value.
    window = (x.size(2), x.size(3))
    avg_desc = F.avg_pool2d(x, window, stride=window)
    max_desc = F.max_pool2d(x, window, stride=window)
    channel_att = self.mlp(avg_desc) + self.mlp(max_desc)
    # Re-insert the two spatial dims and broadcast the gate over them.
    scale = torch.sigmoid(channel_att).unsqueeze(2).unsqueeze(3).expand_as(x)
    return x * scale

class ChannelPool(nn.Module):
    """Reduce the channel dim (dim 1) to two maps: channel-wise max, then channel-wise mean."""

    def forward(self, x):
        channel_max = x.max(dim=1).values
        channel_mean = x.mean(dim=1)
        return torch.stack((channel_max, channel_mean), dim=1)

class BasicConv(nn.Module):
  """A 2D convolution followed by batch normalisation (no activation).

  Args mirror ``nn.Conv2d``; ``out_planes`` is also exposed as ``out_channels``.
  """

  def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=False):
    super().__init__()
    self.out_channels = out_planes
    conv_options = dict(kernel_size=kernel_size, stride=stride, padding=padding,
                        dilation=dilation, groups=groups, bias=bias)
    self.conv = nn.Conv2d(in_planes, out_planes, **conv_options)
    self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True)

  def forward(self, x):
    return self.bn(self.conv(x))

class SpatialGate(nn.Module):
  """Spatial attention: rescale every location by a learned gate in (0, 1).

  The channel dim is compressed with ChannelPool, passed through a 7x7
  BasicConv producing one map, and squashed with a sigmoid.
  """

  def __init__(self):
    super().__init__()
    kernel_size = 7
    same_padding = (kernel_size - 1) // 2  # keeps the spatial size unchanged at stride 1
    self.compress = ChannelPool()
    self.spatial = BasicConv(2, 1, kernel_size, stride=1, padding=same_padding)

  def forward(self, x):
    gate = torch.sigmoid(self.spatial(self.compress(x)))
    # The single-channel gate broadcasts across all channels of x.
    return x * gate

class CBAM(nn.Module):
  """Attention block applying ChannelGate first and SpatialGate second.

  Args:
    gate_channels: number of channels of the incoming feature map.
    reduction_ratio: forwarded to ChannelGate.
  """

  def __init__(self, gate_channels, reduction_ratio=16):
    super().__init__()
    self.ChannelGate = ChannelGate(gate_channels, reduction_ratio)
    self.SpatialGate = SpatialGate()

  def forward(self, x):
    return self.SpatialGate(self.ChannelGate(x))

# DoubleConv to be used by UNET
class DoubleConv(nn.Module):
  """Two consecutive (3x3 conv -> batch norm -> ReLU) stages.

  The first stage maps ``in_channels`` to ``out_channels``; the second keeps
  ``out_channels``. Padding 1 at stride 1 leaves the spatial size unchanged.
  """

  def __init__(self, in_channels, out_channels):
    super().__init__()

    def conv_bn_relu(channels_in):
      # Layers are created in conv, bn, relu order so seeded initialisation is unchanged.
      return (
        nn.Conv2d(channels_in, out_channels, kernel_size=3, stride=1, padding=1, bias=False),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True),
      )

    self.conv = nn.Sequential(*conv_bn_relu(in_channels), *conv_bn_relu(out_channels))

  def forward(self, x):
    return self.conv(x)

class UNet(nn.Module):
  """Four-level U-Net whose every DoubleConv stage is followed by a CBAM block.

  Args:
    in_channels: channels of the input image.
    out_channels: channels of the final 1x1 convolution (one per class).
    inter_channels: encoder widths, shallowest first; indices 0..3 and the
      last entry are used.

  ``forward`` returns the raw output of the final convolution; the ``softmax``
  attribute is constructed but not applied there.
  """

  def __init__(self, in_channels, out_channels, inter_channels=[64, 128, 256, 512]):
    super().__init__()
    self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

    # Encoder - contracting path. Modules are created in the same order as
    # they are used so that seeded weight initialisation stays the same.
    widths = inter_channels
    self.e1 = DoubleConv(in_channels, widths[0])
    self.cbam1 = CBAM(gate_channels=widths[0])
    self.e2 = DoubleConv(widths[0], widths[1])
    self.cbam2 = CBAM(gate_channels=widths[1])
    self.e3 = DoubleConv(widths[1], widths[2])
    self.cbam3 = CBAM(gate_channels=widths[2])
    self.e4 = DoubleConv(widths[2], widths[3])
    self.cbam4 = CBAM(gate_channels=widths[3])

    # Bottleneck: doubles the deepest width.
    deepest = widths[-1]
    self.bottom = DoubleConv(deepest, deepest*2)
    self.cbam_bottom = CBAM(gate_channels=deepest*2)

    # Decoder - expansive path. Each level halves the channels with a
    # transposed conv, then a DoubleConv consumes the skip concatenation.
    up_widths = widths[::-1]
    self.d4_T = nn.ConvTranspose2d(up_widths[0]*2, up_widths[0], kernel_size=2, stride=2, padding=0)
    self.d4_C = DoubleConv(up_widths[0]*2, up_widths[0])
    self.cbam4_d = CBAM(gate_channels=up_widths[0])
    self.d3_T = nn.ConvTranspose2d(up_widths[0], up_widths[1], kernel_size=2, stride=2, padding=0)
    self.d3_C = DoubleConv(up_widths[0], up_widths[1])
    self.cbam3_d = CBAM(gate_channels=up_widths[1])
    self.d2_T = nn.ConvTranspose2d(up_widths[1], up_widths[2], kernel_size=2, stride=2, padding=0)
    self.d2_C = DoubleConv(up_widths[1], up_widths[2])
    self.cbam2_d = CBAM(gate_channels=up_widths[2])
    self.d1_T = nn.ConvTranspose2d(up_widths[2], up_widths[3], kernel_size=2, stride=2, padding=0)
    self.d1_C = DoubleConv(up_widths[2], up_widths[3])
    self.cbam1_d = CBAM(gate_channels=up_widths[3])

    # Final 1x1 convolution to the requested number of output channels.
    self.output = nn.Conv2d(up_widths[3], out_channels, kernel_size=1)
    self.softmax = nn.Softmax(dim=1)

  def forward(self, x):
    encoder_stages = (
      (self.e1, self.cbam1),
      (self.e2, self.cbam2),
      (self.e3, self.cbam3),
      (self.e4, self.cbam4),
    )
    decoder_stages = (
      (self.d4_T, self.d4_C, self.cbam4_d),
      (self.d3_T, self.d3_C, self.cbam3_d),
      (self.d2_T, self.d2_C, self.cbam2_d),
      (self.d1_T, self.d1_C, self.cbam1_d),
    )

    # Down path: keep each attended feature map for its skip connection.
    skips = []
    for double_conv, attention in encoder_stages:
      features = attention(double_conv(x))
      skips.append(features)
      x = self.pool(features)

    x = self.cbam_bottom(self.bottom(x))

    # Up path: deepest skip is consumed first, hence pop() from the end.
    for upsample, double_conv, attention in decoder_stages:
      x = upsample(x)
      x = torch.cat((skips.pop(), x), dim=1)
      x = attention(double_conv(x))

    return self.output(x)

#################################################################################################################################
# Define data loader, load model, hyper-parameters and metrics
#################################################################################################################################
# Number of classes the network predicts; used for the model head and every metric
# so the value is defined in exactly one place.
n_classes = 20

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# model
model = UNet(3, n_classes, [64, 128, 256, 512]).to(device)
# Loss Function
loss_fn = nn.CrossEntropyLoss()
# Optimizer
optimizer = torch.optim.AdamW(model.parameters(), lr=0.01)
# Halve the learning rate after 3 epochs without improvement of the monitored value, down to 0.00125.
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3, min_lr= 0.00125, verbose=True)
# Learning rate recorded after every epoch.
lr = []

# Jaccard index (IoU) under the four averaging modes that are tracked.
jaccard_micro = torchmetrics.JaccardIndex(task="multiclass", num_classes=n_classes, average="micro").to(device)
jaccard_macro = torchmetrics.JaccardIndex(task="multiclass", num_classes=n_classes, average="macro").to(device)
jaccard_weighted = torchmetrics.JaccardIndex(task="multiclass", num_classes=n_classes, average="weighted").to(device)
jaccard_none = torchmetrics.JaccardIndex(task="multiclass", num_classes=n_classes, average=None).to(device)

epochs = 50

# Number of batches per epoch; used to average per-batch losses and metrics.
len_train = len(train_loader)
len_val = len(val_loader)

# Per-epoch history of the batch-averaged training loss and metrics.
train_loss_per_epoch_avg = []
train_jaccard_micro_per_epoch_avg = []
train_jaccard_macro_per_epoch_avg = []
train_jaccard_weighted_per_epoch_avg = []
train_jaccard_none_per_epoch_avg = []

# Per-epoch history of the batch-averaged validation loss and metrics.
val_loss_per_epoch_avg = []
val_jaccard_micro_per_epoch_avg = []
val_jaccard_macro_per_epoch_avg = []
val_jaccard_weighted_per_epoch_avg = []
val_jaccard_none_per_epoch_avg = []

best_val_loss = float('inf')

# Output folder for checkpoints, loss logs and metric histories.
folder_path = "/content/drive/MyDrive/Cityscapes_data/256x512_4_batches/unet_cbam_full/"
# Create it up front: otherwise the first save would only fail after a full training epoch.
os.makedirs(folder_path, exist_ok=True)

#################################################################################################################################
# Training and validation loops
#################################################################################################################################
# Suffixes of the four tracked Jaccard variants, in the order they are logged and saved.
_JACCARD_NAMES = ("micro", "macro", "weighted", "none")
_JACCARD_METRICS = (jaccard_micro, jaccard_macro, jaccard_weighted, jaccard_none)
_TRAIN_JACCARD_HISTORIES = (train_jaccard_micro_per_epoch_avg, train_jaccard_macro_per_epoch_avg,
                            train_jaccard_weighted_per_epoch_avg, train_jaccard_none_per_epoch_avg)
_VAL_JACCARD_HISTORIES = (val_jaccard_micro_per_epoch_avg, val_jaccard_macro_per_epoch_avg,
                          val_jaccard_weighted_per_epoch_avg, val_jaccard_none_per_epoch_avg)


def _run_epoch(loader, training):
  """Run one full pass over ``loader``.

  When ``training`` is true the model is put in train mode and the optimizer is
  stepped on every batch; otherwise the model is in eval mode and gradients
  are disabled.

  Returns:
    (loss_total, metric_totals): the sum of per-batch losses, and a list with
    the sum of per-batch values of each metric in ``_JACCARD_METRICS``.
  """
  model.train(training)
  loss_total = 0
  metric_totals = [0] * len(_JACCARD_METRICS)

  with torch.set_grad_enabled(training):
    for image, label in tqdm(loader):
      image = image.to(device)
      label = label.to(device)

      if training:
        optimizer.zero_grad()
      output = model(image)

      # CrossEntropyLoss takes class indices without the channel dim, as int64.
      loss = loss_fn(output.to(torch.float32), torch.squeeze(label, 1).long())
      if training:
        loss.backward()
        optimizer.step()
      loss_total += loss.item()

      # Softmax is monotonic, so argmax on the raw logits picks the same class
      # without the extra pass. The channel dim is re-inserted to match label.
      y = torch.unsqueeze(torch.argmax(output, dim=1), dim=1)
      for i, metric in enumerate(_JACCARD_METRICS):
        metric_totals[i] += metric(y, label)

  return loss_total, metric_totals


def _record_epoch(split, epoch_number, loss_total, metric_totals, n_batches, loss_history, metric_histories):
  """Append batch-averaged loss/metrics for this epoch to the histories and print them."""
  loss_history.append(loss_total / n_batches)
  # [-1] rather than [epoch index]: stays correct even if a history is not empty at start.
  print("Epoch: {}, Average {} loss per epoch: {}".format(epoch_number, split, loss_history[-1]))
  for name, total, history in zip(_JACCARD_NAMES, metric_totals, metric_histories):
    history.append(total / n_batches)
    print("Epoch: {}, Average {} jaccard_{} coeff per epoch: {}".format(epoch_number, split, name, history[-1]))


def _save_lines(file_name, values):
  """Write one value per line into ``folder_path``; the file is closed even if a write fails."""
  with open(os.path.join(folder_path, file_name), 'w') as handle:
    handle.writelines(str(value) + "\n" for value in values)


def _save_metric_histories(split, metric_histories):
  """Persist each metric history as ``<split>_jaccard_<name>_per_epoch_avg.pth`` in ``folder_path``."""
  for name, history in zip(_JACCARD_NAMES, metric_histories):
    torch.save(history, os.path.join(folder_path, "{}_jaccard_{}_per_epoch_avg.pth".format(split, name)))


for e in range(epochs):
  print("Epoch: {}".format(e+1))

  # training
  train_loss_total, train_metric_totals = _run_epoch(train_loader, training=True)
  _record_epoch("train", e+1, train_loss_total, train_metric_totals, len_train,
                train_loss_per_epoch_avg, _TRAIN_JACCARD_HISTORIES)

  # Checkpoint the whole model and the training curves after every epoch.
  torch.save(model, os.path.join(folder_path, "city_unet.pth"))
  _save_lines('train_loss.txt', train_loss_per_epoch_avg)
  _save_metric_histories("train", _TRAIN_JACCARD_HISTORIES)

  # validating
  val_loss_total, val_metric_totals = _run_epoch(val_loader, training=False)

  # The scheduler and the best-weights check both use the summed validation loss.
  scheduler.step(val_loss_total)
  current_lr = optimizer.param_groups[0]['lr']
  print('epoch={}, learning rate={:.4f}'.format(e, current_lr))
  lr.append(current_lr)

  if val_loss_total < best_val_loss:
    torch.save(model.state_dict(), os.path.join(folder_path, 'best_weights.pth'))
    best_val_loss = val_loss_total

  _record_epoch("val", e+1, val_loss_total, val_metric_totals, len_val,
                val_loss_per_epoch_avg, _VAL_JACCARD_HISTORIES)

  _save_lines('val_loss.txt', val_loss_per_epoch_avg)
  _save_lines('lr.txt', lr)
  _save_metric_histories("val", _VAL_JACCARD_HISTORIES)

Epoch: 1


100%|██████████| 170/170 [50:39<00:00, 17.88s/it]


Epoch: 1, Average train loss per epoch: 1.1978999383309308
Epoch: 1, Average train jaccard_micro coeff per epoch: 0.4859926104545593
Epoch: 1, Average train jaccard_macro coeff per epoch: 0.15018637478351593
Epoch: 1, Average train jaccard_weighted coeff per epoch: 0.4746137261390686
Epoch: 1, Average train jaccard_none coeff per epoch: tensor([3.5821e-01, 6.9922e-01, 7.3583e-02, 4.7262e-01, 1.9466e-05, 8.1105e-05,
        3.4397e-05, 1.9446e-05, 2.1356e-05, 5.5467e-01, 8.6543e-02, 5.0416e-01,
        5.0816e-06, 3.6412e-06, 2.5453e-01, 0.0000e+00, 3.6874e-06, 0.0000e+00,
        0.0000e+00, 2.2318e-06], device='cuda:0')


100%|██████████| 32/32 [09:25<00:00, 17.66s/it]


epoch=0, learning rate=0.0100
Epoch: 1, Average val loss per epoch: 1.5467418171465397
Epoch: 1, Average val jaccard_micro coeff per epoch: 0.37382030487060547
Epoch: 1, Average val jaccard_macro coeff per epoch: 0.12437404692173004
Epoch: 1, Average val jaccard_weighted coeff per epoch: 0.41063910722732544
Epoch: 1, Average val jaccard_none coeff per epoch: tensor([0.4852, 0.5480, 0.0248, 0.4089, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.4684, 0.0666, 0.3757, 0.0000, 0.0000, 0.1097, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000], device='cuda:0')
Epoch: 2


100%|██████████| 170/170 [05:50<00:00,  2.06s/it]


Epoch: 2, Average train loss per epoch: 0.8902202041710124
Epoch: 2, Average train jaccard_micro coeff per epoch: 0.5910525918006897
Epoch: 2, Average train jaccard_macro coeff per epoch: 0.21679916977882385
Epoch: 2, Average train jaccard_weighted coeff per epoch: 0.5942114591598511
Epoch: 2, Average train jaccard_none coeff per epoch: tensor([4.9928e-01, 7.9202e-01, 2.6923e-01, 5.8733e-01, 0.0000e+00, 2.0372e-05,
        2.4900e-02, 0.0000e+00, 2.7855e-02, 6.6343e-01, 2.4611e-01, 7.2520e-01,
        4.9821e-06, 0.0000e+00, 5.0061e-01, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0')


100%|██████████| 32/32 [00:57<00:00,  1.81s/it]


epoch=1, learning rate=0.0100
Epoch: 2, Average val loss per epoch: 1.3539804872125387
Epoch: 2, Average val jaccard_micro coeff per epoch: 0.4498785734176636
Epoch: 2, Average val jaccard_macro coeff per epoch: 0.14103686809539795
Epoch: 2, Average val jaccard_weighted coeff per epoch: 0.4675024747848511
Epoch: 2, Average val jaccard_none coeff per epoch: tensor([0.4947, 0.7560, 0.1282, 0.3036, 0.0000, 0.0000, 0.0981, 0.0000, 0.0497,
        0.4992, 0.0517, 0.3291, 0.0000, 0.0000, 0.1105, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000], device='cuda:0')
Epoch: 3


100%|██████████| 170/170 [05:50<00:00,  2.06s/it]


Epoch: 3, Average train loss per epoch: 0.7893398099085864
Epoch: 3, Average train jaccard_micro coeff per epoch: 0.633386492729187
Epoch: 3, Average train jaccard_macro coeff per epoch: 0.24450284242630005
Epoch: 3, Average train jaccard_weighted coeff per epoch: 0.6378200054168701
Epoch: 3, Average train jaccard_none coeff per epoch: tensor([5.3253e-01, 8.2039e-01, 3.7876e-01, 6.4007e-01, 8.3144e-05, 1.0794e-04,
        7.4429e-02, 0.0000e+00, 7.3649e-02, 7.0641e-01, 2.8161e-01, 7.7997e-01,
        3.3965e-02, 0.0000e+00, 5.6809e-01, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=2, learning rate=0.0100
Epoch: 3, Average val loss per epoch: 1.116028068587184
Epoch: 3, Average val jaccard_micro coeff per epoch: 0.5006726980209351
Epoch: 3, Average val jaccard_macro coeff per epoch: 0.16842375695705414
Epoch: 3, Average val jaccard_weighted coeff per epoch: 0.5052645802497864
Epoch: 3, Average val jaccard_none coeff per epoch: tensor([3.5713e-01, 7.4644e-01, 1.9443e-01, 4.9600e-01, 0.0000e+00, 2.1929e-05,
        8.8940e-02, 0.0000e+00, 3.5330e-02, 5.3988e-01, 1.0669e-01, 5.2662e-01,
        2.9486e-02, 0.0000e+00, 2.4750e-01, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0')
Epoch: 4


100%|██████████| 170/170 [05:50<00:00,  2.06s/it]


Epoch: 4, Average train loss per epoch: 0.7323588504510767
Epoch: 4, Average train jaccard_micro coeff per epoch: 0.6587399244308472
Epoch: 4, Average train jaccard_macro coeff per epoch: 0.26209864020347595
Epoch: 4, Average train jaccard_weighted coeff per epoch: 0.6641770601272583
Epoch: 4, Average train jaccard_none coeff per epoch: tensor([0.5567, 0.8446, 0.4384, 0.6661, 0.0030, 0.0019, 0.0918, 0.0072, 0.0909,
        0.7311, 0.2900, 0.7948, 0.1124, 0.0000, 0.6107, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0024], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=3, learning rate=0.0100
Epoch: 4, Average val loss per epoch: 1.1027249842882156
Epoch: 4, Average val jaccard_micro coeff per epoch: 0.5605801939964294
Epoch: 4, Average val jaccard_macro coeff per epoch: 0.1868899166584015
Epoch: 4, Average val jaccard_weighted coeff per epoch: 0.5788977742195129
Epoch: 4, Average val jaccard_none coeff per epoch: tensor([5.7608e-01, 8.0961e-01, 3.0585e-01, 4.9970e-01, 0.0000e+00, 5.7341e-05,
        6.3562e-02, 1.0062e-02, 2.0890e-02, 6.5105e-01, 1.8097e-01, 5.6920e-02,
        6.2694e-02, 0.0000e+00, 4.9516e-01, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 5.1886e-03], device='cuda:0')
Epoch: 5


100%|██████████| 170/170 [05:50<00:00,  2.06s/it]


Epoch: 5, Average train loss per epoch: 0.724364060864729
Epoch: 5, Average train jaccard_micro coeff per epoch: 0.6589308381080627
Epoch: 5, Average train jaccard_macro coeff per epoch: 0.2652660310268402
Epoch: 5, Average train jaccard_weighted coeff per epoch: 0.6650243401527405
Epoch: 5, Average train jaccard_none coeff per epoch: tensor([0.5568, 0.8444, 0.4346, 0.6638, 0.0061, 0.0050, 0.0970, 0.0169, 0.0939,
        0.7305, 0.2943, 0.7936, 0.1356, 0.0000, 0.6295, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0033], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.81s/it]


epoch=4, learning rate=0.0100
Epoch: 5, Average val loss per epoch: 1.210920823737979
Epoch: 5, Average val jaccard_micro coeff per epoch: 0.49364933371543884
Epoch: 5, Average val jaccard_macro coeff per epoch: 0.18591167032718658
Epoch: 5, Average val jaccard_weighted coeff per epoch: 0.5104719400405884
Epoch: 5, Average val jaccard_none coeff per epoch: tensor([4.2273e-01, 6.3498e-01, 1.6563e-01, 5.5212e-01, 6.8371e-04, 0.0000e+00,
        6.2619e-02, 3.0131e-02, 5.8455e-02, 5.7176e-01, 7.3788e-02, 6.9699e-01,
        9.3789e-02, 0.0000e+00, 3.5454e-01, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 1.1311e-05], device='cuda:0')
Epoch: 6


100%|██████████| 170/170 [05:50<00:00,  2.06s/it]


Epoch: 6, Average train loss per epoch: 0.6896605694995207
Epoch: 6, Average train jaccard_micro coeff per epoch: 0.6744740009307861
Epoch: 6, Average train jaccard_macro coeff per epoch: 0.2762463688850403
Epoch: 6, Average train jaccard_weighted coeff per epoch: 0.6815257668495178
Epoch: 6, Average train jaccard_none coeff per epoch: tensor([5.7620e-01, 8.6209e-01, 4.7194e-01, 6.7996e-01, 9.2271e-03, 6.2822e-03,
        1.1567e-01, 2.3571e-02, 1.0229e-01, 7.3313e-01, 3.0208e-01, 8.0063e-01,
        1.6541e-01, 0.0000e+00, 6.5546e-01, 0.0000e+00, 0.0000e+00, 2.4656e-05,
        0.0000e+00, 2.0966e-02], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=5, learning rate=0.0100
Epoch: 6, Average val loss per epoch: 0.8065466918051243
Epoch: 6, Average val jaccard_micro coeff per epoch: 0.6386040449142456
Epoch: 6, Average val jaccard_macro coeff per epoch: 0.23786428570747375
Epoch: 6, Average val jaccard_weighted coeff per epoch: 0.6478021740913391
Epoch: 6, Average val jaccard_none coeff per epoch: tensor([5.7989e-01, 8.2973e-01, 4.3188e-01, 6.2938e-01, 7.0285e-03, 9.6336e-03,
        1.3506e-01, 1.7791e-02, 5.5775e-02, 7.5984e-01, 2.3093e-01, 2.7375e-01,
        1.5481e-01, 0.0000e+00, 6.3473e-01, 0.0000e+00, 0.0000e+00, 1.5941e-06,
        0.0000e+00, 7.0605e-03], device='cuda:0')
Epoch: 7


100%|██████████| 170/170 [05:50<00:00,  2.06s/it]


Epoch: 7, Average train loss per epoch: 0.6497310009072809
Epoch: 7, Average train jaccard_micro coeff per epoch: 0.6889815926551819
Epoch: 7, Average train jaccard_macro coeff per epoch: 0.2867920994758606
Epoch: 7, Average train jaccard_weighted coeff per epoch: 0.6963203549385071
Epoch: 7, Average train jaccard_none coeff per epoch: tensor([5.9283e-01, 8.7222e-01, 4.9998e-01, 6.9843e-01, 1.1929e-02, 8.9356e-03,
        1.3513e-01, 2.4716e-02, 1.1135e-01, 7.4877e-01, 2.9802e-01, 8.1223e-01,
        1.9473e-01, 4.4767e-06, 6.7349e-01, 0.0000e+00, 0.0000e+00, 9.0840e-05,
        0.0000e+00, 5.2988e-02], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=6, learning rate=0.0100
Epoch: 7, Average val loss per epoch: 0.9056209418922663
Epoch: 7, Average val jaccard_micro coeff per epoch: 0.6241616010665894
Epoch: 7, Average val jaccard_macro coeff per epoch: 0.24331910908222198
Epoch: 7, Average val jaccard_weighted coeff per epoch: 0.6263072490692139
Epoch: 7, Average val jaccard_none coeff per epoch: tensor([0.4859, 0.7852, 0.3133, 0.6639, 0.0061, 0.0074, 0.1466, 0.0307, 0.0936,
        0.7518, 0.2366, 0.6380, 0.1426, 0.0000, 0.5454, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0192], device='cuda:0')
Epoch: 8


100%|██████████| 170/170 [05:50<00:00,  2.06s/it]


Epoch: 8, Average train loss per epoch: 0.614338272284059
Epoch: 8, Average train jaccard_micro coeff per epoch: 0.7032229900360107
Epoch: 8, Average train jaccard_macro coeff per epoch: 0.2991238832473755
Epoch: 8, Average train jaccard_weighted coeff per epoch: 0.7105661034584045
Epoch: 8, Average train jaccard_none coeff per epoch: tensor([6.0397e-01, 8.8345e-01, 5.2734e-01, 7.1026e-01, 1.7246e-02, 1.1307e-02,
        1.4897e-01, 2.7158e-02, 1.1050e-01, 7.7291e-01, 3.4080e-01, 8.1934e-01,
        2.1799e-01, 4.7603e-05, 7.0638e-01, 9.1899e-06, 0.0000e+00, 2.3681e-04,
        0.0000e+00, 8.4570e-02], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=7, learning rate=0.0100
Epoch: 8, Average val loss per epoch: 0.7356920130550861
Epoch: 8, Average val jaccard_micro coeff per epoch: 0.6642667651176453
Epoch: 8, Average val jaccard_macro coeff per epoch: 0.262725293636322
Epoch: 8, Average val jaccard_weighted coeff per epoch: 0.6674463748931885
Epoch: 8, Average val jaccard_none coeff per epoch: tensor([5.9296e-01, 8.3404e-01, 3.6790e-01, 6.8578e-01, 5.8561e-03, 7.0778e-03,
        1.4622e-01, 2.8005e-02, 9.4637e-02, 7.4076e-01, 2.0887e-01, 7.3743e-01,
        1.3845e-01, 5.9051e-04, 5.9576e-01, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 7.0163e-02], device='cuda:0')
Epoch: 9


100%|██████████| 170/170 [05:50<00:00,  2.06s/it]


Epoch: 9, Average train loss per epoch: 0.6033634857219808
Epoch: 9, Average train jaccard_micro coeff per epoch: 0.708669126033783
Epoch: 9, Average train jaccard_macro coeff per epoch: 0.3041031062602997
Epoch: 9, Average train jaccard_weighted coeff per epoch: 0.7173072695732117
Epoch: 9, Average train jaccard_none coeff per epoch: tensor([6.0814e-01, 8.9119e-01, 5.4102e-01, 7.1495e-01, 2.3080e-02, 1.4845e-02,
        1.6067e-01, 2.7405e-02, 1.1617e-01, 7.7693e-01, 3.2552e-01, 8.2046e-01,
        2.3677e-01, 1.1210e-04, 7.2024e-01, 1.9219e-04, 1.9131e-03, 3.5168e-04,
        0.0000e+00, 1.0212e-01], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=8, learning rate=0.0100
Epoch: 9, Average val loss per epoch: 0.9823757670819759
Epoch: 9, Average val jaccard_micro coeff per epoch: 0.5904474258422852
Epoch: 9, Average val jaccard_macro coeff per epoch: 0.2279975563287735
Epoch: 9, Average val jaccard_weighted coeff per epoch: 0.6111510992050171
Epoch: 9, Average val jaccard_none coeff per epoch: tensor([0.5552, 0.8025, 0.2667, 0.6203, 0.0137, 0.0035, 0.1097, 0.0303, 0.0848,
        0.6727, 0.2278, 0.5457, 0.1215, 0.0000, 0.4736, 0.0029, 0.0000, 0.0000,
        0.0000, 0.0290], device='cuda:0')
Epoch: 10


100%|██████████| 170/170 [05:50<00:00,  2.06s/it]


Epoch: 10, Average train loss per epoch: 0.6004817338550792
Epoch: 10, Average train jaccard_micro coeff per epoch: 0.709987998008728
Epoch: 10, Average train jaccard_macro coeff per epoch: 0.3055448532104492
Epoch: 10, Average train jaccard_weighted coeff per epoch: 0.7187395691871643
Epoch: 10, Average train jaccard_none coeff per epoch: tensor([6.1530e-01, 8.9308e-01, 5.3617e-01, 7.1430e-01, 2.1791e-02, 1.3756e-02,
        1.6016e-01, 2.7301e-02, 1.2442e-01, 7.7229e-01, 3.3305e-01, 8.2780e-01,
        2.3664e-01, 1.9103e-04, 7.2267e-01, 6.4121e-03, 2.5591e-04, 2.8448e-03,
        0.0000e+00, 1.0247e-01], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=9, learning rate=0.0100
Epoch: 10, Average val loss per epoch: 0.8080312237143517
Epoch: 10, Average val jaccard_micro coeff per epoch: 0.6341196298599243
Epoch: 10, Average val jaccard_macro coeff per epoch: 0.2521054148674011
Epoch: 10, Average val jaccard_weighted coeff per epoch: 0.6532480716705322
Epoch: 10, Average val jaccard_none coeff per epoch: tensor([6.0357e-01, 8.4900e-01, 4.5639e-01, 6.3330e-01, 1.1604e-02, 5.0869e-03,
        1.5977e-01, 2.1177e-02, 1.0294e-01, 6.4612e-01, 2.0354e-01, 5.8994e-01,
        1.1732e-01, 0.0000e+00, 6.1704e-01, 0.0000e+00, 0.0000e+00, 2.4515e-04,
        0.0000e+00, 2.5073e-02], device='cuda:0')
Epoch: 11


100%|██████████| 170/170 [05:50<00:00,  2.06s/it]


Epoch: 11, Average train loss per epoch: 0.5707700590876972
Epoch: 11, Average train jaccard_micro coeff per epoch: 0.7197163701057434
Epoch: 11, Average train jaccard_macro coeff per epoch: 0.31403541564941406
Epoch: 11, Average train jaccard_weighted coeff per epoch: 0.728569746017456
Epoch: 11, Average train jaccard_none coeff per epoch: tensor([6.2121e-01, 9.0130e-01, 5.6654e-01, 7.2504e-01, 2.7682e-02, 2.0710e-02,
        1.6967e-01, 2.9153e-02, 1.3061e-01, 7.7954e-01, 3.3693e-01, 8.2797e-01,
        2.5933e-01, 5.1531e-04, 7.4087e-01, 9.1668e-03, 3.0214e-03, 5.2063e-03,
        0.0000e+00, 1.2624e-01], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=10, learning rate=0.0100
Epoch: 11, Average val loss per epoch: 0.6580816190689802
Epoch: 11, Average val jaccard_micro coeff per epoch: 0.6765474081039429
Epoch: 11, Average val jaccard_macro coeff per epoch: 0.28344812989234924
Epoch: 11, Average val jaccard_weighted coeff per epoch: 0.6941401958465576
Epoch: 11, Average val jaccard_none coeff per epoch: tensor([0.6083, 0.8389, 0.4384, 0.7127, 0.0216, 0.0162, 0.1696, 0.0314, 0.1290,
        0.7790, 0.2075, 0.7544, 0.1913, 0.0043, 0.6601, 0.0000, 0.0016, 0.0000,
        0.0000, 0.1046], device='cuda:0')
Epoch: 12


100%|██████████| 170/170 [05:50<00:00,  2.06s/it]


Epoch: 12, Average train loss per epoch: 0.5579949806718265
Epoch: 12, Average train jaccard_micro coeff per epoch: 0.7242850661277771
Epoch: 12, Average train jaccard_macro coeff per epoch: 0.3199828267097473
Epoch: 12, Average train jaccard_weighted coeff per epoch: 0.7332393527030945
Epoch: 12, Average train jaccard_none coeff per epoch: tensor([0.6187, 0.9036, 0.5729, 0.7327, 0.0397, 0.0266, 0.1760, 0.0298, 0.1361,
        0.7897, 0.3406, 0.8354, 0.2740, 0.0013, 0.7554, 0.0039, 0.0040, 0.0213,
        0.0000, 0.1379], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=11, learning rate=0.0100
Epoch: 12, Average val loss per epoch: 0.7485984694212675
Epoch: 12, Average val jaccard_micro coeff per epoch: 0.6533122062683105
Epoch: 12, Average val jaccard_macro coeff per epoch: 0.2710716426372528
Epoch: 12, Average val jaccard_weighted coeff per epoch: 0.6715697050094604
Epoch: 12, Average val jaccard_none coeff per epoch: tensor([5.5783e-01, 8.6591e-01, 4.6639e-01, 6.1544e-01, 9.6826e-03, 1.6671e-02,
        1.3908e-01, 2.4707e-02, 1.0291e-01, 7.3994e-01, 2.4182e-01, 7.3830e-01,
        1.2992e-01, 1.5409e-04, 6.8169e-01, 0.0000e+00, 1.9051e-02, 1.7347e-03,
        0.0000e+00, 7.0217e-02], device='cuda:0')
Epoch: 13


100%|██████████| 170/170 [05:50<00:00,  2.06s/it]


Epoch: 13, Average train loss per epoch: 0.5442551916136461
Epoch: 13, Average train jaccard_micro coeff per epoch: 0.7291032671928406
Epoch: 13, Average train jaccard_macro coeff per epoch: 0.3258885145187378
Epoch: 13, Average train jaccard_weighted coeff per epoch: 0.7389941811561584
Epoch: 13, Average train jaccard_none coeff per epoch: tensor([0.6260, 0.9054, 0.5850, 0.7391, 0.0455, 0.0418, 0.1816, 0.0310, 0.1393,
        0.7952, 0.3492, 0.8348, 0.2853, 0.0017, 0.7615, 0.0077, 0.0185, 0.0237,
        0.0000, 0.1456], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.83s/it]


epoch=12, learning rate=0.0100
Epoch: 13, Average val loss per epoch: 0.9647657182067633
Epoch: 13, Average val jaccard_micro coeff per epoch: 0.532433032989502
Epoch: 13, Average val jaccard_macro coeff per epoch: 0.24043403565883636
Epoch: 13, Average val jaccard_weighted coeff per epoch: 0.5717300176620483
Epoch: 13, Average val jaccard_none coeff per epoch: tensor([5.9291e-01, 6.0646e-01, 1.7660e-01, 5.7265e-01, 2.2504e-02, 4.2046e-04,
        1.2217e-01, 3.0566e-02, 1.0662e-01, 7.5631e-01, 1.6791e-01, 7.7094e-01,
        1.5135e-01, 8.8112e-05, 7.0584e-01, 2.7557e-03, 1.1222e-02, 6.5552e-03,
        0.0000e+00, 4.8099e-03], device='cuda:0')
Epoch: 14


100%|██████████| 170/170 [05:50<00:00,  2.06s/it]


Epoch: 14, Average train loss per epoch: 0.5369819111683789
Epoch: 14, Average train jaccard_micro coeff per epoch: 0.7321732044219971
Epoch: 14, Average train jaccard_macro coeff per epoch: 0.3284119963645935
Epoch: 14, Average train jaccard_weighted coeff per epoch: 0.7422026991844177
Epoch: 14, Average train jaccard_none coeff per epoch: tensor([0.6334, 0.9077, 0.5865, 0.7398, 0.0437, 0.0400, 0.1848, 0.0319, 0.1393,
        0.7983, 0.3522, 0.8368, 0.2853, 0.0009, 0.7684, 0.0070, 0.0239, 0.0356,
        0.0000, 0.1527], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=13, learning rate=0.0100
Epoch: 14, Average val loss per epoch: 0.714334961026907
Epoch: 14, Average val jaccard_micro coeff per epoch: 0.6549804210662842
Epoch: 14, Average val jaccard_macro coeff per epoch: 0.277132511138916
Epoch: 14, Average val jaccard_weighted coeff per epoch: 0.6735325455665588
Epoch: 14, Average val jaccard_none coeff per epoch: tensor([5.5582e-01, 8.2281e-01, 4.3307e-01, 6.9455e-01, 2.8350e-02, 1.9915e-02,
        1.8808e-01, 3.1558e-02, 1.1349e-01, 7.5938e-01, 2.1747e-01, 6.4777e-01,
        2.1395e-01, 5.0249e-04, 6.8027e-01, 1.3382e-04, 3.9736e-06, 0.0000e+00,
        0.0000e+00, 1.3554e-01], device='cuda:0')
Epoch: 15


100%|██████████| 170/170 [05:50<00:00,  2.06s/it]


Epoch: 15, Average train loss per epoch: 0.5268919720369227
Epoch: 15, Average train jaccard_micro coeff per epoch: 0.7358540892601013
Epoch: 15, Average train jaccard_macro coeff per epoch: 0.3361176550388336
Epoch: 15, Average train jaccard_weighted coeff per epoch: 0.7470923662185669
Epoch: 15, Average train jaccard_none coeff per epoch: tensor([6.3208e-01, 9.1241e-01, 6.0194e-01, 7.4432e-01, 5.1604e-02, 5.9752e-02,
        1.9490e-01, 3.3305e-02, 1.4635e-01, 7.9479e-01, 3.5922e-01, 8.4297e-01,
        3.0720e-01, 1.9535e-03, 7.7795e-01, 1.6280e-02, 3.6035e-02, 4.2129e-02,
        6.1100e-05, 1.6711e-01], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


Epoch 00015: reducing learning rate of group 0 to 5.0000e-03.
epoch=14, learning rate=0.0050
Epoch: 15, Average val loss per epoch: 0.8792250584810972
Epoch: 15, Average val jaccard_micro coeff per epoch: 0.6081090569496155
Epoch: 15, Average val jaccard_macro coeff per epoch: 0.22348101437091827
Epoch: 15, Average val jaccard_weighted coeff per epoch: 0.6298875212669373
Epoch: 15, Average val jaccard_none coeff per epoch: tensor([5.4973e-01, 8.3677e-01, 2.7800e-01, 6.2644e-01, 1.6952e-02, 7.3123e-03,
        1.4267e-01, 3.7803e-02, 8.6719e-02, 6.9974e-01, 1.4775e-01, 1.2479e-01,
        1.2587e-01, 2.2760e-03, 6.6611e-01, 0.0000e+00, 6.1937e-02, 4.9590e-05,
        0.0000e+00, 5.8703e-02], device='cuda:0')
Epoch: 16


100%|██████████| 170/170 [05:50<00:00,  2.06s/it]


Epoch: 16, Average train loss per epoch: 0.4961172070573358
Epoch: 16, Average train jaccard_micro coeff per epoch: 0.7485077381134033
Epoch: 16, Average train jaccard_macro coeff per epoch: 0.34867939352989197
Epoch: 16, Average train jaccard_weighted coeff per epoch: 0.7599374651908875
Epoch: 16, Average train jaccard_none coeff per epoch: tensor([6.5257e-01, 9.2328e-01, 6.3018e-01, 7.5751e-01, 7.2331e-02, 7.7678e-02,
        1.9768e-01, 3.4531e-02, 1.4848e-01, 8.0268e-01, 3.6921e-01, 8.4449e-01,
        3.2741e-01, 1.8669e-03, 8.0325e-01, 1.7026e-02, 5.2656e-02, 7.5183e-02,
        1.0904e-05, 1.8556e-01], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=15, learning rate=0.0050
Epoch: 16, Average val loss per epoch: 0.5558015238493681
Epoch: 16, Average val jaccard_micro coeff per epoch: 0.7137869596481323
Epoch: 16, Average val jaccard_macro coeff per epoch: 0.3262580931186676
Epoch: 16, Average val jaccard_weighted coeff per epoch: 0.7331889867782593
Epoch: 16, Average val jaccard_none coeff per epoch: tensor([6.2679e-01, 8.6132e-01, 5.5098e-01, 7.4093e-01, 2.3163e-02, 5.6310e-02,
        2.2836e-01, 3.7035e-02, 1.3683e-01, 8.2253e-01, 2.7378e-01, 7.6485e-01,
        2.6508e-01, 1.5856e-03, 7.8945e-01, 3.8313e-03, 1.0583e-01, 3.6140e-02,
        1.0844e-04, 2.0025e-01], device='cuda:0')
Epoch: 17


100%|██████████| 170/170 [05:50<00:00,  2.06s/it]


Epoch: 17, Average train loss per epoch: 0.4639701077166726
Epoch: 17, Average train jaccard_micro coeff per epoch: 0.7626915574073792
Epoch: 17, Average train jaccard_macro coeff per epoch: 0.3634355366230011
Epoch: 17, Average train jaccard_weighted coeff per epoch: 0.7749332189559937
Epoch: 17, Average train jaccard_none coeff per epoch: tensor([6.7213e-01, 9.3027e-01, 6.5096e-01, 7.7400e-01, 7.6189e-02, 1.1220e-01,
        2.1591e-01, 4.1374e-02, 1.6492e-01, 8.2157e-01, 3.8363e-01, 8.6212e-01,
        3.4323e-01, 2.5000e-03, 8.2123e-01, 1.7523e-02, 7.0646e-02, 9.9412e-02,
        1.2053e-04, 2.0878e-01], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=16, learning rate=0.0050
Epoch: 17, Average val loss per epoch: 0.5565811153501272
Epoch: 17, Average val jaccard_micro coeff per epoch: 0.7192165851593018
Epoch: 17, Average val jaccard_macro coeff per epoch: 0.3276391327381134
Epoch: 17, Average val jaccard_weighted coeff per epoch: 0.7362761497497559
Epoch: 17, Average val jaccard_none coeff per epoch: tensor([6.4462e-01, 8.7857e-01, 5.4755e-01, 7.3380e-01, 5.5839e-02, 4.1197e-02,
        2.2469e-01, 3.2864e-02, 1.2383e-01, 8.1385e-01, 2.6411e-01, 7.9842e-01,
        2.5161e-01, 4.7712e-04, 7.8303e-01, 8.9145e-02, 5.2974e-02, 1.5515e-02,
        5.7567e-06, 2.0070e-01], device='cuda:0')
Epoch: 18


100%|██████████| 170/170 [05:50<00:00,  2.06s/it]


Epoch: 18, Average train loss per epoch: 0.4573001149822684
Epoch: 18, Average train jaccard_micro coeff per epoch: 0.7653939723968506
Epoch: 18, Average train jaccard_macro coeff per epoch: 0.3703640401363373
Epoch: 18, Average train jaccard_weighted coeff per epoch: 0.778159499168396
Epoch: 18, Average train jaccard_none coeff per epoch: tensor([6.7761e-01, 9.3243e-01, 6.5503e-01, 7.7503e-01, 9.6162e-02, 1.1181e-01,
        2.2185e-01, 4.2919e-02, 1.7070e-01, 8.1980e-01, 3.9082e-01, 8.6128e-01,
        3.5037e-01, 1.8396e-03, 8.1876e-01, 6.4944e-02, 7.5450e-02, 1.2285e-01,
        6.0842e-04, 2.1702e-01], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=17, learning rate=0.0050
Epoch: 18, Average val loss per epoch: 0.5757907554507256
Epoch: 18, Average val jaccard_micro coeff per epoch: 0.719700813293457
Epoch: 18, Average val jaccard_macro coeff per epoch: 0.3341640532016754
Epoch: 18, Average val jaccard_weighted coeff per epoch: 0.7352235913276672
Epoch: 18, Average val jaccard_none coeff per epoch: tensor([6.3207e-01, 8.7975e-01, 5.3849e-01, 7.4115e-01, 4.3513e-02, 7.0022e-02,
        2.0574e-01, 3.6195e-02, 1.3325e-01, 8.2450e-01, 2.6711e-01, 7.6554e-01,
        2.6260e-01, 7.1040e-04, 7.6356e-01, 2.1397e-02, 2.4185e-01, 1.5722e-02,
        1.3494e-03, 2.3876e-01], device='cuda:0')
Epoch: 19


100%|██████████| 170/170 [05:50<00:00,  2.06s/it]


Epoch: 19, Average train loss per epoch: 0.4496535899008022
Epoch: 19, Average train jaccard_micro coeff per epoch: 0.7683530449867249
Epoch: 19, Average train jaccard_macro coeff per epoch: 0.3729591369628906
Epoch: 19, Average train jaccard_weighted coeff per epoch: 0.7812829613685608
Epoch: 19, Average train jaccard_none coeff per epoch: tensor([0.6852, 0.9361, 0.6576, 0.7820, 0.0928, 0.1257, 0.2279, 0.0446, 0.1743,
        0.8188, 0.3826, 0.8600, 0.3578, 0.0018, 0.8283, 0.0531, 0.0780, 0.1132,
        0.0014, 0.2381], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=18, learning rate=0.0050
Epoch: 19, Average val loss per epoch: 0.7046905159950256
Epoch: 19, Average val jaccard_micro coeff per epoch: 0.6762773394584656
Epoch: 19, Average val jaccard_macro coeff per epoch: 0.29434219002723694
Epoch: 19, Average val jaccard_weighted coeff per epoch: 0.6998985409736633
Epoch: 19, Average val jaccard_none coeff per epoch: tensor([6.1526e-01, 8.8642e-01, 5.0317e-01, 6.6864e-01, 5.7388e-02, 7.9913e-02,
        2.1924e-01, 3.6056e-02, 1.3211e-01, 7.9631e-01, 2.6969e-01, 2.9504e-01,
        2.2258e-01, 5.3424e-05, 7.3204e-01, 3.0187e-02, 1.2301e-01, 2.5831e-03,
        3.8633e-06, 2.1715e-01], device='cuda:0')
Epoch: 20


100%|██████████| 170/170 [05:50<00:00,  2.06s/it]


Epoch: 20, Average train loss per epoch: 0.4476991476381526
Epoch: 20, Average train jaccard_micro coeff per epoch: 0.7693697810173035
Epoch: 20, Average train jaccard_macro coeff per epoch: 0.3779727816581726
Epoch: 20, Average train jaccard_weighted coeff per epoch: 0.7820810675621033
Epoch: 20, Average train jaccard_none coeff per epoch: tensor([6.8578e-01, 9.3448e-01, 6.5743e-01, 7.8069e-01, 9.9028e-02, 1.3540e-01,
        2.3193e-01, 4.4082e-02, 1.7990e-01, 8.2250e-01, 3.9975e-01, 8.5306e-01,
        3.7017e-01, 2.9426e-03, 8.2925e-01, 5.2376e-02, 9.7880e-02, 1.2813e-01,
        8.6099e-04, 2.5382e-01], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


Epoch 00020: reducing learning rate of group 0 to 2.5000e-03.
epoch=19, learning rate=0.0025
Epoch: 20, Average val loss per epoch: 0.5580443823710084
Epoch: 20, Average val jaccard_micro coeff per epoch: 0.7196078896522522
Epoch: 20, Average val jaccard_macro coeff per epoch: 0.33575186133384705
Epoch: 20, Average val jaccard_weighted coeff per epoch: 0.738823652267456
Epoch: 20, Average val jaccard_none coeff per epoch: tensor([6.0716e-01, 8.8172e-01, 5.2515e-01, 7.6939e-01, 4.4103e-02, 1.0223e-01,
        2.3918e-01, 3.9508e-02, 1.4565e-01, 8.1857e-01, 2.7114e-01, 7.4160e-01,
        2.8740e-01, 6.5087e-03, 7.9479e-01, 2.1155e-02, 1.5604e-01, 6.8584e-03,
        1.4556e-04, 2.5675e-01], device='cuda:0')
Epoch: 21


100%|██████████| 170/170 [05:50<00:00,  2.06s/it]


Epoch: 21, Average train loss per epoch: 0.4106828989351497
Epoch: 21, Average train jaccard_micro coeff per epoch: 0.7863829731941223
Epoch: 21, Average train jaccard_macro coeff per epoch: 0.3962056040763855
Epoch: 21, Average train jaccard_weighted coeff per epoch: 0.7992482781410217
Epoch: 21, Average train jaccard_none coeff per epoch: tensor([0.7140, 0.9460, 0.6867, 0.7992, 0.1091, 0.1740, 0.2477, 0.0498, 0.1912,
        0.8318, 0.4082, 0.8729, 0.3896, 0.0037, 0.8463, 0.0767, 0.1497, 0.1417,
        0.0032, 0.2827], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=20, learning rate=0.0025
Epoch: 21, Average val loss per epoch: 0.5480948155745864
Epoch: 21, Average val jaccard_micro coeff per epoch: 0.7368884682655334
Epoch: 21, Average val jaccard_macro coeff per epoch: 0.3473883867263794
Epoch: 21, Average val jaccard_weighted coeff per epoch: 0.7544202208518982
Epoch: 21, Average val jaccard_none coeff per epoch: tensor([0.6567, 0.8969, 0.5834, 0.7619, 0.0895, 0.0714, 0.2556, 0.0489, 0.1840,
        0.8288, 0.2771, 0.7649, 0.2636, 0.0093, 0.8144, 0.0435, 0.0766, 0.0707,
        0.0023, 0.2481], device='cuda:0')
Epoch: 22


100%|██████████| 170/170 [05:51<00:00,  2.07s/it]


Epoch: 22, Average train loss per epoch: 0.39673411302706774
Epoch: 22, Average train jaccard_micro coeff per epoch: 0.7932957410812378
Epoch: 22, Average train jaccard_macro coeff per epoch: 0.40393009781837463
Epoch: 22, Average train jaccard_weighted coeff per epoch: 0.8058213591575623
Epoch: 22, Average train jaccard_none coeff per epoch: tensor([0.7271, 0.9490, 0.7023, 0.8054, 0.1325, 0.1760, 0.2532, 0.0567, 0.2010,
        0.8382, 0.4121, 0.8769, 0.4005, 0.0045, 0.8564, 0.0951, 0.1456, 0.1494,
        0.0053, 0.2914], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=21, learning rate=0.0025
Epoch: 22, Average val loss per epoch: 0.5339144747704268
Epoch: 22, Average val jaccard_micro coeff per epoch: 0.7374980449676514
Epoch: 22, Average val jaccard_macro coeff per epoch: 0.3538372218608856
Epoch: 22, Average val jaccard_weighted coeff per epoch: 0.7542576789855957
Epoch: 22, Average val jaccard_none coeff per epoch: tensor([0.6476, 0.8942, 0.5839, 0.7665, 0.0707, 0.1051, 0.2275, 0.0401, 0.1565,
        0.8275, 0.2976, 0.8073, 0.3253, 0.0033, 0.7912, 0.0557, 0.1148, 0.0573,
        0.0114, 0.2931], device='cuda:0')
Epoch: 23


100%|██████████| 170/170 [05:51<00:00,  2.07s/it]


Epoch: 23, Average train loss per epoch: 0.38697393326198354
Epoch: 23, Average train jaccard_micro coeff per epoch: 0.7982106804847717
Epoch: 23, Average train jaccard_macro coeff per epoch: 0.41002964973449707
Epoch: 23, Average train jaccard_weighted coeff per epoch: 0.810943067073822
Epoch: 23, Average train jaccard_none coeff per epoch: tensor([0.7367, 0.9514, 0.7099, 0.8112, 0.1386, 0.1865, 0.2584, 0.0590, 0.2090,
        0.8341, 0.4220, 0.8769, 0.4156, 0.0047, 0.8615, 0.1098, 0.1560, 0.1386,
        0.0130, 0.3077], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=22, learning rate=0.0025
Epoch: 23, Average val loss per epoch: 0.5269262790679932
Epoch: 23, Average val jaccard_micro coeff per epoch: 0.7426995038986206
Epoch: 23, Average val jaccard_macro coeff per epoch: 0.35984063148498535
Epoch: 23, Average val jaccard_weighted coeff per epoch: 0.7590751647949219
Epoch: 23, Average val jaccard_none coeff per epoch: tensor([0.6554, 0.8977, 0.5846, 0.7612, 0.0976, 0.0932, 0.2643, 0.0489, 0.1771,
        0.8381, 0.3027, 0.8095, 0.3119, 0.0081, 0.8194, 0.1172, 0.0835, 0.0265,
        0.0029, 0.2969], device='cuda:0')
Epoch: 24


100%|██████████| 170/170 [05:51<00:00,  2.07s/it]


Epoch: 24, Average train loss per epoch: 0.37834440680111153
Epoch: 24, Average train jaccard_micro coeff per epoch: 0.8017686009407043
Epoch: 24, Average train jaccard_macro coeff per epoch: 0.4186273217201233
Epoch: 24, Average train jaccard_weighted coeff per epoch: 0.813805103302002
Epoch: 24, Average train jaccard_none coeff per epoch: tensor([0.7397, 0.9518, 0.7203, 0.8145, 0.1462, 0.1980, 0.2668, 0.0647, 0.2137,
        0.8413, 0.4335, 0.8778, 0.4167, 0.0059, 0.8648, 0.1554, 0.1659, 0.1606,
        0.0160, 0.3190], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=23, learning rate=0.0025
Epoch: 24, Average val loss per epoch: 0.5260936990380287
Epoch: 24, Average val jaccard_micro coeff per epoch: 0.743682861328125
Epoch: 24, Average val jaccard_macro coeff per epoch: 0.3646724820137024
Epoch: 24, Average val jaccard_weighted coeff per epoch: 0.7606361508369446
Epoch: 24, Average val jaccard_none coeff per epoch: tensor([0.6464, 0.8963, 0.5795, 0.7733, 0.0957, 0.1036, 0.2643, 0.0587, 0.1869,
        0.8406, 0.3004, 0.8153, 0.3216, 0.0040, 0.8154, 0.0896, 0.1565, 0.0172,
        0.0020, 0.3262], device='cuda:0')
Epoch: 25


100%|██████████| 170/170 [05:51<00:00,  2.07s/it]


Epoch: 25, Average train loss per epoch: 0.36883697264334736
Epoch: 25, Average train jaccard_micro coeff per epoch: 0.8061206936836243
Epoch: 25, Average train jaccard_macro coeff per epoch: 0.42479830980300903
Epoch: 25, Average train jaccard_weighted coeff per epoch: 0.8178272247314453
Epoch: 25, Average train jaccard_none coeff per epoch: tensor([0.7466, 0.9536, 0.7229, 0.8196, 0.1567, 0.1993, 0.2715, 0.0745, 0.2229,
        0.8453, 0.4330, 0.8783, 0.4216, 0.0055, 0.8694, 0.1642, 0.1723, 0.1864,
        0.0161, 0.3363], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=24, learning rate=0.0025
Epoch: 25, Average val loss per epoch: 0.5347758531570435
Epoch: 25, Average val jaccard_micro coeff per epoch: 0.7414416670799255
Epoch: 25, Average val jaccard_macro coeff per epoch: 0.3657659590244293
Epoch: 25, Average val jaccard_weighted coeff per epoch: 0.7599680423736572
Epoch: 25, Average val jaccard_none coeff per epoch: tensor([0.6464, 0.8930, 0.5821, 0.7810, 0.0916, 0.1133, 0.2621, 0.0562, 0.1931,
        0.8342, 0.3135, 0.8016, 0.3512, 0.0022, 0.8057, 0.0915, 0.1675, 0.0125,
        0.0027, 0.3139], device='cuda:0')
Epoch: 26


100%|██████████| 170/170 [05:50<00:00,  2.06s/it]


Epoch: 26, Average train loss per epoch: 0.36310187508078184
Epoch: 26, Average train jaccard_micro coeff per epoch: 0.8093799352645874
Epoch: 26, Average train jaccard_macro coeff per epoch: 0.428161084651947
Epoch: 26, Average train jaccard_weighted coeff per epoch: 0.8212041854858398
Epoch: 26, Average train jaccard_none coeff per epoch: tensor([0.7534, 0.9553, 0.7311, 0.8193, 0.1669, 0.2198, 0.2764, 0.0758, 0.2300,
        0.8483, 0.4426, 0.8823, 0.4323, 0.0073, 0.8708, 0.1484, 0.1657, 0.1782,
        0.0223, 0.3368], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=25, learning rate=0.0025
Epoch: 26, Average val loss per epoch: 0.5132983587682247
Epoch: 26, Average val jaccard_micro coeff per epoch: 0.7459722757339478
Epoch: 26, Average val jaccard_macro coeff per epoch: 0.3678942918777466
Epoch: 26, Average val jaccard_weighted coeff per epoch: 0.7622369527816772
Epoch: 26, Average val jaccard_none coeff per epoch: tensor([0.6588, 0.8944, 0.5739, 0.7724, 0.0582, 0.1058, 0.2758, 0.0649, 0.1904,
        0.8424, 0.2998, 0.8102, 0.3336, 0.0034, 0.8129, 0.1351, 0.1363, 0.0491,
        0.0056, 0.3351], device='cuda:0')
Epoch: 27


100%|██████████| 170/170 [05:51<00:00,  2.07s/it]


Epoch: 27, Average train loss per epoch: 0.357814522876459
Epoch: 27, Average train jaccard_micro coeff per epoch: 0.8112788796424866
Epoch: 27, Average train jaccard_macro coeff per epoch: 0.4325028955936432
Epoch: 27, Average train jaccard_weighted coeff per epoch: 0.8228559494018555
Epoch: 27, Average train jaccard_none coeff per epoch: tensor([0.7541, 0.9546, 0.7391, 0.8257, 0.1676, 0.2177, 0.2798, 0.0851, 0.2331,
        0.8501, 0.4465, 0.8818, 0.4334, 0.0061, 0.8663, 0.1780, 0.1703, 0.1841,
        0.0293, 0.3474], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=26, learning rate=0.0025
Epoch: 27, Average val loss per epoch: 0.5422098031267524
Epoch: 27, Average val jaccard_micro coeff per epoch: 0.7441984415054321
Epoch: 27, Average val jaccard_macro coeff per epoch: 0.3705058991909027
Epoch: 27, Average val jaccard_weighted coeff per epoch: 0.7611163258552551
Epoch: 27, Average val jaccard_none coeff per epoch: tensor([6.4207e-01, 8.9883e-01, 5.8917e-01, 7.7438e-01, 9.8860e-02, 1.0413e-01,
        2.7924e-01, 6.0992e-02, 1.8986e-01, 8.3751e-01, 3.2311e-01, 8.0658e-01,
        3.5483e-01, 3.6721e-03, 7.9626e-01, 8.1256e-02, 1.9231e-01, 3.9584e-02,
        6.3506e-04, 3.3682e-01], device='cuda:0')
Epoch: 28


100%|██████████| 170/170 [05:51<00:00,  2.07s/it]


Epoch: 28, Average train loss per epoch: 0.3606960563098683
Epoch: 28, Average train jaccard_micro coeff per epoch: 0.8094006776809692
Epoch: 28, Average train jaccard_macro coeff per epoch: 0.4326479732990265
Epoch: 28, Average train jaccard_weighted coeff per epoch: 0.8206456303596497
Epoch: 28, Average train jaccard_none coeff per epoch: tensor([0.7455, 0.9511, 0.7218, 0.8273, 0.1667, 0.2173, 0.2820, 0.0911, 0.2375,
        0.8498, 0.4397, 0.8799, 0.4401, 0.0088, 0.8704, 0.1824, 0.1785, 0.1895,
        0.0202, 0.3534], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=27, learning rate=0.0025
Epoch: 28, Average val loss per epoch: 0.568017041310668
Epoch: 28, Average val jaccard_micro coeff per epoch: 0.7418608069419861
Epoch: 28, Average val jaccard_macro coeff per epoch: 0.3683677315711975
Epoch: 28, Average val jaccard_weighted coeff per epoch: 0.7574723362922668
Epoch: 28, Average val jaccard_none coeff per epoch: tensor([0.6364, 0.8957, 0.5770, 0.7702, 0.1027, 0.1040, 0.2797, 0.0793, 0.2029,
        0.8409, 0.3085, 0.8052, 0.3563, 0.0048, 0.7790, 0.1373, 0.1764, 0.0209,
        0.0082, 0.2820], device='cuda:0')
Epoch: 29


100%|██████████| 170/170 [05:51<00:00,  2.07s/it]


Epoch: 29, Average train loss per epoch: 0.3501046457711388
Epoch: 29, Average train jaccard_micro coeff per epoch: 0.8153645396232605
Epoch: 29, Average train jaccard_macro coeff per epoch: 0.4400392174720764
Epoch: 29, Average train jaccard_weighted coeff per epoch: 0.8264805674552917
Epoch: 29, Average train jaccard_none coeff per epoch: tensor([0.7608, 0.9559, 0.7395, 0.8318, 0.1827, 0.2278, 0.2913, 0.0966, 0.2427,
        0.8516, 0.4483, 0.8797, 0.4514, 0.0081, 0.8646, 0.1894, 0.2008, 0.1819,
        0.0308, 0.3650], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=28, learning rate=0.0025
Epoch: 29, Average val loss per epoch: 0.5842328742146492
Epoch: 29, Average val jaccard_micro coeff per epoch: 0.7366594672203064
Epoch: 29, Average val jaccard_macro coeff per epoch: 0.3604680299758911
Epoch: 29, Average val jaccard_weighted coeff per epoch: 0.753997802734375
Epoch: 29, Average val jaccard_none coeff per epoch: tensor([0.6568, 0.8932, 0.5771, 0.7576, 0.0872, 0.1066, 0.2790, 0.0716, 0.1657,
        0.8361, 0.3073, 0.6835, 0.2844, 0.0112, 0.8295, 0.1132, 0.1889, 0.0316,
        0.0108, 0.3182], device='cuda:0')
Epoch: 30


100%|██████████| 170/170 [05:53<00:00,  2.08s/it]


Epoch: 30, Average train loss per epoch: 0.35316210652098934
Epoch: 30, Average train jaccard_micro coeff per epoch: 0.8151144981384277
Epoch: 30, Average train jaccard_macro coeff per epoch: 0.4414293169975281
Epoch: 30, Average train jaccard_weighted coeff per epoch: 0.8262420892715454
Epoch: 30, Average train jaccard_none coeff per epoch: tensor([0.7620, 0.9572, 0.7496, 0.8277, 0.1867, 0.2407, 0.2868, 0.0953, 0.2354,
        0.8501, 0.4600, 0.8647, 0.4497, 0.0101, 0.8727, 0.1966, 0.1973, 0.1773,
        0.0332, 0.3754], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


Epoch 00030: reducing learning rate of group 0 to 1.2500e-03.
epoch=29, learning rate=0.0013
Epoch: 30, Average val loss per epoch: 0.5913199130445719
Epoch: 30, Average val jaccard_micro coeff per epoch: 0.7274091839790344
Epoch: 30, Average val jaccard_macro coeff per epoch: 0.35394182801246643
Epoch: 30, Average val jaccard_weighted coeff per epoch: 0.7435646057128906
Epoch: 30, Average val jaccard_none coeff per epoch: tensor([0.5977, 0.8928, 0.5651, 0.7562, 0.0872, 0.1147, 0.2799, 0.0678, 0.1815,
        0.8416, 0.3067, 0.5131, 0.3520, 0.0151, 0.8185, 0.1037, 0.1705, 0.0711,
        0.0094, 0.3343], device='cuda:0')
Epoch: 31


100%|██████████| 170/170 [05:51<00:00,  2.07s/it]


Epoch: 31, Average train loss per epoch: 0.32544354077647714
Epoch: 31, Average train jaccard_micro coeff per epoch: 0.8280414342880249
Epoch: 31, Average train jaccard_macro coeff per epoch: 0.46232104301452637
Epoch: 31, Average train jaccard_weighted coeff per epoch: 0.8382624983787537
Epoch: 31, Average train jaccard_none coeff per epoch: tensor([0.7815, 0.9616, 0.7707, 0.8438, 0.2053, 0.2611, 0.3037, 0.1039, 0.2575,
        0.8590, 0.4657, 0.8801, 0.4744, 0.0175, 0.8855, 0.2579, 0.2493, 0.2143,
        0.0500, 0.4034], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=30, learning rate=0.0013
Epoch: 31, Average val loss per epoch: 0.5159693844616413
Epoch: 31, Average val jaccard_micro coeff per epoch: 0.7527158260345459
Epoch: 31, Average val jaccard_macro coeff per epoch: 0.380504310131073
Epoch: 31, Average val jaccard_weighted coeff per epoch: 0.7682416439056396
Epoch: 31, Average val jaccard_none coeff per epoch: tensor([0.6497, 0.9007, 0.5920, 0.7843, 0.1078, 0.1121, 0.2955, 0.0679, 0.1993,
        0.8407, 0.3179, 0.8195, 0.3736, 0.0080, 0.8181, 0.1458, 0.1595, 0.0507,
        0.0168, 0.3500], device='cuda:0')
Epoch: 32


100%|██████████| 170/170 [05:51<00:00,  2.07s/it]


Epoch: 32, Average train loss per epoch: 0.31501160369199865
Epoch: 32, Average train jaccard_micro coeff per epoch: 0.833471417427063
Epoch: 32, Average train jaccard_macro coeff per epoch: 0.47004181146621704
Epoch: 32, Average train jaccard_weighted coeff per epoch: 0.8432018756866455
Epoch: 32, Average train jaccard_none coeff per epoch: tensor([0.7925, 0.9629, 0.7785, 0.8463, 0.2209, 0.2715, 0.3118, 0.1110, 0.2645,
        0.8624, 0.4868, 0.8887, 0.4741, 0.0194, 0.8876, 0.2805, 0.2480, 0.2246,
        0.0579, 0.4109], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=31, learning rate=0.0013
Epoch: 32, Average val loss per epoch: 0.5295664006844163
Epoch: 32, Average val jaccard_micro coeff per epoch: 0.7502835988998413
Epoch: 32, Average val jaccard_macro coeff per epoch: 0.38280150294303894
Epoch: 32, Average val jaccard_weighted coeff per epoch: 0.767075777053833
Epoch: 32, Average val jaccard_none coeff per epoch: tensor([0.6425, 0.8982, 0.5904, 0.7770, 0.0854, 0.1178, 0.2905, 0.0833, 0.2051,
        0.8489, 0.3003, 0.8206, 0.3596, 0.0170, 0.8302, 0.1475, 0.2081, 0.0585,
        0.0136, 0.3615], device='cuda:0')
Epoch: 33


100%|██████████| 170/170 [05:51<00:00,  2.07s/it]


Epoch: 33, Average train loss per epoch: 0.3120075977900449
Epoch: 33, Average train jaccard_micro coeff per epoch: 0.8350226879119873
Epoch: 33, Average train jaccard_macro coeff per epoch: 0.4702046513557434
Epoch: 33, Average train jaccard_weighted coeff per epoch: 0.8447169661521912
Epoch: 33, Average train jaccard_none coeff per epoch: tensor([0.7951, 0.9634, 0.7816, 0.8485, 0.2218, 0.2740, 0.3143, 0.1116, 0.2672,
        0.8592, 0.4820, 0.8907, 0.4814, 0.0216, 0.8852, 0.2644, 0.2849, 0.2141,
        0.0432, 0.4000], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=32, learning rate=0.0013
Epoch: 33, Average val loss per epoch: 0.5528789004310966
Epoch: 33, Average val jaccard_micro coeff per epoch: 0.7448149919509888
Epoch: 33, Average val jaccard_macro coeff per epoch: 0.37470152974128723
Epoch: 33, Average val jaccard_weighted coeff per epoch: 0.76204913854599
Epoch: 33, Average val jaccard_none coeff per epoch: tensor([0.6339, 0.8945, 0.5805, 0.7797, 0.0984, 0.1044, 0.2928, 0.0723, 0.2102,
        0.8411, 0.3151, 0.8148, 0.3451, 0.0169, 0.8170, 0.1105, 0.1505, 0.0426,
        0.0225, 0.3513], device='cuda:0')
Epoch: 34


100%|██████████| 170/170 [05:51<00:00,  2.07s/it]


Epoch: 34, Average train loss per epoch: 0.31047573037007276
Epoch: 34, Average train jaccard_micro coeff per epoch: 0.8355797529220581
Epoch: 34, Average train jaccard_macro coeff per epoch: 0.47274133563041687
Epoch: 34, Average train jaccard_weighted coeff per epoch: 0.8448675274848938
Epoch: 34, Average train jaccard_none coeff per epoch: tensor([0.7972, 0.9635, 0.7808, 0.8482, 0.2231, 0.2793, 0.3115, 0.1087, 0.2678,
        0.8631, 0.4862, 0.8868, 0.4820, 0.0261, 0.8854, 0.2873, 0.2771, 0.2130,
        0.0603, 0.4072], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=33, learning rate=0.0013
Epoch: 34, Average val loss per epoch: 0.5335966637358069
Epoch: 34, Average val jaccard_micro coeff per epoch: 0.7536887526512146
Epoch: 34, Average val jaccard_macro coeff per epoch: 0.3851070702075958
Epoch: 34, Average val jaccard_weighted coeff per epoch: 0.7696138024330139
Epoch: 34, Average val jaccard_none coeff per epoch: tensor([0.6447, 0.9024, 0.5947, 0.7862, 0.1077, 0.1096, 0.3059, 0.0789, 0.2122,
        0.8451, 0.3118, 0.8019, 0.3662, 0.0244, 0.8261, 0.1197, 0.2287, 0.0565,
        0.0167, 0.3628], device='cuda:0')
Epoch: 35


100%|██████████| 170/170 [05:51<00:00,  2.07s/it]


Epoch: 35, Average train loss per epoch: 0.3028955265879631
Epoch: 35, Average train jaccard_micro coeff per epoch: 0.8393944501876831
Epoch: 35, Average train jaccard_macro coeff per epoch: 0.48050379753112793
Epoch: 35, Average train jaccard_weighted coeff per epoch: 0.8484411835670471
Epoch: 35, Average train jaccard_none coeff per epoch: tensor([0.8044, 0.9643, 0.7874, 0.8534, 0.2354, 0.2949, 0.3200, 0.1167, 0.2708,
        0.8634, 0.4929, 0.8909, 0.4900, 0.0331, 0.8904, 0.3081, 0.2887, 0.2169,
        0.0623, 0.4262], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=34, learning rate=0.0013
Epoch: 35, Average val loss per epoch: 0.5388479335233569
Epoch: 35, Average val jaccard_micro coeff per epoch: 0.7513315081596375
Epoch: 35, Average val jaccard_macro coeff per epoch: 0.3819855749607086
Epoch: 35, Average val jaccard_weighted coeff per epoch: 0.7663743495941162
Epoch: 35, Average val jaccard_none coeff per epoch: tensor([0.6510, 0.8985, 0.5821, 0.7777, 0.1018, 0.0949, 0.3035, 0.0798, 0.2089,
        0.8457, 0.3133, 0.8048, 0.3680, 0.0191, 0.8177, 0.1253, 0.2145, 0.0525,
        0.0238, 0.3565], device='cuda:0')
Epoch: 36


100%|██████████| 170/170 [05:51<00:00,  2.07s/it]


Epoch: 36, Average train loss per epoch: 0.2974332974237554
Epoch: 36, Average train jaccard_micro coeff per epoch: 0.8419510126113892
Epoch: 36, Average train jaccard_macro coeff per epoch: 0.4854016900062561
Epoch: 36, Average train jaccard_weighted coeff per epoch: 0.8506237268447876
Epoch: 36, Average train jaccard_none coeff per epoch: tensor([0.8072, 0.9647, 0.7892, 0.8557, 0.2389, 0.3073, 0.3259, 0.1187, 0.2781,
        0.8685, 0.4964, 0.8925, 0.4982, 0.0435, 0.8934, 0.3092, 0.2827, 0.2392,
        0.0705, 0.4283], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=35, learning rate=0.0013
Epoch: 36, Average val loss per epoch: 0.5320347724482417
Epoch: 36, Average val jaccard_micro coeff per epoch: 0.7507143020629883
Epoch: 36, Average val jaccard_macro coeff per epoch: 0.38706889748573303
Epoch: 36, Average val jaccard_weighted coeff per epoch: 0.7671051025390625
Epoch: 36, Average val jaccard_none coeff per epoch: tensor([0.6364, 0.8926, 0.5875, 0.7852, 0.1035, 0.1164, 0.3038, 0.0818, 0.2134,
        0.8477, 0.3018, 0.8176, 0.3723, 0.0266, 0.8359, 0.1514, 0.2191, 0.0762,
        0.0152, 0.3571], device='cuda:0')
Epoch: 37


100%|██████████| 170/170 [05:51<00:00,  2.07s/it]


Epoch: 37, Average train loss per epoch: 0.29259393372956444
Epoch: 37, Average train jaccard_micro coeff per epoch: 0.8445537090301514
Epoch: 37, Average train jaccard_macro coeff per epoch: 0.4889228940010071
Epoch: 37, Average train jaccard_weighted coeff per epoch: 0.8530527353286743
Epoch: 37, Average train jaccard_none coeff per epoch: tensor([0.8129, 0.9657, 0.7943, 0.8577, 0.2423, 0.3098, 0.3255, 0.1204, 0.2803,
        0.8672, 0.5075, 0.8933, 0.5049, 0.0493, 0.8919, 0.3097, 0.3150, 0.2340,
        0.0708, 0.4259], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.83s/it]


epoch=36, learning rate=0.0013
Epoch: 37, Average val loss per epoch: 0.5382042163982987
Epoch: 37, Average val jaccard_micro coeff per epoch: 0.7494975924491882
Epoch: 37, Average val jaccard_macro coeff per epoch: 0.3820173740386963
Epoch: 37, Average val jaccard_weighted coeff per epoch: 0.767748236656189
Epoch: 37, Average val jaccard_none coeff per epoch: tensor([0.6346, 0.8969, 0.5885, 0.7871, 0.1133, 0.1254, 0.3088, 0.0852, 0.2076,
        0.8493, 0.3051, 0.8101, 0.3788, 0.0287, 0.8270, 0.1350, 0.1353, 0.0432,
        0.0252, 0.3553], device='cuda:0')
Epoch: 38


100%|██████████| 170/170 [05:51<00:00,  2.07s/it]


Epoch: 38, Average train loss per epoch: 0.2905220636550118
Epoch: 38, Average train jaccard_micro coeff per epoch: 0.8458239436149597
Epoch: 38, Average train jaccard_macro coeff per epoch: 0.4949299991130829
Epoch: 38, Average train jaccard_weighted coeff per epoch: 0.8544556498527527
Epoch: 38, Average train jaccard_none coeff per epoch: tensor([0.8161, 0.9662, 0.7959, 0.8562, 0.2503, 0.3226, 0.3319, 0.1256, 0.2853,
        0.8715, 0.4985, 0.8929, 0.5046, 0.0691, 0.8943, 0.3212, 0.3061, 0.2557,
        0.0866, 0.4479], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=37, learning rate=0.0013
Epoch: 38, Average val loss per epoch: 0.5596478106454015
Epoch: 38, Average val jaccard_micro coeff per epoch: 0.7467178702354431
Epoch: 38, Average val jaccard_macro coeff per epoch: 0.3783394694328308
Epoch: 38, Average val jaccard_weighted coeff per epoch: 0.7638756036758423
Epoch: 38, Average val jaccard_none coeff per epoch: tensor([0.6308, 0.8985, 0.5797, 0.7922, 0.0978, 0.1370, 0.2954, 0.0784, 0.1907,
        0.8220, 0.3068, 0.8182, 0.3793, 0.0449, 0.8165, 0.1399, 0.1398, 0.0340,
        0.0217, 0.3432], device='cuda:0')
Epoch: 39


100%|██████████| 170/170 [05:51<00:00,  2.07s/it]


Epoch: 39, Average train loss per epoch: 0.2968180242706748
Epoch: 39, Average train jaccard_micro coeff per epoch: 0.8424645066261292
Epoch: 39, Average train jaccard_macro coeff per epoch: 0.488411545753479
Epoch: 39, Average train jaccard_weighted coeff per epoch: 0.8513800501823425
Epoch: 39, Average train jaccard_none coeff per epoch: tensor([0.8122, 0.9656, 0.7961, 0.8518, 0.2441, 0.3036, 0.3290, 0.1231, 0.2843,
        0.8677, 0.5031, 0.8918, 0.5063, 0.0644, 0.8876, 0.2928, 0.3113, 0.2056,
        0.0845, 0.4432], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=38, learning rate=0.0013
Epoch: 39, Average val loss per epoch: 0.5910743083804846
Epoch: 39, Average val jaccard_micro coeff per epoch: 0.7498198747634888
Epoch: 39, Average val jaccard_macro coeff per epoch: 0.37974727153778076
Epoch: 39, Average val jaccard_weighted coeff per epoch: 0.7655383348464966
Epoch: 39, Average val jaccard_none coeff per epoch: tensor([0.6389, 0.8961, 0.5794, 0.7849, 0.0995, 0.1204, 0.3049, 0.0964, 0.2061,
        0.8465, 0.2904, 0.8185, 0.3492, 0.0391, 0.8250, 0.1084, 0.1765, 0.0406,
        0.0232, 0.3510], device='cuda:0')
Epoch: 40


100%|██████████| 170/170 [05:51<00:00,  2.07s/it]


Epoch: 40, Average train loss per epoch: 0.2941024767125354
Epoch: 40, Average train jaccard_micro coeff per epoch: 0.8435247540473938
Epoch: 40, Average train jaccard_macro coeff per epoch: 0.490236759185791
Epoch: 40, Average train jaccard_weighted coeff per epoch: 0.8522599339485168
Epoch: 40, Average train jaccard_none coeff per epoch: tensor([0.8119, 0.9653, 0.7922, 0.8570, 0.2509, 0.3177, 0.3283, 0.1259, 0.2842,
        0.8652, 0.4956, 0.8943, 0.5010, 0.0716, 0.8921, 0.3238, 0.2977, 0.2111,
        0.0812, 0.4378], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=39, learning rate=0.0013
Epoch: 40, Average val loss per epoch: 0.5490339612588286
Epoch: 40, Average val jaccard_micro coeff per epoch: 0.7524454593658447
Epoch: 40, Average val jaccard_macro coeff per epoch: 0.38935399055480957
Epoch: 40, Average val jaccard_weighted coeff per epoch: 0.769390881061554
Epoch: 40, Average val jaccard_none coeff per epoch: tensor([0.6396, 0.8985, 0.5954, 0.7843, 0.1158, 0.1246, 0.3074, 0.0863, 0.2255,
        0.8504, 0.3091, 0.8154, 0.3754, 0.0333, 0.8270, 0.1346, 0.2353, 0.0477,
        0.0291, 0.3523], device='cuda:0')
Epoch: 41


100%|██████████| 170/170 [05:51<00:00,  2.07s/it]


Epoch: 41, Average train loss per epoch: 0.28108189456603105
Epoch: 41, Average train jaccard_micro coeff per epoch: 0.8502888679504395
Epoch: 41, Average train jaccard_macro coeff per epoch: 0.5027238130569458
Epoch: 41, Average train jaccard_weighted coeff per epoch: 0.858304500579834
Epoch: 41, Average train jaccard_none coeff per epoch: tensor([0.8227, 0.9670, 0.8019, 0.8638, 0.2652, 0.3252, 0.3385, 0.1297, 0.2896,
        0.8743, 0.5137, 0.8941, 0.5175, 0.0790, 0.8945, 0.3447, 0.3535, 0.2398,
        0.0945, 0.4454], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=40, learning rate=0.0013
Epoch: 41, Average val loss per epoch: 0.55885109025985
Epoch: 41, Average val jaccard_micro coeff per epoch: 0.7523998022079468
Epoch: 41, Average val jaccard_macro coeff per epoch: 0.3922802209854126
Epoch: 41, Average val jaccard_weighted coeff per epoch: 0.7696875333786011
Epoch: 41, Average val jaccard_none coeff per epoch: tensor([0.6310, 0.8991, 0.5961, 0.7886, 0.1158, 0.1289, 0.3123, 0.0916, 0.2295,
        0.8489, 0.3069, 0.8147, 0.3813, 0.0402, 0.8315, 0.1429, 0.2338, 0.0705,
        0.0156, 0.3665], device='cuda:0')
Epoch: 42


100%|██████████| 170/170 [05:51<00:00,  2.06s/it]


Epoch: 42, Average train loss per epoch: 0.2745495387736489
Epoch: 42, Average train jaccard_micro coeff per epoch: 0.8535744547843933
Epoch: 42, Average train jaccard_macro coeff per epoch: 0.5086214542388916
Epoch: 42, Average train jaccard_weighted coeff per epoch: 0.8613647818565369
Epoch: 42, Average train jaccard_none coeff per epoch: tensor([0.8314, 0.9674, 0.8068, 0.8667, 0.2691, 0.3404, 0.3429, 0.1343, 0.2973,
        0.8748, 0.5099, 0.8941, 0.5183, 0.0962, 0.8996, 0.3716, 0.3474, 0.2371,
        0.1082, 0.4589], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=41, learning rate=0.0013
Epoch: 42, Average val loss per epoch: 0.5800646161660552
Epoch: 42, Average val jaccard_micro coeff per epoch: 0.7494539618492126
Epoch: 42, Average val jaccard_macro coeff per epoch: 0.3850550949573517
Epoch: 42, Average val jaccard_weighted coeff per epoch: 0.7667553424835205
Epoch: 42, Average val jaccard_none coeff per epoch: tensor([0.6265, 0.8985, 0.5950, 0.7833, 0.1075, 0.1273, 0.3124, 0.0960, 0.2217,
        0.8518, 0.3117, 0.8158, 0.3492, 0.0524, 0.8239, 0.1213, 0.1409, 0.0646,
        0.0272, 0.3741], device='cuda:0')
Epoch: 43


100%|██████████| 170/170 [05:51<00:00,  2.07s/it]


Epoch: 43, Average train loss per epoch: 0.27200601819683523
Epoch: 43, Average train jaccard_micro coeff per epoch: 0.854845404624939
Epoch: 43, Average train jaccard_macro coeff per epoch: 0.5142545104026794
Epoch: 43, Average train jaccard_weighted coeff per epoch: 0.8625022768974304
Epoch: 43, Average train jaccard_none coeff per epoch: tensor([0.8342, 0.9682, 0.8062, 0.8673, 0.2778, 0.3450, 0.3476, 0.1349, 0.3003,
        0.8769, 0.5106, 0.8960, 0.5244, 0.1021, 0.9005, 0.3833, 0.3673, 0.2663,
        0.1123, 0.4638], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=42, learning rate=0.0013
Epoch: 43, Average val loss per epoch: 0.5650269342586398
Epoch: 43, Average val jaccard_micro coeff per epoch: 0.7521037459373474
Epoch: 43, Average val jaccard_macro coeff per epoch: 0.3902673125267029
Epoch: 43, Average val jaccard_weighted coeff per epoch: 0.7683359384536743
Epoch: 43, Average val jaccard_none coeff per epoch: tensor([0.6390, 0.8945, 0.5858, 0.7839, 0.1223, 0.1146, 0.3131, 0.0857, 0.2196,
        0.8521, 0.2882, 0.8191, 0.3856, 0.0665, 0.8238, 0.1462, 0.2111, 0.0669,
        0.0216, 0.3659], device='cuda:0')
Epoch: 44


100%|██████████| 170/170 [05:51<00:00,  2.07s/it]


Epoch: 44, Average train loss per epoch: 0.278101905216189
Epoch: 44, Average train jaccard_micro coeff per epoch: 0.8521353602409363
Epoch: 44, Average train jaccard_macro coeff per epoch: 0.5101550817489624
Epoch: 44, Average train jaccard_weighted coeff per epoch: 0.8599368929862976
Epoch: 44, Average train jaccard_none coeff per epoch: tensor([0.8311, 0.9657, 0.8044, 0.8650, 0.2810, 0.3495, 0.3460, 0.1373, 0.2999,
        0.8736, 0.5137, 0.8921, 0.5244, 0.1117, 0.8981, 0.3681, 0.3397, 0.2385,
        0.1104, 0.4527], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=43, learning rate=0.0013
Epoch: 44, Average val loss per epoch: 0.5899527706205845
Epoch: 44, Average val jaccard_micro coeff per epoch: 0.735221803188324
Epoch: 44, Average val jaccard_macro coeff per epoch: 0.38005703687667847
Epoch: 44, Average val jaccard_weighted coeff per epoch: 0.7544477581977844
Epoch: 44, Average val jaccard_none coeff per epoch: tensor([0.6152, 0.8774, 0.5728, 0.7695, 0.1043, 0.1223, 0.3054, 0.0918, 0.2139,
        0.8418, 0.2993, 0.8088, 0.3832, 0.0656, 0.8185, 0.1079, 0.1861, 0.0447,
        0.0216, 0.3509], device='cuda:0')
Epoch: 45


100%|██████████| 170/170 [05:51<00:00,  2.07s/it]


Epoch: 45, Average train loss per epoch: 0.2899774597848163
Epoch: 45, Average train jaccard_micro coeff per epoch: 0.8454405069351196
Epoch: 45, Average train jaccard_macro coeff per epoch: 0.49728456139564514
Epoch: 45, Average train jaccard_weighted coeff per epoch: 0.8536772131919861
Epoch: 45, Average train jaccard_none coeff per epoch: tensor([0.8131, 0.9640, 0.7943, 0.8580, 0.2508, 0.3279, 0.3410, 0.1331, 0.2904,
        0.8724, 0.5080, 0.8945, 0.5151, 0.1039, 0.8909, 0.3175, 0.3091, 0.2127,
        0.0964, 0.4524], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=44, learning rate=0.0013
Epoch: 45, Average val loss per epoch: 0.5499317841604352
Epoch: 45, Average val jaccard_micro coeff per epoch: 0.7497986555099487
Epoch: 45, Average val jaccard_macro coeff per epoch: 0.3939540684223175
Epoch: 45, Average val jaccard_weighted coeff per epoch: 0.7686739563941956
Epoch: 45, Average val jaccard_none coeff per epoch: tensor([0.6321, 0.8941, 0.5932, 0.7941, 0.1213, 0.1273, 0.3139, 0.1025, 0.2234,
        0.8439, 0.3056, 0.8251, 0.3653, 0.0948, 0.8181, 0.1347, 0.2476, 0.0472,
        0.0381, 0.3569], device='cuda:0')
Epoch: 46


100%|██████████| 170/170 [05:51<00:00,  2.07s/it]


Epoch: 46, Average train loss per epoch: 0.2652825816589243
Epoch: 46, Average train jaccard_micro coeff per epoch: 0.8582112193107605
Epoch: 46, Average train jaccard_macro coeff per epoch: 0.5194490551948547
Epoch: 46, Average train jaccard_weighted coeff per epoch: 0.865704357624054
Epoch: 46, Average train jaccard_none coeff per epoch: tensor([0.8412, 0.9688, 0.8136, 0.8706, 0.2938, 0.3653, 0.3556, 0.1435, 0.3072,
        0.8774, 0.5245, 0.8977, 0.5307, 0.1224, 0.8963, 0.3707, 0.3659, 0.2440,
        0.1225, 0.4773], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=45, learning rate=0.0013
Epoch: 46, Average val loss per epoch: 0.5842742761597037
Epoch: 46, Average val jaccard_micro coeff per epoch: 0.7514213919639587
Epoch: 46, Average val jaccard_macro coeff per epoch: 0.3919976055622101
Epoch: 46, Average val jaccard_weighted coeff per epoch: 0.7684417366981506
Epoch: 46, Average val jaccard_none coeff per epoch: tensor([0.6278, 0.8956, 0.5914, 0.7903, 0.1051, 0.1209, 0.3183, 0.1072, 0.2319,
        0.8500, 0.3055, 0.8221, 0.3918, 0.0777, 0.8235, 0.1293, 0.1823, 0.0620,
        0.0458, 0.3614], device='cuda:0')
Epoch: 47


100%|██████████| 170/170 [05:51<00:00,  2.07s/it]


Epoch: 47, Average train loss per epoch: 0.26414869462742524
Epoch: 47, Average train jaccard_micro coeff per epoch: 0.8585291504859924
Epoch: 47, Average train jaccard_macro coeff per epoch: 0.5230318307876587
Epoch: 47, Average train jaccard_weighted coeff per epoch: 0.8659005761146545
Epoch: 47, Average train jaccard_none coeff per epoch: tensor([0.8405, 0.9685, 0.8060, 0.8726, 0.2959, 0.3642, 0.3576, 0.1448, 0.3096,
        0.8801, 0.5225, 0.8967, 0.5339, 0.1329, 0.9026, 0.3982, 0.3829, 0.2513,
        0.1307, 0.4691], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=46, learning rate=0.0013
Epoch: 47, Average val loss per epoch: 0.5835056304931641
Epoch: 47, Average val jaccard_micro coeff per epoch: 0.7517297267913818
Epoch: 47, Average val jaccard_macro coeff per epoch: 0.3894353210926056
Epoch: 47, Average val jaccard_weighted coeff per epoch: 0.7689728140830994
Epoch: 47, Average val jaccard_none coeff per epoch: tensor([0.6252, 0.9000, 0.5910, 0.7920, 0.1156, 0.1329, 0.3166, 0.0975, 0.2370,
        0.8493, 0.3015, 0.8148, 0.3864, 0.0841, 0.8248, 0.1223, 0.1757, 0.0458,
        0.0239, 0.3525], device='cuda:0')
Epoch: 48


100%|██████████| 170/170 [05:52<00:00,  2.07s/it]


Epoch: 48, Average train loss per epoch: 0.2637725267340155
Epoch: 48, Average train jaccard_micro coeff per epoch: 0.8604480028152466
Epoch: 48, Average train jaccard_macro coeff per epoch: 0.5235617160797119
Epoch: 48, Average train jaccard_weighted coeff per epoch: 0.8681537508964539
Epoch: 48, Average train jaccard_none coeff per epoch: tensor([0.8473, 0.9695, 0.8134, 0.8702, 0.2923, 0.3754, 0.3618, 0.1498, 0.3126,
        0.8808, 0.5252, 0.8953, 0.5355, 0.1367, 0.9038, 0.3744, 0.3378, 0.2492,
        0.1458, 0.4943], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=47, learning rate=0.0013
Epoch: 48, Average val loss per epoch: 0.6174694327637553
Epoch: 48, Average val jaccard_micro coeff per epoch: 0.7394636869430542
Epoch: 48, Average val jaccard_macro coeff per epoch: 0.38340526819229126
Epoch: 48, Average val jaccard_weighted coeff per epoch: 0.7592015266418457
Epoch: 48, Average val jaccard_none coeff per epoch: tensor([0.6197, 0.8977, 0.5763, 0.7698, 0.1062, 0.1332, 0.3117, 0.0963, 0.2296,
        0.8336, 0.2986, 0.7444, 0.3968, 0.0657, 0.8279, 0.1374, 0.1894, 0.0482,
        0.0159, 0.3698], device='cuda:0')
Epoch: 49


100%|██████████| 170/170 [05:51<00:00,  2.07s/it]


Epoch: 49, Average train loss per epoch: 0.2741265511688064
Epoch: 49, Average train jaccard_micro coeff per epoch: 0.8543063998222351
Epoch: 49, Average train jaccard_macro coeff per epoch: 0.514056384563446
Epoch: 49, Average train jaccard_weighted coeff per epoch: 0.8620010614395142
Epoch: 49, Average train jaccard_none coeff per epoch: tensor([0.8393, 0.9684, 0.8106, 0.8604, 0.2814, 0.3627, 0.3500, 0.1382, 0.2964,
        0.8739, 0.5185, 0.8789, 0.5341, 0.1244, 0.8987, 0.3805, 0.3515, 0.2246,
        0.1183, 0.4702], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]


epoch=48, learning rate=0.0013
Epoch: 49, Average val loss per epoch: 0.5724103013053536
Epoch: 49, Average val jaccard_micro coeff per epoch: 0.754052460193634
Epoch: 49, Average val jaccard_macro coeff per epoch: 0.39428871870040894
Epoch: 49, Average val jaccard_weighted coeff per epoch: 0.7711252570152283
Epoch: 49, Average val jaccard_none coeff per epoch: tensor([0.6391, 0.8997, 0.5960, 0.7892, 0.1232, 0.1254, 0.3211, 0.1012, 0.2345,
        0.8494, 0.3025, 0.8116, 0.3749, 0.0655, 0.8295, 0.1474, 0.1983, 0.0669,
        0.0390, 0.3713], device='cuda:0')
Epoch: 50


100%|██████████| 170/170 [05:51<00:00,  2.06s/it]


Epoch: 50, Average train loss per epoch: 0.257033372714239
Epoch: 50, Average train jaccard_micro coeff per epoch: 0.8627684116363525
Epoch: 50, Average train jaccard_macro coeff per epoch: 0.5315767526626587
Epoch: 50, Average train jaccard_weighted coeff per epoch: 0.86970055103302
Epoch: 50, Average train jaccard_none coeff per epoch: tensor([0.8528, 0.9696, 0.8199, 0.8744, 0.2934, 0.3874, 0.3660, 0.1460, 0.3163,
        0.8804, 0.5393, 0.8952, 0.5469, 0.1496, 0.9038, 0.4231, 0.3666, 0.2400,
        0.1649, 0.4959], device='cuda:0')


100%|██████████| 32/32 [00:58<00:00,  1.82s/it]

epoch=49, learning rate=0.0013
Epoch: 50, Average val loss per epoch: 0.5644780546426773
Epoch: 50, Average val jaccard_micro coeff per epoch: 0.7505946755409241
Epoch: 50, Average val jaccard_macro coeff per epoch: 0.39438769221305847
Epoch: 50, Average val jaccard_weighted coeff per epoch: 0.7693668603897095
Epoch: 50, Average val jaccard_none coeff per epoch: tensor([0.6326, 0.8950, 0.5892, 0.7898, 0.1257, 0.1141, 0.3199, 0.1114, 0.2441,
        0.8470, 0.3043, 0.8176, 0.3880, 0.0749, 0.8305, 0.1354, 0.2073, 0.0604,
        0.0217, 0.3789], device='cuda:0')



