In [1]:
import argparse
import logging
import math
import os
import random
import shutil
import time
from collections import OrderedDict

import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from torch.utils.data.distributed import DistributedSampler
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

from utils import AverageMeter, accuracy

logger = logging.getLogger(__name__)
best_acc = 0


def save_checkpoint(state, is_best, checkpoint, filename="checkpoint.pth.tar"):
    filepath = os.path.join(checkpoint, filename)
    torch.save(state, filepath)
    if is_best:
        shutil.copyfile(filepath, os.path.join(checkpoint, "model_best.pth.tar"))


def set_seed(args):
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.n_gpu > 0:
        torch.cuda.manual_seed_all(args.seed)


def get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps,
    num_training_steps,
    num_cycles=7.0 / 16.0,
    last_epoch=-1,
):
    def _lr_lambda(current_step):
        if current_step < num_warmup_steps:
            return float(current_step) / float(max(1, num_warmup_steps))
        no_progress = float(current_step - num_warmup_steps) / float(
            max(1, num_training_steps - num_warmup_steps)
        )
        return max(0.0, math.cos(math.pi * num_cycles * no_progress))

    return LambdaLR(optimizer, _lr_lambda, last_epoch)


def interleave(x, size):
    s = list(x.shape)
    return x.reshape([-1, size] + s[1:]).transpose(0, 1).reshape([-1] + s[1:])


def de_interleave(x, size):
    s = list(x.shape)
    return x.reshape([size, -1] + s[1:]).transpose(0, 1).reshape([-1] + s[1:])


In [2]:
args = {
    "gpu_id": 0,  # ID(s) for CUDA_VISIBLE_DEVICES
    "num_workers": 4,  # Number of workers
    "dataset": "cifar10",  # Dataset name, choices are ['cifar10', 'cifar100', 'pss']
    "num_labeled": 4000,  # Number of labeled data
    "expand_labels": False,  # Expand labels to fit eval steps, default is not to expand
    "arch": "wideresnet",  # Model architecture, choices are ['wideresnet', 'resnext']
    "total_steps": 2**20,  # Number of total steps to run
    "eval_step": 1024,  # Number of evaluation steps to run
    "start_epoch": 0,  # Manual epoch number, useful on restarts
    "batch_size": 8,  # Train batch size
    "lr": 0.03,  # Initial learning rate
    "warmup": 0.0,  # Warmup epochs, based on unlabeled data
    "wdecay": 0.0005,  # Weight decay
    "nesterov": True,  # Use Nesterov momentum
    "use_ema": True,  # Use Exponential Moving Average model
    "ema_decay": 0.999,  # EMA decay rate
    "mu": 7,  # Coefficient of unlabeled batch size
    "lambda_u": 1.0,  # Coefficient of unlabeled loss
    "T": 1.0,  # Pseudo label temperature
    "threshold": 0.95,  # Pseudo label threshold
    "out": "result",  # Directory to output the result
    "resume": "",  # Path to latest checkpoint, default is none
    "seed": None,  # Random seed
    "amp": False,  # Use 16-bit (mixed) precision through NVIDIA apex AMP
    "opt_level": "O1",  # Apex AMP optimization level, see details at NVIDIA apex AMP documentation
    "local_rank": -1,  # For distributed training: local rank
    "no_progress": False,  # Don't use progress bar
}

args = argparse.Namespace(**args)

if args.local_rank == -1:
    device = torch.device("cuda", args.gpu_id)
    args.world_size = 1
    args.n_gpu = torch.cuda.device_count()
else:
    torch.cuda.set_device(args.local_rank)
    device = torch.device("cuda", args.local_rank)
    torch.distributed.init_process_group(backend="nccl")
    args.world_size = torch.distributed.get_world_size()
    args.n_gpu = 1

args.device = device

logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
    datefmt="%m/%d/%Y %H:%M:%S",
    level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN,
)

logger.warning(
    f"Process rank: {args.local_rank}, "
    f"device: {args.device}, "
    f"n_gpu: {args.n_gpu}, "
    f"distributed training: {bool(args.local_rank != -1)}, "
    f"16-bits training: {args.amp}",
)

logger.info(dict(args._get_kwargs()))

if args.seed is not None:
    set_seed(args)

if args.local_rank in [-1, 0]:
    os.makedirs(args.out, exist_ok=True)
    args.writer = SummaryWriter(args.out)


05/30/2024 15:46:38 - INFO - __main__ -   {'gpu_id': 0, 'num_workers': 4, 'dataset': 'cifar10', 'num_labeled': 4000, 'expand_labels': False, 'arch': 'wideresnet', 'total_steps': 1048576, 'eval_step': 1024, 'start_epoch': 0, 'batch_size': 8, 'lr': 0.03, 'warmup': 0.0, 'wdecay': 0.0005, 'nesterov': True, 'use_ema': True, 'ema_decay': 0.999, 'mu': 7, 'lambda_u': 1.0, 'T': 1.0, 'threshold': 0.95, 'out': 'result', 'resume': '', 'seed': None, 'amp': False, 'opt_level': 'O1', 'local_rank': -1, 'no_progress': False, 'world_size': 1, 'n_gpu': 1, 'device': device(type='cuda', index=0)}


In [3]:
if args.dataset == "cifar10":
    args.num_classes = 10
    if args.arch == "wideresnet":
        args.model_depth = 28
        args.model_width = 2
    elif args.arch == "resnext":
        args.model_cardinality = 4
        args.model_depth = 28
        args.model_width = 4

elif args.dataset == "cifar100":
    args.num_classes = 100
    if args.arch == "wideresnet":
        args.model_depth = 28
        args.model_width = 8
    elif args.arch == "resnext":
        args.model_cardinality = 8
        args.model_depth = 29
        args.model_width = 64

elif args.dataset == "pss":
    args.num_classes = 3
    if args.arch == "wideresnet":
        args.model_depth = 28
        args.model_width = 2
    elif args.arch == "resnext":
        args.model_cardinality = 4
        args.model_depth = 28
        args.model_width = 4

In [6]:
import logging
import math

import numpy as np
from PIL import Image
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import Dataset
import torch
import os
from PIL import Image

from randaugment import RandAugmentMC
if args.local_rank not in [-1, 0]:
    torch.distributed.barrier()

pss_mean = (0.52661989, 0.42101473, 0.34587943)
pss_std = (0.28316404, 0.2779676, 0.28667751)


class TransformFixMatch(object):
    def __init__(self, mean, std):
        self.weak = transforms.Compose(
            [
                transforms.Resize((32, 32)),
                transforms.RandomHorizontalFlip(),
                transforms.RandomCrop(
                    size=32,
                    padding=int(32 * 0.125),
                    padding_mode="reflect",
                ),
            ]
        )
        self.strong = transforms.Compose(
            [
                transforms.Resize((32, 32)),
                transforms.RandomHorizontalFlip(),
                transforms.RandomCrop(
                    size=32, padding=int(32 * 0.125), padding_mode="reflect"
                ),
                RandAugmentMC(n=2, m=10),
            ]
        )
        self.normalize = transforms.Compose(
            [transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]
        )

    def __call__(self, x):
        weak = self.weak(x)
        strong = self.strong(x)
        return self.normalize(weak), self.normalize(strong)


class PSS_Unlabeled(Dataset):
    def __init__(self, img_dir, transform):
        self.ids = os.listdir(img_dir)
        self.ids.sort()

        self.images_fps = [os.path.join(img_dir, image_id) for image_id in self.ids]

        self.transform = transform

    def __len__(self):
        return len(self.images_fps)

    def __getitem__(self, i):
        image = Image.open(self.images_fps[i])
        img_transformed = self.transform(image)

        return img_transformed
        # img_transform = transforms.Compose([transforms.PILToTensor()])

        # return img_transform(img_transformed)


def x_u_split(args, labels):
    label_per_class = args.num_labeled // args.num_classes
    labels = np.array(labels)
    labeled_idx = []
    # unlabeled data: all data (https://github.com/kekmodel/FixMatch-pytorch/issues/10)
    unlabeled_idx = np.array(range(len(labels)))
    for i in range(args.num_classes):
        idx = np.where(labels == i)[0]
        idx = np.random.choice(idx, label_per_class, False)
        labeled_idx.extend(idx)
    labeled_idx = np.array(labeled_idx)
    assert len(labeled_idx) == args.num_labeled

    if args.expand_labels or args.num_labeled < args.batch_size:
        num_expand_x = math.ceil(args.batch_size * args.eval_step / args.num_labeled)
        labeled_idx = np.hstack([labeled_idx for _ in range(num_expand_x)])
    np.random.shuffle(labeled_idx)
    return labeled_idx, unlabeled_idx


def get_pss(args, root):
    transform_labeled = transforms.Compose(
        [
            transforms.Resize((32, 32)),
            transforms.RandomHorizontalFlip(),
            transforms.RandomCrop(
                size=32, padding=int(32 * 0.125), padding_mode="reflect"
            ),
            transforms.ToTensor(),
            transforms.Normalize(mean=pss_mean, std=pss_std),
        ]
    )
    transform_val = transforms.Compose(
        [
            transforms.Resize((32, 32)),
            transforms.ToTensor(),
            transforms.Normalize(mean=pss_mean, std=pss_std),
        ]
    )

    train_labeled_dataset = datasets.ImageFolder(
        root="C:\\Users\\can.michael\\Desktop\\others\\SSL\\FixMatch\\FixMatch-pytorch\\data\\pizza_steak_sushi\\20label\\train",
        transform=transform_labeled,
        target_transform=None,
    )

    train_unlabeled_dataset = PSS_Unlabeled(
        img_dir="C:\\Users\\can.michael\\Desktop\\others\\SSL\\FixMatch\\FixMatch-pytorch\\data\\pizza_steak_sushi\\20label\\unlabeled",
        transform=TransformFixMatch(mean=pss_mean, std=pss_std),
    )

    test_dataset = datasets.ImageFolder(
        root="C:\\Users\\can.michael\\Desktop\\others\\SSL\\FixMatch\\FixMatch-pytorch\\data\\pizza_steak_sushi\\20label\\test",
        transform=transform_val,
        target_transform=None,
    )

    return train_labeled_dataset, train_unlabeled_dataset, test_dataset

labeled_dataset, unlabeled_dataset, test_dataset = get_pss(
    args, "./data"
)


In [7]:

if args.local_rank == 0:
    torch.distributed.barrier()

train_sampler = RandomSampler if args.local_rank == -1 else DistributedSampler

labeled_trainloader = DataLoader(
    labeled_dataset,
    sampler=train_sampler(labeled_dataset),
    batch_size=args.batch_size,
    num_workers=args.num_workers,
    drop_last=True,
)

unlabeled_trainloader = DataLoader(
    unlabeled_dataset,
    sampler=train_sampler(unlabeled_dataset),
    batch_size=args.batch_size * args.mu,
    num_workers=args.num_workers,
    drop_last=True,
)

test_loader = DataLoader(
    test_dataset,
    sampler=SequentialSampler(test_dataset),
    batch_size=args.batch_size,
    num_workers=args.num_workers,
)

if args.local_rank not in [-1, 0]:
    torch.distributed.barrier()


In [8]:
def create_model(args):
    if args.arch == "wideresnet":
        import models.wideresnet as models

        model = models.build_wideresnet(
            depth=args.model_depth,
            widen_factor=args.model_width,
            dropout=0,
            num_classes=args.num_classes,
        )
    elif args.arch == "resnext":
        import models.resnext as models

        model = models.build_resnext(
            cardinality=args.model_cardinality,
            depth=args.model_depth,
            width=args.model_width,
            num_classes=args.num_classes,
        )
    logger.info(
        "Total params: {:.2f}M".format(
            sum(p.numel() for p in model.parameters()) / 1e6
        )
    )
    return model

model = create_model(args)

if args.local_rank == 0:
    torch.distributed.barrier()

model.to(args.device)

05/30/2024 15:47:35 - INFO - models.wideresnet -   Model: WideResNet 28x2
05/30/2024 15:47:35 - INFO - __main__ -   Total params: 1.47M


WideResNet(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (block1): NetworkBlock(
    (layer): Sequential(
      (0): BasicBlock(
        (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (relu1): LeakyReLU(negative_slope=0.1, inplace=True)
        (conv1): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (relu2): LeakyReLU(negative_slope=0.1, inplace=True)
        (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (convShortcut): Conv2d(16, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
      )
      (1): BasicBlock(
        (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
        (relu1): LeakyReLU(negative_slope=0.1, inplace=True)
        (conv1): Conv2d(32, 32, kernel_size=(

In [9]:
no_decay = ["bias", "bn"]
grouped_parameters = [
    {
        "params": [
            p
            for n, p in model.named_parameters()
            if not any(nd in n for nd in no_decay)
        ],
        "weight_decay": args.wdecay,
    },
    {
        "params": [
            p
            for n, p in model.named_parameters()
            if any(nd in n for nd in no_decay)
        ],
        "weight_decay": 0.0,
    },
]
optimizer = optim.SGD(
    grouped_parameters, lr=args.lr, momentum=0.9, nesterov=args.nesterov
)

args.epochs = math.ceil(args.total_steps / args.eval_step)
scheduler = get_cosine_schedule_with_warmup(
    optimizer, args.warmup, args.total_steps
)

if args.use_ema:
    from models.ema import ModelEMA

    ema_model = ModelEMA(args, model, args.ema_decay)

args.start_epoch = 0

if args.resume:
    logger.info("==> Resuming from checkpoint..")
    assert os.path.isfile(args.resume), "Error: no checkpoint directory found!"
    args.out = os.path.dirname(args.resume)
    checkpoint = torch.load(args.resume)
    best_acc = checkpoint["best_acc"]
    args.start_epoch = checkpoint["epoch"]
    model.load_state_dict(checkpoint["state_dict"])
    if args.use_ema:
        ema_model.ema.load_state_dict(checkpoint["ema_state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer"])
    scheduler.load_state_dict(checkpoint["scheduler"])

if args.amp:
    from apex import amp

    model, optimizer = amp.initialize(model, optimizer, opt_level=args.opt_level)

if args.local_rank != -1:
    model = torch.nn.parallel.DistributedDataParallel(
        model,
        device_ids=[args.local_rank],
        output_device=args.local_rank,
        find_unused_parameters=True,
    )


In [10]:
def test(args, test_loader, model, epoch):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    if not args.no_progress:
        test_loader = tqdm(test_loader, disable=args.local_rank not in [-1, 0])

    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(test_loader):
            data_time.update(time.time() - end)
            model.eval()

            inputs = inputs.to(args.device)
            targets = targets.to(args.device)
            outputs = model(inputs)
            loss = F.cross_entropy(outputs, targets)

            prec1, prec5 = accuracy(outputs, targets, topk=(1, 5))
            losses.update(loss.item(), inputs.shape[0])
            top1.update(prec1.item(), inputs.shape[0])
            top5.update(prec5.item(), inputs.shape[0])
            batch_time.update(time.time() - end)
            end = time.time()
            if not args.no_progress:
                test_loader.set_description(
                    "Test Iter: {batch:4}/{iter:4}. Data: {data:.3f}s. Batch: {bt:.3f}s. Loss: {loss:.4f}. top1: {top1:.2f}. top5: {top5:.2f}. ".format(
                        batch=batch_idx + 1,
                        iter=len(test_loader),
                        data=data_time.avg,
                        bt=batch_time.avg,
                        loss=losses.avg,
                        top1=top1.avg,
                        top5=top5.avg,
                    )
                )
        if not args.no_progress:
            test_loader.close()

    logger.info("top-1 acc: {:.2f}".format(top1.avg))
    logger.info("top-5 acc: {:.2f}".format(top5.avg))
    return losses.avg, top1.avg



In [12]:
def train(
    args,
    labeled_trainloader,
    unlabeled_trainloader,
    test_loader,
    model,
    optimizer,
    ema_model,
    scheduler,
):
    if args.amp:
        from apex import amp
    global best_acc
    test_accs = []
    end = time.time()

    if args.world_size > 1:
        labeled_epoch = 0
        unlabeled_epoch = 0
        labeled_trainloader.sampler.set_epoch(labeled_epoch)
        unlabeled_trainloader.sampler.set_epoch(unlabeled_epoch)

    labeled_iter = iter(labeled_trainloader)
    unlabeled_iter = iter(unlabeled_trainloader)

    model.train()
    for epoch in range(args.start_epoch, args.epochs):
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        losses_x = AverageMeter()
        losses_u = AverageMeter()
        mask_probs = AverageMeter()
        if not args.no_progress:
            p_bar = tqdm(range(args.eval_step), disable=args.local_rank not in [-1, 0])
        for batch_idx in range(args.eval_step):
            try:
                inputs_x, targets_x = next(labeled_iter)
            except:
                if args.world_size > 1:
                    labeled_epoch += 1
                    labeled_trainloader.sampler.set_epoch(labeled_epoch)
                labeled_iter = iter(labeled_trainloader)
                inputs_x, targets_x = next(labeled_iter)

            try:
                unlabeled_pair = next(unlabeled_iter)
                (inputs_u_w, inputs_u_s), _ = unlabeled_pair[0], unlabeled_pair[1]
            except:
                if args.world_size > 1:
                    unlabeled_epoch += 1
                    unlabeled_trainloader.sampler.set_epoch(unlabeled_epoch)
                unlabeled_iter = iter(unlabeled_trainloader)
                unlabeled_pair = next(unlabeled_iter)
                (inputs_u_w, inputs_u_s), _ = unlabeled_pair[0], unlabeled_pair[1]

            data_time.update(time.time() - end)
            batch_size = inputs_x.shape[0]
            inputs = interleave(
                torch.cat((inputs_x, inputs_u_w, inputs_u_s)), 2 * args.mu + 1
            ).to(args.device)
            targets_x = targets_x.to(args.device)
            logits = model(inputs)
            logits = de_interleave(logits, 2 * args.mu + 1)
            logits_x = logits[:batch_size]
            logits_u_w, logits_u_s = logits[batch_size:].chunk(2)
            del logits

            targets_x = targets_x.long()

            Lx = F.cross_entropy(logits_x, targets_x, reduction="mean")

            pseudo_label = torch.softmax(logits_u_w.detach() / args.T, dim=-1)
            max_probs, targets_u = torch.max(pseudo_label, dim=-1)
            mask = max_probs.ge(args.threshold).float()

            Lu = (
                F.cross_entropy(logits_u_s, targets_u, reduction="none") * mask
            ).mean()

            loss = Lx + args.lambda_u * Lu

            if args.amp:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            losses.update(loss.item())
            losses_x.update(Lx.item())
            losses_u.update(Lu.item())
            optimizer.step()
            scheduler.step()
            if args.use_ema:
                ema_model.update(model)
            model.zero_grad()

            batch_time.update(time.time() - end)
            end = time.time()
            mask_probs.update(mask.mean().item())
            if not args.no_progress:
                p_bar.set_description(
                    "Train Epoch: {epoch}/{epochs:4}. Iter: {batch:4}/{iter:4}. LR: {lr:.4f}. Data: {data:.3f}s. Batch: {bt:.3f}s. Loss: {loss:.4f}. Loss_x: {loss_x:.4f}. Loss_u: {loss_u:.4f}. Mask: {mask:.2f}. ".format(
                        epoch=epoch + 1,
                        epochs=args.epochs,
                        batch=batch_idx + 1,
                        iter=args.eval_step,
                        lr=scheduler.get_last_lr()[0],
                        data=data_time.avg,
                        bt=batch_time.avg,
                        loss=losses.avg,
                        loss_x=losses_x.avg,
                        loss_u=losses_u.avg,
                        mask=mask_probs.avg,
                    )
                )
                p_bar.update()

        if not args.no_progress:
            p_bar.close()

        if args.use_ema:
            test_model = ema_model.ema
        else:
            test_model = model

        if args.local_rank in [-1, 0]:
            test_loss, test_acc = test(args, test_loader, test_model, epoch)

            # args.writer.add_scalar("train/1.train_loss", losses.avg, epoch)
            # args.writer.add_scalar("train/2.train_loss_x", losses_x.avg, epoch)
            # args.writer.add_scalar("train/3.train_loss_u", losses_u.avg, epoch)
            # args.writer.add_scalar("train/4.mask", mask_probs.avg, epoch)
            # args.writer.add_scalar("test/1.test_acc", test_acc, epoch)
            # args.writer.add_scalar("test/2.test_loss", test_loss, epoch)
            # TODO CMH UNCOMMENT BURAYI HIZLANSIN DIYE YORUMA ALDIN

            is_best = test_acc > best_acc
            best_acc = max(test_acc, best_acc)

            model_to_save = model.module if hasattr(model, "module") else model
            if args.use_ema:
                ema_to_save = (
                    ema_model.ema.module
                    if hasattr(ema_model.ema, "module")
                    else ema_model.ema
                )
            # save_checkpoint(
            #     {
            #         "epoch": epoch + 1,
            #         "state_dict": model_to_save.state_dict(),
            #         "ema_state_dict": (
            #             ema_to_save.state_dict() if args.use_ema else None
            #         ),
            #         "acc": test_acc,
            #         "best_acc": best_acc,
            #         "optimizer": optimizer.state_dict(),
            #         "scheduler": scheduler.state_dict(),
            #     },
            #     is_best,
            #     args.out,
            # ) TODO CMH UNCOMMENT BURAYI HIZLANSIN DIYE YORUMA ALDIN

            test_accs.append(test_acc)
            logger.info("Best top-1 acc: {:.2f}".format(best_acc))
            logger.info("Mean top-1 acc: {:.2f}\n".format(np.mean(test_accs[-20:])))

    # if args.local_rank in [-1, 0]:
    # args.writer.close() TODO CMH UNCOMMENT BURAYI HIZLANSIN DIYE YORUMA ALDIN


In [13]:

logger.info("***** Running training *****")
logger.info(f"  Task = {args.dataset}@{args.num_labeled}")
logger.info(f"  Num Epochs = {args.epochs}")
logger.info(f"  Batch size per GPU = {args.batch_size}")
logger.info(f"  Total train batch size = {args.batch_size*args.world_size}")
logger.info(f"  Total optimization steps = {args.total_steps}")

model.zero_grad()
train(
    args,
    labeled_trainloader,
    unlabeled_trainloader,
    test_loader,
    model,
    optimizer,
    ema_model,
    scheduler,
)



05/30/2024 15:47:44 - INFO - __main__ -   ***** Running training *****
05/30/2024 15:47:44 - INFO - __main__ -     Task = cifar10@4000
05/30/2024 15:47:44 - INFO - __main__ -     Num Epochs = 1024
05/30/2024 15:47:44 - INFO - __main__ -     Batch size per GPU = 8
05/30/2024 15:47:44 - INFO - __main__ -     Total train batch size = 8
05/30/2024 15:47:44 - INFO - __main__ -     Total optimization steps = 1048576
  0%|          | 0/1024 [00:00<?, ?it/s]

RuntimeError: DataLoader worker (pid(s) 8680, 20416, 18584, 18972) exited unexpectedly