In [5]:
import os 
os.chdir("/workspace/")
from PIL import Image
import numpy as np
import torch
from torch import Tensor, nn
from torchvision.transforms import functional as F, InterpolationMode
import dct_manip as dm
import torchvision.transforms as transforms
import utils.custom_transforms as ctrans

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
from data import get_dataloader
import argparse
from utils import configs

In [3]:
parser = argparse.ArgumentParser(description="JPEG-AnoViT")

# model config
parser.add_argument('--model_arch', type=str, default='vits', help='Model architecture (vitti, vits, vitb, vitl, swinv2)')
parser.add_argument('--no_subblock', action='store_true', help='If set, disable subblock conversion')
parser.add_argument("--embed_type", type=int, default=1, help='Embedding layer type. (1: grouped, 2: separate, 3: concatenate). Default 1')
parser.add_argument("--domain", type=str, default="dct", help="(DCT/RGB) Choose domain type")
parser.add_argument("--image_format", type=str, default="JPEG", help="(JPEG/PNG) Choose image format")

parser.add_argument("--configs", type=str, default=None, help="exp config file")
parser.add_argument("--device", type=str, default="cuda", help="gpu id")
parser.add_argument("--img_size", type=int, default=224, help="image size")
parser.add_argument("--patch_size", type=int, default=16, help="patch size")

# training config
parser.add_argument("--seed", type=int, default=42, help="random seed")
parser.add_argument("--epochs", type=int, default=500, help="number of training epochs")
parser.add_argument("--batch", type=int, default=32, help="batch size")
parser.add_argument("--lr", type=float, default=0.001, help="learning rate")
parser.add_argument("--wd", type=float, default=0.05, help="weight decay")
parser.add_argument("--drop", type=float, default=0.0, help="dropout rate")
parser.add_argument("--warmup_steps", type=int, default=100, help="warmup steps")


args = parser.parse_args([])

In [6]:
cfg = configs.generate_config(
    modelarch = args.model_arch.lower(),
    domain = args.domain,
    image_format = args.image_format,
    target = "bottle",
    modelver=args.embed_type,
    subblock=True if not args.no_subblock else False,
    epochs=None if args.epochs < 0 else args.epochs, # need to add
    batchsize=1 if args.batch < 0 else args.batch, # need to change order
    lr=None if args.lr < 0 else args.lr,
    wd=None if args.wd < 0 else args.wd,
    drop=None if args.drop < 0 else args.drop,
    warmup_steps=None if args.warmup_steps < 0 else args.warmup_steps, # need to add
    seed=None if args.seed < 0 else args.seed, # need to add
)

In [7]:
import os
os.chdir("/workspace/")
from data import MvtecAd

In [9]:
train_dataset = MvtecAd(
    datadir = cfg.DATASET.DATADIR,
    target = cfg.DATASET.TARGET,
    is_train=True,
    resize=cfg.MODEL.IMG_SIZE,
    image_domain=cfg.MODEL.DOMAIN,
    image_format=cfg.MODEL.IMAGE_FORMAT,
)

In [10]:
train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=cfg.TRAIN.BATCHSIZE,
    )

In [11]:
for img, mask, target in train_loader:
    print(img.shape)
    print(mask.shape)
    print(target.shape)
    break

AttributeError: 'list' object has no attribute 'shape'

In [20]:
img[0].shape

torch.Size([32, 1, 28, 28, 8, 8])

In [21]:
img[1].shape

torch.Size([32, 2, 14, 14, 8, 8])

In [24]:
mask.shape

torch.Size([32, 224])

In [25]:
target.shape

torch.Size([32])

In [36]:
def dct_to_rgb(coeff):
    """
    Args:
        coeff (Tensor or tuple): dct coefficient with five channels in (c, h, w, kh, kw)
                                if tuple, (Y, cbcr) is expected.

    Returns:
        Tensor: Converted YCbCr to RGB data
    """
    Y, cbcr = coeff
    assert Y.dtype == torch.float32 and cbcr.dtype == torch.float32, f"Y and CbCr dtype should be torch.float32. Current:{Y.dtype}, {cbcr.dtype}"

    ### Convert using JPEG encoder ###
    _, H, W, KH, KW = Y.shape
    _, CH, CW, _, _ = cbcr.shape
    dim_inferred = torch.tensor([[H*KH, W*KW], [CH*KH, CW*KW], [CH*KH, CW*KW]], dtype=torch.int32)
    quant_100 = 2*torch.ones((3,8,8), dtype=torch.int16)
    RGBimg = dm.decode_coeff(
        dim_inferred, quant_100, 
        (Y/2).round().to(torch.int16).clamp(min=-1024, max=1016).contiguous(), 
        (cbcr/2).round().to(torch.int16).clamp(min=-1024, max=1016).contiguous()
        )
    return RGBimg


In [20]:
def dct_to_rgb_batch(coeff_batch):
    """
    Args:
        coeff_batch (Tensor or tuple): dct coefficient batch with five channels in (b, c, h, w, kh, kw)
                                       if tuple, (Y, cbcr) is expected.

    Returns:
        Tensor: Batch of converted YCbCr to RGB data
    """
    Y_batch, cbcr_batch = coeff_batch
    assert Y_batch.dtype == torch.float32 and cbcr_batch.dtype == torch.float32, f"Y and CbCr dtype should be torch.float32. Current:{Y_batch.dtype}, {cbcr_batch.dtype}"

    B, _, H, W, KH, KW = Y_batch.shape
    _, _, CH, CW, _, _ = cbcr_batch.shape

    # Move tensors to the device Y_batch is on (assumed to be the CUDA device).
    device = Y_batch.device
    dim_inferred = torch.tensor([[H*KH, W*KW], [CH*KH, CW*KW], [CH*KH, CW*KW]], dtype=torch.int32).to(device).expand(B, -1, -1)
    quant_100 = 2 * torch.ones((B, 3, 8, 8), dtype=torch.int16).to(device)

    RGBimg_list = []

    for i in range(B):
        RGBimg = dm.decode_coeff(
            dim_inferred[i], 
            quant_100[i],
            (Y_batch[i]/2).round().to(torch.int16).clamp(min=-1024, max=1016).contiguous(),
            (cbcr_batch[i]/2).round().to(torch.int16).clamp(min=-1024, max=1016).contiguous()
        )
        RGBimg_list.append(RGBimg)

    return torch.stack(RGBimg_list).to(device)


In [22]:
dct_to_rgb_batch(img).shape

torch.Size([32, 3, 224, 224])

In [23]:
mask.shape

torch.Size([32, 224])

In [18]:
B, _, H, W, KH, KW = Y_batch.shape

In [19]:
_, CH, CW, _, _ = cbcr_batch.shape

ValueError: too many values to unpack (expected 5)

In [3]:
PNG = "./dataset/mvtec_ad/bottle/train/good/060.png"

In [7]:
img = Image.open(PNG)

In [9]:
dm.quantize_at_quality(F.pil_to_tensor(img), quality=100)

(tensor([[900, 900],
         [450, 450],
         [450, 450]], dtype=torch.int32),
 tensor([[[1, 1, 1, 1, 1, 1, 1, 1],
          [1, 1, 1, 1, 1, 1, 1, 1],
          [1, 1, 1, 1, 1, 1, 1, 1],
          [1, 1, 1, 1, 1, 1, 1, 1],
          [1, 1, 1, 1, 1, 1, 1, 1],
          [1, 1, 1, 1, 1, 1, 1, 1],
          [1, 1, 1, 1, 1, 1, 1, 1],
          [1, 1, 1, 1, 1, 1, 1, 1]],
 
         [[1, 1, 1, 1, 1, 1, 1, 1],
          [1, 1, 1, 1, 1, 1, 1, 1],
          [1, 1, 1, 1, 1, 1, 1, 1],
          [1, 1, 1, 1, 1, 1, 1, 1],
          [1, 1, 1, 1, 1, 1, 1, 1],
          [1, 1, 1, 1, 1, 1, 1, 1],
          [1, 1, 1, 1, 1, 1, 1, 1],
          [1, 1, 1, 1, 1, 1, 1, 1]],
 
         [[1, 1, 1, 1, 1, 1, 1, 1],
          [1, 1, 1, 1, 1, 1, 1, 1],
          [1, 1, 1, 1, 1, 1, 1, 1],
          [1, 1, 1, 1, 1, 1, 1, 1],
          [1, 1, 1, 1, 1, 1, 1, 1],
          [1, 1, 1, 1, 1, 1, 1, 1],
          [1, 1, 1, 1, 1, 1, 1, 1],
          [1, 1, 1, 1, 1, 1, 1, 1]]], dtype=torch.int16),
 tensor([[[[[1016,    0,

In [None]:
# change all the jpeg size 900 to 224
data_dir = 