In [1]:
'''
Surface Defect Detection using Convolutional AutoEncoder
'''
import torch
import torchvision
import torch.nn as nn
from torchvision.datasets import ImageFolder
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import pathlib
import os
from tqdm import tqdm

In [2]:

'''
Train/Test Data Preparation
'''
# normal dataset path
TRAIN_DATASET_PATH = pathlib.Path(os.path.dirname(os.path.realpath('__file__'))).parent.parent / "dataset" / "SDD" / "luxteel" / "ae_dataset" / "train"
TEST_DATASET_PATH = pathlib.Path(os.path.dirname(os.path.realpath('__file__'))).parent.parent / "dataset" / "SDD" / "luxteel" / "ae_dataset" / "test"

# set dataloader
DATA_BATCH_SIZE = 10
dataset = ImageFolder(TRAIN_DATASET_PATH, transform=transforms.ToTensor())
dataloader = DataLoader(dataset=dataset, batch_size=DATA_BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)

# calculate mean and std for normalization
_sum_channels = 0
_squared_sum_channels = 0
_total_batches = 0

for data, _ in tqdm(dataloader):
    _sum_channels += torch.mean(data, dim=[0,2,3]) # calc mean for each channels (dim=Batch, Channel, Height, Width)
    _squared_sum_channels += torch.mean(data**2, dim=[0,2,3])
    _total_batches += 1

mean = _sum_channels / _total_batches
std = (_squared_sum_channels / _squared_sum_channels - mean ** 2) ** 0.5
print(_sum_channels, _squared_sum_channels, _total_batches)
print(f"Mean : {mean}, Std:{std}")


100%|██████████| 51/51 [00:07<00:00,  6.51it/s]

tensor([15.0587, 15.0587, 15.0587]) tensor([4.7590, 4.7590, 4.7590]) 51
Mean : tensor([0.2953, 0.2953, 0.2953]), Std:tensor([0.9554, 0.9554, 0.9554])





In [8]:
'''
Convolutional Auto Encoder
'''
import torch.nn.functional as F
from torchinfo import summary

EPOCH = 10
BATCH_SIZE = 10
LEARNING_RATE = 0.001

class ConvAutoencoder(nn.Module):
    def __init__(self, channels):
        super(ConvAutoencoder, self).__init__()
        
        # Encoder
        self.conv_1 = nn.Conv2d(in_channels=channels, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.conv_2 = nn.Conv2d(in_channels=128, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.conv_3 = nn.Conv2d(in_channels=64, out_channels=32, kernel_size=3, stride=1, padding=1) # same dimension?
        self.pool_1 = nn.MaxPool2d(kernel_size=2, padding='same')
        self.pool_2 = nn.MaxPool2d(kernel_size=2, padding='same')
        
        # Decoder
        self.upscale_1 = nn.ConvTranspose2d(in_channels=32, out_channels=64, kernel_size=3, stride=2, padding=1)
        self.upscale_2 = nn.ConvTranspose2d(in_channels=64, out_channels=128, kernel_size=3, stride=2, padding=1)
        self.upscale_3 = nn.ConvTranspose2d(in_channels=128, out_channels=channels, kernel_size=3, stride=2, padding=2)
        
    
    # encoder part        
    def encoder(self, image): # 640x400x3
        conv1 = self.conv_1(image)
        relu1 = F.relu(conv1) # 640x400x128
        pool1 = self.pool1(relu1) # 320x200x128
  
        conv2 = self.conv_2(pool1)
        relu2 = F.relu(conv2) # 320x200x64
        pool2 = self.pool1(relu2) #160x100x64
        
        conv3 = self.conv_3(pool2)
        relu3 = F.relu(conv3) # 160x100x32
        pool3 = self.pool2(relu3) # 80x50x32
        
        #pool3 = pool3.view([image.size(0), 128, 80, 40]).cuda()
        return pool3
    
    # decoder part
    def decoder(self, encoding):
        up1 = self.upscale_1(encoding) # 160x100x64
        up_relu1 = F.relu(up1)
        
        up2 = self.upscale_2(up_relu1) # 320x200x128
        up_relu2 = F.relu(up2)
        
        up3 = self.upscale_3(up_relu2) # 640x400x3
        up_relu3 = F.relu(up3)
        
        return up_relu3
        
        # logits = self.conv(up_relu3)
        # logits = F.sigmoid(logits)
        # logits = logits.view([encoding.size(0), 1, 28, 28]).cuda()
        # return logits
    
    def forward(self, image):
        encoding = self.encoder(image)
        reconst = self.decoder(encoding)
        return encoding, reconst
    
    
model = ConvAutoencoder(channels=3)
summary(model, input_size=(3, 640, 400))

RuntimeError: Failed to run torchinfo. See above stack traces for more details. Executed layers up to: [Conv2d: 1]

In [None]:
'''
Training Phase
'''
import torch.optim as optim
from torch.autograd import Variable

def train(data_loader, size, model, criterion, optimizer, num_epochs=20):
	print('Start training')
	for epoch in range(num_epochs):
		print('Epoch {}/{}'.format(epoch, num_epochs-1))
		tloss = 0.0
		for data in data_loader:
			inputs, _ = data
			optimizer.zero_grad()
			encoding, logits = model(Variable(inputs.cuda()))
			loss = criterion(logits, Variable(inputs.cuda()))
			loss.backward()
			optimizer.step()
			tloss += loss.data[0]
		epoch_loss = tloss/size
		print('Epoch loss: {:4f}'.format(epoch_loss))
	print('Complete training')
	return model

# create model instance and upload into cuda device
model = ConvAutoencoder().cuda()

# 
criterion = nn.BCELoss()
size = len(dataset)

optimizer_fn = optim.Adam
optimizer = optimizer_fn(model.parameters(), lr=LEARNING_RATE)
model = train(dataloader, size, model, criterion, optimizer, num_epochs=EPOCH)

# test_image = random.choice(test_data)
# test_image = Variable(test_image[0].unsqueeze(0).cuda())
# _, out = model(test_image)

torchvision.utils.save_image(test_image.data, 'in.png')
torchvision.utils.save_image(out.data, 'out.png')

In [None]:

# set dataloader
DATA_BATCH_SIZE = 10

dataset = ImageFolder(TRAIN_DATASET_PATH, transform=transforms.ToTensor())
dataloader = DataLoader(dataset=dataset, batch_size=DATA_BATCH_SIZE)


# calc mean and std for image normalization
_sum_channels = 0
_squared_sum_channels = 0
_total_batches = 0

for data, _ in tqdm(dataloader):
    _sum_channels += torch.mean(data, dim=[0,2,3]) # calc mean for each channels (dim=Batch, Channel, Height, Width)
    _squared_sum_channels += torch.mean(data**2, dim=[0,2,3])
    _total_batches += 1

mean = _sum_channels / _total_batches
std = (_squared_sum_channels / _squared_sum_channels - mean ** 2) ** 0.5
print(_sum_channels, _squared_sum_channels, _total_batches)
print(f"Mean : {mean}, Std:{std}")


# normalization(0~1) of dataset images
# normalization이 필요할까 모르겠다
stats = (tuple(mean.tolist()), tuple(std.tolist()))
train_transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(*stats, inplace=True)])

train_dataset = ImageFolder(TRAIN_DATASET_PATH.as_posix(), train_transform)
train_dataloader = DataLoader(train_dataset, batch_size=DATA_BATCH_SIZE, shuffle=False, num_workers=4, pin_memory=True)