I am trying to reimplement, in PyTorch, the Deep One-Class Classification method from the Deep SVDD paper. In my reimplementation, I will mainly be using 

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch import nn
from torch.utils.data import DataLoader, Dataset, Subset
from torchvision import datasets
from torchvision.transforms import ToTensor, Normalize, Compose, Lambda
from torch.nn.functional import normalize

In [2]:
# Load the MNIST train and test splits from the local "data" directory
# (download=True asks torchvision to fetch them if needed); every image is
# converted to a tensor by ToTensor().
training_data = datasets.MNIST(root="data", train=True, download=True, transform=ToTensor())
test_data = datasets.MNIST(root="data", train=False, download=True, transform=ToTensor())

In [3]:
# Pick the compute backend in order of preference: CUDA, then Apple MPS,
# and finally the CPU as the fallback.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

In [4]:
class MNIST_LeNet_AutoEncoder(nn.Module):
    """Convolutional autoencoder for 1x28x28 images with a 32-d bottleneck.

    The encoder uses the same layer configuration as ``MNIST_LeNet_Network``
    (two conv / batch-norm / leaky-ReLU / max-pool stages followed by a
    bias-free linear layer). The decoder reshapes the code to ``(2, 4, 4)``
    and upsamples it back to ``(1, 28, 28)`` with three transposed
    convolutions; a sigmoid keeps the reconstruction in ``[0, 1]``.
    """

    def __init__(self):
        super().__init__()

        self.rep_dim = 32
        self.pool = nn.MaxPool2d(2, 2)

        # Encoder
        self.conv1 = nn.Conv2d(1, 8, 5, bias=False, padding=2)
        self.bn1 = nn.BatchNorm2d(8, eps=1e-04, affine=False)
        self.conv2 = nn.Conv2d(8, 4, 5, bias=False, padding=2)
        self.bn2 = nn.BatchNorm2d(4, eps=1e-04, affine=False)
        self.fc1 = nn.Linear(4 * 7 * 7, self.rep_dim, bias=False)

        # Decoder
        self.deconv1 = nn.ConvTranspose2d(2, 4, 5, bias=False, padding=2)
        self.bn3 = nn.BatchNorm2d(4, eps=1e-04, affine=False)
        # padding=3 shrinks 16x16 to 14x14 so the last 2x upsample gives 28x28.
        self.deconv2 = nn.ConvTranspose2d(4, 8, 5, bias=False, padding=3)
        self.bn4 = nn.BatchNorm2d(8, eps=1e-04, affine=False)
        self.deconv3 = nn.ConvTranspose2d(8, 1, 5, bias=False, padding=2)

    def forward(self, x):
        """Encode and reconstruct a batch.

        Args:
            x: Tensor of shape ``(N, 1, 28, 28)``.

        Returns:
            Tensor of shape ``(N, 1, 28, 28)`` with values in ``[0, 1]``.
        """
        # NOTE: ``nn.LeakyReLU`` is a Module class; calling it on a tensor
        # builds a layer instead of applying the activation, so the
        # functional form is used throughout.
        leaky_relu = nn.functional.leaky_relu
        upsample = nn.functional.interpolate

        # Encoder: 28x28 -> 14x14 -> 7x7 -> rep_dim
        x = self.conv1(x)
        x = self.pool(leaky_relu(self.bn1(x)))
        x = self.conv2(x)
        x = self.pool(leaky_relu(self.bn2(x)))
        x = x.view(x.size(0), -1)
        x = self.fc1(x)

        # Decoder: (rep_dim / 16, 4, 4) -> 8x8 -> 16x16 -> 14x14 -> 28x28
        x = x.view(x.size(0), self.rep_dim // 16, 4, 4)
        x = upsample(leaky_relu(x), scale_factor=2)
        x = self.deconv1(x)
        x = upsample(leaky_relu(self.bn3(x)), scale_factor=2)
        x = self.deconv2(x)
        x = upsample(leaky_relu(self.bn4(x)), scale_factor=2)
        x = self.deconv3(x)
        x = torch.sigmoid(x)

        return x

In [5]:
class MNIST_LeNet_Network(nn.Module):
    """LeNet-style encoder mapping 1x28x28 images to a 32-d representation.

    Two conv / batch-norm / leaky-ReLU / max-pool stages reduce the image to
    ``4 x 7 x 7`` features, which a bias-free linear layer projects to
    ``rep_dim`` outputs. No layer has a bias term or affine batch-norm
    parameters.
    """

    def __init__(self):
        super().__init__()

        self.rep_dim = 32
        self.pool = nn.MaxPool2d(2, 2)

        self.conv1 = nn.Conv2d(1, 8, 5, bias=False, padding=2)
        self.bn1 = nn.BatchNorm2d(8, eps=1e-04, affine=False)
        self.conv2 = nn.Conv2d(8, 4, 5, bias=False, padding=2)
        self.bn2 = nn.BatchNorm2d(4, eps=1e-04, affine=False)
        self.fc1 = nn.Linear(4 * 7 * 7, self.rep_dim, bias=False)

    def forward(self, x):
        """Embed a batch of images.

        Args:
            x: Tensor of shape ``(N, 1, 28, 28)``.

        Returns:
            Tensor of shape ``(N, rep_dim)``.
        """
        # NOTE: ``nn.LeakyReLU`` is a Module class; calling it on a tensor
        # builds a layer instead of applying the activation, so the
        # functional form is used here.
        leaky_relu = nn.functional.leaky_relu

        x = self.conv1(x)
        x = self.pool(leaky_relu(self.bn1(x)))
        x = self.conv2(x)
        x = self.pool(leaky_relu(self.bn2(x)))
        x = x.view(x.size(0), -1)  # flatten to (N, 4 * 7 * 7)
        x = self.fc1(x)
        return x

In [6]:
# Build both models on the selected device, then print their layer
# summaries (encoder-only network first, autoencoder second).
net = MNIST_LeNet_Network().to(device)
ae = MNIST_LeNet_AutoEncoder().to(device)
print(net)
print(ae)

MNIST_LeNet_Network(
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv1): Conv2d(1, 8, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False)
  (bn1): BatchNorm2d(8, eps=0.0001, momentum=0.1, affine=False, track_running_stats=True)
  (conv2): Conv2d(8, 4, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False)
  (bn2): BatchNorm2d(4, eps=0.0001, momentum=0.1, affine=False, track_running_stats=True)
  (fc1): Linear(in_features=196, out_features=32, bias=False)
)
MNIST_LeNet_AutoEncoder(
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv1): Conv2d(1, 8, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False)
  (bn1): BatchNorm2d(8, eps=0.0001, momentum=0.1, affine=False, track_running_stats=True)
  (conv2): Conv2d(8, 4, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False)
  (bn2): BatchNorm2d(4, eps=0.0001, momentum=0.1, affine=False, track_running_stats=True)
  (fc1): Lin

In [8]:
# Normalization of the MNIST Dataset

# Stack every training image into a single tensor, then take the mean and
# standard deviation over all of its values.
train_data = torch.stack([image for image, _ in training_data])
mean = torch.mean(train_data)
std = torch.std(train_data)

# Define a new PyTorch transform that performs the same operation as the numpy-based GCN.
def GlobalContrastNormalization(tensor, scale='l2'):
    """Apply global contrast normalization to a single tensor.

    The mean over all elements is subtracted, then the result is divided by
    a scale computed over all elements:

    * ``'l1'``: the mean absolute value of the centered tensor.
    * ``'l2'``: the root mean square of the centered tensor.

    The input is not modified; a new tensor is returned. A constant input
    has zero scale, so the division produces non-finite values.

    Args:
        tensor: Floating-point tensor to normalize.
        scale: Either ``'l1'`` or ``'l2'``.

    Returns:
        The normalized tensor, same shape as ``tensor``.

    Raises:
        AssertionError: If ``scale`` is not ``'l1'`` or ``'l2'``.
    """
    assert scale in ('l1', 'l2')

    centered = tensor - tensor.mean()

    if scale == 'l1':
        return centered / torch.mean(torch.abs(centered))

    # scale == 'l2'. ``tensor.shape`` is a torch.Size, which torch.prod does
    # not accept, so the element count comes from numel() instead.
    n_features = tensor.numel()
    return centered / torch.sqrt(torch.sum(centered ** 2) / n_features)
    
    # contrast = torch.sqrt(self.lmda + (tensor**2).mean())       # calculating contrast
    # tensor = self.s * tensor / max(contrast, self.epsilon)      # make sure contrast larger than 0

# # Apply Global Contrast Normalization to the training and test datasets
# gcn = GlobalContrastNormalization()
# training_data.transform = Compose([ToTensor(), gcn])
# test_data.transform = Compose([ToTensor(), gcn])

# Image pipeline: convert to a tensor, apply l1 global contrast
# normalization, then Normalize with mean 0 and std 1.
transform = Compose(
    [
        ToTensor(),
        Lambda(lambda img: GlobalContrastNormalization(img, scale='l1')),
        Normalize([0], [1]),
    ]
)
# NOTE(review): int() with no argument is 0, so every label maps to 0 —
# confirm whether a real label mapping was intended here.
target_transform = Lambda(lambda label: int())

SyntaxError: invalid syntax (527837196.py, line 9)

In [None]:
# Pretrain the autoencoder with a reconstruction (MSE) objective.

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(ae.parameters(), lr=0.001)

# Number of epochs
epochs = 10

# Iterating the Dataset directly yields one unbatched image at a time, which
# the model's BatchNorm2d layers reject. A DataLoader supplies mini-batches
# with a leading batch dimension instead.
batch_size = 128
train_loader = DataLoader(training_data, batch_size=batch_size, shuffle=True)

# Make sure batch norm uses batch statistics while training.
ae.train()

# Training loop
for epoch in range(epochs):
    running_loss = 0.0
    for images, _ in train_loader:
        # Move images to the device
        images = images.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass: the autoencoder is trained to reproduce its input
        outputs = ae(images)
        loss = criterion(outputs, images)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # loss is a per-batch mean; weight it by the batch size so that the
        # epoch total divided by the dataset size is a per-image average
        # (the last batch may be smaller than batch_size).
        running_loss += loss.item() * images.size(0)

    # Print loss after each epoch
    print(f'Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(training_data)}')
