In [1]:
import numpy as np
import os

import torch
import torch.nn as nn
import torch.optim as optim

from random import randint, random
from PIL import Image
from torch.utils.data import DataLoader, TensorDataset

In [2]:
class CustomCNN(nn.Module):
    def __init__(self, num_classes, dropout_prob=0.4):
        super(CustomCNN, self).__init__()
        
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1, dtype=torch.float16)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1, dtype=torch.float16)
        
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(64 * 149 * 149, 128, dtype=torch.float16)
        self.fc2 = nn.Linear(128, num_classes, dtype=torch.float16)
        
        self.dropout = nn.Dropout(p=dropout_prob)
        
    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 64 * 149 * 149)
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x


In [3]:
def random_blocking(image):
    mx = image.shape[-1]
    
    block_nums = randint(2, 4) # maybe change
    for b in range(block_nums):
        x_block_size = randint(4, 7) # maybe change
        y_block_size = randint(4, 7)
        
        x_start = randint(0, mx - x_block_size - 1)
        y_start = randint(0, mx - y_block_size - 1)
        
        for x in range(x_start, x_start + x_block_size):
            for y in range(y_start, y_start + y_block_size):
                image[0][x][y] = 0
                image[1][x][y] = 0
                image[2][x][y] = 0
                
    return image

In [4]:
def random_flipping(image):
    flips = randint(0, 3)
    return np.rot90(image, k=flips, axes=(1, 2))

In [5]:
def random_coloring(image):
    image[0] *= random()
    image[1] *= random()
    image[2] *= random()
    return image

In [6]:
def get_image_data(directory_path):
    data = []

    for filename in os.listdir(directory_path):
        file_path = os.path.join(directory_path, filename)

        if os.path.isfile(file_path):
            im = Image.open(file_path)
            img = np.asarray(im, dtype=np.float16) / 255.
            data += [img]

    return np.stack(data, axis=0).transpose((0, 3, 1, 2))

In [7]:
def get_one_hot_encoding(n, isdog):
    onehot = None
    if isdog:
        onehot = np.stack([[1, 0] for _ in range(n)], axis=0)
    else:
        onehot = np.stack([[0, 1] for _ in range(n)], axis=0)
    return torch.from_numpy(onehot).type(torch.float16)

In [8]:
def augment_images(images):
    augs = []
    
    for img in images:
        augs += [np.copy(img), random_flipping(random_blocking(np.copy(img))), random_coloring(random_flipping(random_blocking(np.copy(img))))]
            
    augmented_images = np.stack(augs, axis=0)    
    return torch.from_numpy(augmented_images).type(torch.float16)

In [9]:
def to_torch(arr):
    return torch.from_numpy(arr).type(torch.float16)

In [10]:
train_not_data = get_image_data('hotdog-nothotdog/train/nothotdog')
test_not_data = get_image_data('hotdog-nothotdog/test/nothotdog')
train_hot_data = get_image_data('hotdog-nothotdog/train/hotdog')
test_hot_data = get_image_data('hotdog-nothotdog/test/hotdog') 

In [11]:
train_not_augment = augment_images(train_not_data)
train_hot_augment = augment_images(train_hot_data)

In [12]:
train_not_onehot = get_one_hot_encoding(train_not_augment.shape[0], False)
test_not_onehot = get_one_hot_encoding(test_not_data.shape[0], False)
train_hot_onehot = get_one_hot_encoding(train_hot_augment.shape[0], True)
test_hot_onehot = get_one_hot_encoding(test_hot_data.shape[0], True)

In [13]:
train_data_tensor = torch.cat((train_not_augment, train_hot_augment), dim=0).type(torch.float32)
train_labels_tensor = torch.cat((train_not_onehot, train_hot_onehot), dim=0).type(torch.float32)
test_data_tensor = torch.cat((to_torch(test_not_data), to_torch(test_hot_data)), dim=0).type(torch.float32)
test_labels_tensor = torch.cat((test_not_onehot, test_hot_onehot), dim=0).type(torch.float32)

In [14]:
batch_size = 32

train_dataset = TensorDataset(train_data_tensor, train_labels_tensor)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = TensorDataset(test_data_tensor, test_labels_tensor)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

In [15]:
model = CustomCNN(num_classes=10)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.05)

for epoch in range(10):
    model.train()
    for inputs, labels in train_dataloader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_dataloader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print("Accuracy on the test dataset: " + str(accuracy) + "%")

RuntimeError: "slow_conv2d_cpu" not implemented for 'Half'