In [8]:
# Mount Google Drive (Colab only) so the dataset folders under
# /content/drive/MyDrive used further down are readable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
pip install -U "ray[tune]"

Collecting ray[tune]
  Downloading ray-1.11.0-cp37-cp37m-manylinux2014_x86_64.whl (52.7 MB)
[K     |████████████████████████████████| 52.7 MB 105 kB/s 
Collecting grpcio<=1.43.0,>=1.28.1
  Downloading grpcio-1.43.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.1 MB)
[K     |████████████████████████████████| 4.1 MB 47.2 MB/s 
[?25hCollecting redis>=3.5.0
  Downloading redis-4.2.2-py3-none-any.whl (226 kB)
[K     |████████████████████████████████| 226 kB 48.9 MB/s 
Collecting tensorboardX>=1.9
  Downloading tensorboardX-2.5-py2.py3-none-any.whl (125 kB)
[K     |████████████████████████████████| 125 kB 42.2 MB/s 
Collecting async-timeout>=4.0.2
  Downloading async_timeout-4.0.2-py3-none-any.whl (5.8 kB)
Collecting deprecated>=1.2.3
  Downloading Deprecated-1.2.13-py2.py3-none-any.whl (9.6 kB)
Installing collected packages: deprecated, async-timeout, redis, grpcio, tensorboardX, ray
  Attempting uninstall: grpcio
    Found existing installation: grpcio 1.44.0
    Uninst

In [2]:
import copy
import csv
import os
from functools import partial

import matplotlib.pyplot as plt
import numpy as np
import torch
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from torch import nn
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms
from torchvision.io import read_image

In [3]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
# The bare `device` expression on the last line only displays the choice.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [4]:
def get_rgb(image):
    """Return ``image`` as a three-channel tensor.

    A tensor whose first dimension is already 3 is returned untouched.  Any
    other tensor is turned into a PIL image, converted to RGB mode and turned
    back into a tensor with ``transforms.ToTensor``.

    Args:
        image: Image tensor; its first dimension is treated as the channel count.

    Returns:
        The input object itself when it has three channels, otherwise the
        tensor produced from the RGB-converted PIL image.
    """
    channel_count = image.shape[0]
    if channel_count == 3:
        return image

    rgb_pil_image = transforms.ToPILImage()(image).convert(mode='RGB')
    return transforms.ToTensor()(rgb_pil_image)

In [5]:
class WatermarkTrainDataset(Dataset):
    """Labelled image dataset read from ``positive`` and ``negative`` sub-folders.

    Indices ``0 .. positive_len - 1`` address the positive class (label 1); the
    remaining indices address the negative class (label 0).  The size of each
    class is the number of directory entries in its sub-folder.
    """

    def __init__(self, dataset_dir, transform=None):
        """Record the class folders, their entry counts and the transform.

        Args:
            dataset_dir: Directory containing the ``positive`` and ``negative``
                sub-folders.
            transform: Optional callable applied to each image tensor before
                it is converted to float.
        """
        positive_dir = os.path.join(dataset_dir, 'positive')
        negative_dir = os.path.join(dataset_dir, 'negative')
        self.positive_path = positive_dir
        self.negative_path = negative_dir
        self.positive_len = len(os.listdir(positive_dir))
        self.negative_len = len(os.listdir(negative_dir))
        self.transform = transform

    def __len__(self):
        """Return the combined number of positive and negative samples."""
        return sum((self.positive_len, self.negative_len))

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(float image tensor, label)``."""
        path, target = self.get_path(idx)
        pixels = get_rgb(read_image(path))
        if self.transform:
            pixels = self.transform(pixels)
        pixels = transforms.functional.convert_image_dtype(image=pixels, dtype=torch.float)
        return pixels, target

    def get_path(self, idx):
        """Map a flat index to ``(file path, label)``.

        Positive files are addressed as ``Sample_<idx>.jpg`` (numbered from 0),
        negative files as ``Sample_<k>.jpg`` with ``k`` numbered from 1.
        NOTE(review): the differing start numbers presumably mirror how the
        files are named on disk - confirm against the dataset.
        """
        if idx < self.positive_len:
            return os.path.join(self.positive_path, f'Sample_{idx}.jpg'), 1

        negative_number = idx - self.positive_len + 1
        return os.path.join(self.negative_path, f'Sample_{negative_number}.jpg'), 0

In [6]:
class WatermarkTestDataset(Dataset):
    """Unlabelled images read from a single flat directory.

    The directory is listed once, at construction time, and the entries are
    sorted.  This keeps ``__len__`` and ``__getitem__`` consistent with each
    other, makes the index -> file mapping deterministic (``os.listdir`` gives
    no ordering guarantee) and avoids re-listing the directory per sample.
    """

    def __init__(self, dataset_dir):
        """Snapshot the file names found in ``dataset_dir``.

        Args:
            dataset_dir: Directory whose entries are all treated as images.
        """
        self.dataset_path = os.path.join(dataset_dir)
        # Sorted snapshot of the directory; files added later are not picked up.
        self.image_names = sorted(os.listdir(self.dataset_path))

    def __len__(self):
        """Return the number of files found when the dataset was created."""
        return len(self.image_names)

    def __getitem__(self, idx):
        """Load file ``idx`` and return ``(float image tensor, file name)``."""
        image_name = self.image_names[idx]
        image = read_image(os.path.join(self.dataset_path, image_name))
        image = get_rgb(image)
        image = transforms.functional.convert_image_dtype(image=image, dtype=torch.float)
        return image, str(image_name)

In [9]:
# Root folders of the labelled and unlabelled images on the mounted Drive.
TRAIN_DIR = '/content/drive/MyDrive/dataset/train'
TEST_DIR = '/content/drive/MyDrive/dataset/test'
# Fixed seed so the train/validation partition is identical on every run.
SPLIT_SEED = 42

train_val_dataset = WatermarkTrainDataset(TRAIN_DIR, transform=transforms.RandomHorizontalFlip())
train_size = int(0.85 * len(train_val_dataset))
val_size = len(train_val_dataset) - train_size
train_dataset, augmented_val_subset = random_split(
    dataset=train_val_dataset,
    lengths=[train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
# Validation uses the held-out indices, but reads them through a dataset
# without the random flip so validation metrics are not affected by augmentation.
val_dataset = torch.utils.data.Subset(WatermarkTrainDataset(TRAIN_DIR), augmented_val_subset.indices)
test_dataset = WatermarkTestDataset(TEST_DIR)

In [10]:
# Number of labelled samples (positive + negative) before the train/validation split.
len(train_val_dataset)

8582

In [11]:
# Number of entries in the unlabelled test directory.
len(test_dataset)

1103

In [12]:
class InceptionModule(nn.Module):
    """Four parallel convolutional branches concatenated along the channel axis.

    Every branch maps ``Cin`` input channels to ``3 * Cin / 4`` output channels
    with stride 1 and size-preserving padding, so the concatenation carries
    ``3 * Cin`` channels at the input's spatial size.  The concatenation is
    followed by batch normalisation and ReLU.

    Args:
        Cin: Number of input channels.  Must be a positive multiple of 4,
            otherwise the four branches cannot add up to the ``3 * Cin``
            channels the batch-norm layer expects.

    Raises:
        ValueError: If ``Cin`` is not a positive multiple of 4.
    """

    def __init__(self, Cin):
        super(InceptionModule, self).__init__()
        if Cin <= 0 or Cin % 4 != 0:
            # Fail at construction time instead of with a channel mismatch
            # inside forward().
            raise ValueError(f'Cin must be a positive multiple of 4, got {Cin}')

        branch_channels = 3 * Cin // 4   # output width of each branch
        reduced_channels = Cin // 2      # 1x1 bottleneck width before the 3x3 / 5x5 convs

        # Branch 1: plain 1x1 convolution.
        self.one = nn.Sequential(
            nn.Conv2d(in_channels=Cin, out_channels=branch_channels, kernel_size=1, stride=1)
        )

        # Branch 2: 1x1 bottleneck followed by a 3x3 convolution.
        self.two = nn.Sequential(
            nn.Conv2d(in_channels=Cin, out_channels=reduced_channels, kernel_size=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=reduced_channels, out_channels=branch_channels, kernel_size=3, stride=1, padding=1)
        )

        # Branch 3: 1x1 bottleneck followed by a 5x5 convolution.
        self.three = nn.Sequential(
            nn.Conv2d(in_channels=Cin, out_channels=reduced_channels, kernel_size=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=reduced_channels, out_channels=branch_channels, kernel_size=5, stride=1, padding=2)
        )

        # Branch 4: 3x3 max-pool followed by a 1x1 convolution.
        self.four = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            nn.Conv2d(in_channels=Cin, out_channels=branch_channels, kernel_size=1, stride=1)
        )

        self.norm = nn.Sequential(
            nn.BatchNorm2d(num_features=4 * branch_channels),
            nn.ReLU()
        )

    def forward(self, x):
        """Apply the four branches to ``x`` and return the normalised concatenation."""
        branch_outputs = (self.one(x), self.two(x), self.three(x), self.four(x))
        return self.norm(torch.cat(tensors=branch_outputs, dim=1))


class Classifier(nn.Module):
    """Two-class CNN: convolutional stem, three inception modules, linear head.

    The trailing size comments track the spatial side length for a 512x512
    input; the final linear layer expects an 8x8 feature map with 1024 channels.
    """

    def __init__(self):
        super().__init__()

        stem_layers = [   # 512
            nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, stride=2, padding=1),   # 255
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),   # 128
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=2, padding=1),   # 64
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),   # 32
            nn.BatchNorm2d(num_features=32),
        ]
        self.one = nn.Sequential(*stem_layers)

        # Each inception module triples the channel count: 32 -> 96 -> 288 -> 864.
        inception_widths = (32, 96, 288)
        self.two = nn.Sequential(*[InceptionModule(width) for width in inception_widths])

        head_layers = [
            nn.Conv2d(in_channels=3*288, out_channels=1024, kernel_size=3, stride=2, padding=1),   # 16
            nn.ReLU(),
            nn.AvgPool2d(kernel_size=3, stride=2, padding=1),   # 8
            nn.Dropout(p=0.5),
            nn.Flatten(),
            nn.Linear(in_features=1024*8*8, out_features=2),
        ]
        self.three = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return the two class scores for a batch of images ``x``."""
        features = self.one(x)
        features = self.two(features)
        return self.three(features)

In [13]:
def train_loop(train_dataloader, model, loss_fn, optimizer, device=None):
    """Run one optimisation pass over ``train_dataloader``.

    Args:
        train_dataloader: Iterable of ``(images, labels)`` batches that also
            exposes ``.dataset`` and ``len()``.
        model: Network to optimise; it is switched to training mode.
        loss_fn: Callable ``loss_fn(scores, labels)`` returning a scalar tensor.
        optimizer: Optimiser holding the parameters of ``model``.
        device: Device the batches are moved to.  When omitted, the device of
            the model's first parameter is used (CPU if the model has no
            parameters), so the function does not depend on a module-level
            ``device`` variable.

    Returns:
        Tuple ``(mean loss per batch, accuracy in percent)``; both are measured
        on each batch before that batch's optimiser step.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    train_loss, correct = 0, 0
    size = len(train_dataloader.dataset)
    num_batches = len(train_dataloader)

    model.train()
    for images, labels in train_dataloader:
        images, labels = images.to(device), labels.to(device)
        scores = model(images)

        loss = loss_fn(scores, labels)
        train_loss += loss.item()
        correct += (scores.argmax(dim=1) == labels).sum().item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    train_loss /= num_batches
    accuracy = correct / size * 100

    return train_loss, accuracy


def val_loop(dataloader, model, loss_fn, device=None):
    """Evaluate ``model`` on ``dataloader`` without tracking gradients.

    Args:
        dataloader: Iterable of ``(images, labels)`` batches that also exposes
            ``.dataset`` and ``len()``.
        model: Network to evaluate; it is switched to evaluation mode.
        loss_fn: Callable ``loss_fn(scores, labels)`` returning a scalar tensor.
        device: Device the batches are moved to.  When omitted, the device of
            the model's first parameter is used (CPU if the model has no
            parameters), so the function does not depend on a module-level
            ``device`` variable.

    Returns:
        Tuple ``(mean loss per batch, accuracy in percent)``.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    loss, correct = 0, 0
    size = len(dataloader.dataset)
    num_batches = len(dataloader)

    model.eval()
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            scores = model(images)

            loss += loss_fn(scores, labels).item()
            correct += (scores.argmax(dim=1) == labels).sum().item()

    loss /= num_batches
    accuracy = correct / size * 100

    return loss, accuracy

In [14]:
def test(dataloader, model):
    with open('output.csv', 'w') as file:
        writer = csv.writer(file)
        writer.writerow(['name', 'predicted'])

    model.eval()
    with torch.no_grad():
        for images, paths in dataloader:
            images = images.to(device)
            predictions = model(images).argmax(dim=1)
            with open('output.csv', 'a') as file:
                writer = csv.writer(file)
                writer.writerows([[path, prediction.item()] for prediction, path in zip(predictions, paths)])

In [15]:
def plot(train_loss, train_accuracy, val_loss, val_accuracy, epochs):
    """Show train/validation loss and accuracy per epoch as two scatter panels.

    Args:
        train_loss: Per-epoch training losses.
        train_accuracy: Per-epoch training accuracies.
        val_loss: Per-epoch validation losses.
        val_accuracy: Per-epoch validation accuracies.
        epochs: Number of epochs; the x axis runs from 1 to ``epochs``.
    """
    epoch_axis = np.arange(1, epochs + 1, dtype=int)
    figure, axes = plt.subplots(ncols=2, figsize=(14, 6))

    # (axis, y label, train series, train legend, validation series, validation legend)
    panels = (
        (axes[0], 'Loss', train_loss, 'Train Loss', val_loss, 'Validation Loss'),
        (axes[1], 'Accuracy', train_accuracy, 'Train Accuracy', val_accuracy, 'Validation Accuracy'),
    )
    for axis, y_label, train_values, train_label, val_values, val_label in panels:
        axis.scatter(epoch_axis, np.array(train_values), label=train_label)
        axis.scatter(epoch_axis, np.array(val_values), label=val_label)
        axis.set_xlabel('Epoch number')
        axis.set_ylabel(y_label)
        axis.legend()

    plt.show()

In [16]:
def train(config, device, epochs, testing=False):
    """Train a fresh ``Classifier`` and either report to Ray Tune or evaluate it.

    Reads the module-level ``train_dataset``, ``val_dataset`` and
    ``test_dataset``.

    Args:
        config: Mapping with the keys ``'lr'``, ``'beta1'``, ``'beta2'`` and
            ``'batch_size'``.
        device: Device the model and the loss are moved to.
        epochs: Number of training epochs.
        testing: When False, validation loss/accuracy are reported to Ray Tune
            after every epoch.  When True, the weights with the best
            validation accuracy are kept, and after training they are
            restored, the curves are plotted and test predictions are written.
    """
    model = Classifier().to(device)
    loss_fn = nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'], betas=(config['beta1'], config['beta2']))

    train_dataloader = DataLoader(dataset=train_dataset, batch_size=config['batch_size'], shuffle=True)
    val_dataloader = DataLoader(dataset=val_dataset, batch_size=config['batch_size'], shuffle=False)
    test_dataloader = DataLoader(dataset=test_dataset, batch_size=config['batch_size'], shuffle=False)

    train_loss_list, train_accuracy_list = [], []
    val_loss_list, val_accuracy_list = [], []
    best_val_accuracy = 0
    # Stays None when no epoch beats the initial accuracy of 0 (or epochs == 0);
    # the final weights are then used as they are.
    best_model_dict = None

    for epoch in range(epochs):
        train_loss, train_accuracy = train_loop(train_dataloader, model, loss_fn, optimizer)
        train_loss_list.append(train_loss)
        train_accuracy_list.append(train_accuracy)

        val_loss, val_accuracy = val_loop(val_dataloader, model, loss_fn)
        val_loss_list.append(val_loss)
        val_accuracy_list.append(val_accuracy)

        if not testing:
            tune.report(loss=val_loss, accuracy=val_accuracy, training_iteration=epoch+1)
        elif val_accuracy > best_val_accuracy:
            print(f'train_loss: {train_loss}, train_accuracy: {train_accuracy}, val_loss: {val_loss}, val_accuracy: {val_accuracy}')
            best_val_accuracy = val_accuracy
            # Deep copy: state_dict() returns references to the live tensors.
            best_model_dict = copy.deepcopy(model.state_dict())

    if testing:
        if best_model_dict is not None:
            model.load_state_dict(best_model_dict)
        plot(train_loss_list, train_accuracy_list, val_loss_list, val_accuracy_list, epochs)
        test(test_dataloader, model)

In [17]:
def main(num_samples=8, max_num_epochs=8):
    """Search Adam hyperparameters and batch size with Ray Tune and ASHA.

    Args:
        num_samples: Number of configurations sampled from the search space.
        max_num_epochs: Maximum number of epochs a single trial may run.

    Returns:
        The configuration dict of the trial with the highest last-reported
        validation accuracy.

    Raises:
        RuntimeError: If no trial reported an accuracy.
    """
    search_space = {
        'lr': tune.loguniform(1e-4, 1e-2),
        'beta1': tune.uniform(0.9, 0.999),
        'beta2': tune.uniform(0.9, 0.999),
        'batch_size': tune.choice([16, 32])
    }

    scheduler = ASHAScheduler(
        metric='accuracy',
        mode='max',
        max_t=max_num_epochs,
        # ASHA requires grace_period <= max_t, so clamp it for short runs.
        grace_period=min(4, max_num_epochs),
        reduction_factor=2
    )

    reporter = CLIReporter(
        metric_columns=['loss', 'accuracy', 'training_iteration'],
        parameter_columns=['lr', 'beta1', 'beta2', 'batch_size']
    )

    # Request a GPU share only when training runs on CUDA; a GPU request on a
    # CPU-only host can never be satisfied.
    gpu_per_trial = 0.03 if device.type == 'cuda' else 0

    result = tune.run(
        partial(train, device=device, epochs=max_num_epochs),
        resources_per_trial={'cpu': 1, 'gpu': gpu_per_trial},
        config=search_space,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter
    )

    best_trial = result.get_best_trial('accuracy', 'max', 'last')
    if best_trial is None:
        raise RuntimeError('No trial reported an accuracy; cannot select a best configuration.')
    return best_trial.config

In [None]:
# Run the hyperparameter search and keep the best trial's configuration.
# The run recorded below took roughly six hours.
config = main()

2022-04-12 12:07:12,265	INFO registry.py:70 -- Detected unknown callable for trainable. Converting to class.


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
| DEFAULT_15f10_00007 | TERMINATED | 172.28.0.2:10724 | 0.0111575   | 0.9056   | 0.904743 |           16 | 0.694463    |    48.913  |                    5 |
+---------------------+------------+------------------+-------------+----------+----------+--------------+-------------+------------+----------------------+


== Status ==
Current time: 2022-04-12 17:51:11 (running for 05:43:59.16)
Memory usage on this node: 4.7/12.7 GiB
Using AsyncHyperBand: num_stopped=8
Bracket: Iter 10.000: -0.6932952782002891 | Iter 5.000: -0.6933553058926653
Resources requested: 2.0/2 CPUs, 0.06/1 GPUs, 0.0/6.87 GiB heap, 0.0/3.44 GiB objects (0.0/1.0 accelerator_type:K80)
Result logdir: /root/ray_results/DEFAULT_2022-04-12_12-07-12
Number of trials: 10/10 (2 RUNNING, 8 TERMINATED)
+---------------------+------------+------------------+-------------+----------+----------+--------------+-------------+------------+----------------------+
| Trial n

2022-04-12 18:08:23,317	INFO tune.py:639 -- Total run time: 21671.06 seconds (21670.36 seconds for the tuning loop).


Result for DEFAULT_15f10_00008:
  accuracy: 51.16459627329193
  date: 2022-04-12_18-08-23
  done: true
  experiment_id: 951fec885d134e8b96a1ad5fe38b06ea
  hostname: 511ca5cfaff6
  iterations_since_restore: 5
  loss: 47607302861.012344
  node_ip: 172.28.0.2
  pid: 11338
  should_checkpoint: true
  time_since_restore: 4398.831380367279
  time_this_iter_s: 879.8844528198242
  time_total_s: 4398.831380367279
  timestamp: 1649786903
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 15f10_00008
  
== Status ==
Current time: 2022-04-12 18:08:23 (running for 06:01:10.39)
Memory usage on this node: 3.4/12.7 GiB
Using AsyncHyperBand: num_stopped=10
Bracket: Iter 10.000: -0.6936375529552146 | Iter 5.000: -0.6935556638639255
Resources requested: 0/2 CPUs, 0/1 GPUs, 0.0/6.87 GiB heap, 0.0/3.44 GiB objects (0.0/1.0 accelerator_type:K80)
Result logdir: /root/ray_results/DEFAULT_2022-04-12_12-07-12
Number of trials: 10/10 (10 TERMINATED)
+---------------------+------------+-------------

ValueError: ignored

In [None]:
# Final 30-epoch training with the configuration found above; testing=True
# also plots the learning curves and writes the test predictions to output.csv.
train(config=config, device=device, epochs=30, testing=True)

In [19]:
# Hard-coded hyperparameters so the final training can run without repeating
# the search (presumably copied from a tuning run - TODO confirm their origin).
config = {
        'lr': 0.000583759,
        'beta1': 0.935418,
        'beta2': 0.996841,
        'batch_size': 16
}
train(config=config, device=device, epochs=30, testing=True)

train_loss: 0.649768807647521, train_accuracy: 66.2599396764464, val_loss: 0.46007429633611513, val_accuracy: 81.83229813664596
train_loss: 0.43929519086030494, train_accuracy: 81.53276665752674, val_loss: 0.4088257795866625, val_accuracy: 84.16149068322981
train_loss: 0.43132658420424713, train_accuracy: 82.01261310666301, val_loss: 0.46895621753769157, val_accuracy: 84.3944099378882
train_loss: 0.40945343173256044, train_accuracy: 83.27392377296408, val_loss: 0.3825042635938268, val_accuracy: 85.79192546583852


KeyboardInterrupt: ignored