## Install Libraries


In [1]:
%pip install "ray[tune]"
%pip install deepaugment
%pip install randaugment
# %pip install torch torchvision torchaudio # Uncomment this to install PyTorch 2.0+ on ilab (required version to run this program)

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting ray[tune]
  Downloading ray-2.3.1-cp39-cp39-manylinux2014_x86_64.whl (58.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.6/58.6 MB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
Collecting frozenlist
  Downloading frozenlist-1.3.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (158 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m158.8/158.8 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting virtualenv>=20.0.24
  Downloading virtualenv-20.21.0-py3-none-any.whl (8.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.7/8.7 MB[0m [31m46.0 MB/s[0m eta [36m0:00:00[0m
Collecting aiosignal
  Downloading aiosignal-1.3.1-py3-none-any.whl (7.6 kB)
Collecting tensorboardX>=1.9
  Downloading tensorboardX-2.6-py2.py3-none-any.whl (114 kB)
[2K     [90m━━━━━━━━━━━━━━━

## Import Libraries


In [2]:
from filelock import FileLock
from ray import tune
from ray.air import session
from ray.air.checkpoint import Checkpoint
from torch.utils.data import random_split
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from collections import OrderedDict
import numpy as np
from torch.utils.data import ConcatDataset


## Data loaders


In [3]:
# new for project
class Cutout:
    def __init__(self, n_holes, length):
        self.n_holes = n_holes
        self.length = length

    def __call__(self, img):
        h = img.shape[0]
        w = img.shape[1]
        d = img.shape[2]
        mask = np.ones((h, w, d), np.float32)

        for n in range(self.n_holes):
            y = np.random.randint(h)
            x = np.random.randint(w)

            y1 = np.clip(y - self.length // 2, 0, h)
            y2 = np.clip(y + self.length // 2, 0, h)
            x1 = np.clip(x - self.length // 2, 0, w)
            x2 = np.clip(x + self.length // 2, 0, w)

            mask[y1:y2, x1:x2, 0:d] = 0.

        mask = torch.from_numpy(mask)
        mask = mask.expand_as(img)
        img *= mask

        return img

#from deepaugment.deepaugment import DeepAugment

# Define mixup function
def mixup_data(x, y, alpha=1.0):
    lam = np.random.beta(alpha, alpha)
    batch_size = x.size()[0]
    index = torch.randperm(batch_size)
    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

In [4]:
def load_data(config, data_dir="./data"):

    transform_aug = transforms.Compose([
                #ImageNetPolicy(),
                #CIFAR10Policy(),
                transforms.ToTensor(),
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), # to be consistent with others
                Cutout(n_holes=1, length=16),
            ])



    transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
    )

    with FileLock(os.path.expanduser("~/.data.lock")):
        trainset_orig = torchvision.datasets.CIFAR10(
            root=data_dir, train=True, download=True, transform=transform
        )

        trainset_aug = torchvision.datasets.CIFAR10(
            root=data_dir, train=True, download=True, transform=transform_aug
        )       

        testset = torchvision.datasets.CIFAR10(
            root=data_dir, train=False, download=True, transform=transform
        )

        if config['add_aug'] == True and config['increase_data'] == True:
          trainset = ConcatDataset([trainset_orig, trainset_aug])
        elif config['add_aug'] == True:
          trainset = trainset_aug
        else:
          trainset = trainset_orig


    return trainset, testset


## Configurable neural network


In [5]:
def flatten(x, start_dim=1, end_dim=-1):
    return x.flatten(start_dim=start_dim, end_dim=end_dim)


In [6]:
# Will need to modify the parameters and the network depending on what you are experimenting with

C, H, W = 3, 32, 32
num_classes = 10

channel_1 = 32
channel_2 = 64
channel_3 = 128
pool_kernel_size = 2

kernel_size_1 = 3
kernel_size_2 = 5
kernel_size_3 = 7

pad_size_1 = 2
pad_size_2 = 3
pad_size_3 = 3

fc_count_1 = 1024

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, channel_1, kernel_size_1, padding=pad_size_1)
        self.norm1 = nn.BatchNorm2d(channel_1)
        self.pool1 = nn.MaxPool2d(pool_kernel_size)
        self.conv2 = nn.Conv2d(channel_1, channel_2, kernel_size_2, padding=pad_size_2)
        self.norm2 = nn.BatchNorm2d(channel_2)
        self.pool2 = nn.MaxPool2d(pool_kernel_size)
        self.conv3 = nn.Conv2d(channel_2, channel_3, kernel_size_3, padding=pad_size_3)
        self.norm3 = nn.BatchNorm2d(channel_3)
        self.pool3 = nn.MaxPool2d(pool_kernel_size)        

        H_after_conv1 = int(H + 2*pad_size_1 - 1*(kernel_size_1-1) - 1 + 1)
        W_after_conv1 = int(W + 2*pad_size_1 - 1*(kernel_size_1-1) - 1 + 1)
        H_after_pool1 = int((H_after_conv1 + 2*0 - 1*(pool_kernel_size-1) - 1) / pool_kernel_size + 1)
        W_after_pool1 = int((W_after_conv1 + 2*0 - 1*(pool_kernel_size-1) - 1) / pool_kernel_size + 1)
        H_after_conv2 = int(H_after_pool1 + 2*pad_size_2 - 1*(kernel_size_2-1) - 1 + 1)
        W_after_conv2 = int(W_after_pool1 + 2*pad_size_2 - 1*(kernel_size_2-1) - 1 + 1)
        H_after_pool2 = int((H_after_conv2 + 2*0 - 1*(pool_kernel_size-1) - 1) / pool_kernel_size + 1)
        W_after_pool2 = int((W_after_conv2 + 2*0 - 1*(pool_kernel_size-1) - 1) / pool_kernel_size + 1)  
        H_after_conv3 = int(H_after_pool2 + 2*pad_size_3 - 1*(kernel_size_3-1) - 1 + 1)
        W_after_conv3 = int(W_after_pool2 + 2*pad_size_3 - 1*(kernel_size_3-1) - 1 + 1)
        H_after_pool3 = int((H_after_conv3 + 2*0 - 1*(pool_kernel_size-1) - 1) / pool_kernel_size + 1)
        W_after_pool3 = int((W_after_conv3 + 2*0 - 1*(pool_kernel_size-1) - 1) / pool_kernel_size + 1)

        self.fc1 = nn.Linear(H_after_pool3*W_after_pool3*channel_3, fc_count_1)
        self.fc2 = nn.Linear(fc_count_1, fc_count_1)
        self.fc3 = nn.Linear(fc_count_1, 10)


    def forward(self, x):
        x = self.conv1(x)
        x = self.norm1(x)
        x = F.relu(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.norm2(x)
        x = F.relu(x)
        x = self.pool2(x)
        x = self.conv3(x)
        x = self.norm3(x)
        x = F.relu(x)
        x = self.pool3(x)
        x = flatten(x)
        x = self.fc1(x)
        #x = F.relu(x)
        x = self.fc2(x)
        #x = F.relu(x)
        x = self.fc3(x)

        return x



## The train function


In [7]:
def train_cifar(config):
    net = Net()  # Will need to modify the parameters depending on what you are experimenting with

    learning_rate = 0.015221
    momentum = 0.900000
    weight_decay = 0.001

    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if torch.cuda.device_count() > 1:
            net = nn.DataParallel(net)
    net.to(device)

    

    criterion = nn.CrossEntropyLoss()

    optimizer = optim.SGD(net.parameters(), lr=learning_rate, 
                weight_decay=weight_decay,
                momentum=momentum, nesterov=True)    


    # Do not modify any code below!
    loaded_checkpoint = session.get_checkpoint()
    if loaded_checkpoint:
        with loaded_checkpoint.as_directory() as loaded_checkpoint_dir:
            model_state, optimizer_state = torch.load(
                os.path.join(loaded_checkpoint_dir, "checkpoint.pt")
            )
            net.load_state_dict(model_state)
            optimizer.load_state_dict(optimizer_state)

    data_dir = os.path.abspath("./data")
    trainset, testset = load_data(config, data_dir)

    test_abs = int(len(trainset) * 0.8)
    train_subset, val_subset = random_split(
        trainset, [test_abs, len(trainset) - test_abs]
    )

    trainloader = torch.utils.data.DataLoader(
        train_subset, batch_size=64, shuffle=True, num_workers=8
    )
    valloader = torch.utils.data.DataLoader(
        val_subset, batch_size=64, shuffle=True, num_workers=8
    )

    for epoch in range(10):
        running_loss = 0.0
        epoch_steps = 0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            # mixup
            if config['mixup'] == True:
              x, y_a, y_b, lam = mixup_data(inputs, labels)
              x, y_a, y_b = map(torch.autograd.Variable, (x, y_a, y_b))

              scores = net(x)

              loss = lam * F.cross_entropy(scores, y_a) + (1 - lam) * F.cross_entropy(scores, y_b)
            else:
              outputs = net(inputs)
              loss = criterion(outputs, labels)

            optimizer.zero_grad()


            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            epoch_steps += 1
            if i % 2000 == 1999:
                print(
                    f"[{epoch + 1}, {i + 1:>5} loss: {running_loss / epoch_steps:.3f}]"
                )
                running_loss = 0.0

        val_loss = 0.0
        val_steps = 0
        total = 0
        correct = 0
        for i, data in enumerate(valloader, 0):
            with torch.no_grad():
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = net(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                loss = criterion(outputs, labels)
                val_loss += loss.cpu().numpy()
                val_steps += 1

        os.makedirs("my_model", exist_ok=True)
        torch.save((net.state_dict(), optimizer.state_dict()), "my_model/checkpoint.pt")
        checkpoint = Checkpoint.from_directory("my_model")
        session.report(
            {"loss": (val_loss / val_steps), "accuracy": correct / total},
            checkpoint=checkpoint,
        )
    print("Finished Training")


## Test set accuracy


In [8]:
def test_best_model(best_result):
    best_trained_model = Net(
     )  # Will need to modify the parameters depending on what you are experimenting with

    # Do not modify any code below!
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    best_trained_model.to(device)

    checkpoint_path = os.path.join(
        best_result.checkpoint.to_directory(), "checkpoint.pt"
    )

    model_state, optimizer_state = torch.load(checkpoint_path)
    best_trained_model.load_state_dict(model_state)

    trainset, testset = load_data(config)

    testloader = torch.utils.data.DataLoader(
        testset, batch_size=4, shuffle=False, num_workers=2
    )

    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = best_trained_model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f"Best trial test set accuracy: {correct / total}")


## Configuring the search space


In [9]:
# You will be experimenting with the hyperparameters here
# Use tune.grid_search to specify what values you want to experiment with a hyperparameter
config = {"add_aug": tune.grid_search([True, False]), 
          "increase_data": tune.grid_search([True, False]),
          'mixup': tune.grid_search([True, False])
          }



In [10]:
# Do not modify any code below!
def main(config):
    tuner = tune.Tuner(
        tune.with_resources(
            tune.with_parameters(train_cifar),
            resources={"gpu": 1},
        ),
        param_space=config,
    )
    results = tuner.fit()

    best_result = results.get_best_result("accuracy", "max")

    print(f"Best trial config: {best_result.config}")
    print(f"Best trial final validation loss: {best_result.metrics['loss']}")
    print(f"Best trial final validation accuracy: {best_result.metrics['accuracy']}")

    test_best_model(best_result)

    return results.get_dataframe("accuracy", "max")


results_df = main(config)


2023-04-13 12:22:42,737	INFO worker.py:1553 -- Started a local Ray instance.


0,1
Current time:,2023-04-13 12:59:35
Running for:,00:36:50.84
Memory:,4.7/12.7 GiB

Trial name,status,loc,add_aug,increase_data,mixup,iter,total time (s),loss,accuracy
train_cifar_e4c7c_00000,TERMINATED,172.28.0.12:934,True,True,True,10,449.899,0.853711,0.73855
train_cifar_e4c7c_00001,TERMINATED,172.28.0.12:934,False,True,True,10,207.919,0.749648,0.7833
train_cifar_e4c7c_00002,TERMINATED,172.28.0.12:934,True,False,True,10,267.042,1.12405,0.6318
train_cifar_e4c7c_00003,TERMINATED,172.28.0.12:934,False,False,True,10,194.53,0.785627,0.7725
train_cifar_e4c7c_00004,TERMINATED,172.28.0.12:934,True,True,False,10,427.006,0.736337,0.74665
train_cifar_e4c7c_00005,TERMINATED,172.28.0.12:934,False,True,False,10,196.125,0.896247,0.7485
train_cifar_e4c7c_00006,TERMINATED,172.28.0.12:934,True,False,False,10,261.965,1.06041,0.6362
train_cifar_e4c7c_00007,TERMINATED,172.28.0.12:934,False,False,False,10,194.417,0.879734,0.7549


[2m[36m(train_cifar pid=934)[0m Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /root/ray_results/train_cifar_2023-04-13_12-22-41/train_cifar_e4c7c_00000_0_add_aug=True,increase_data=True,mixup=True_2023-04-13_12-22-45/data/cifar-10-python.tar.gz


[2m[36m(train_cifar pid=934)[0m   0%|          | 0/170498071 [00:00<?, ?it/s]
  0%|          | 458752/170498071 [00:00<00:40, 4178255.92it/s]
  4%|▍         | 7340032/170498071 [00:00<00:04, 40689917.74it/s]
 11%|█         | 18546688/170498071 [00:00<00:02, 72554597.21it/s]
 17%|█▋        | 29687808/170498071 [00:00<00:01, 87600880.50it/s]
 24%|██▍       | 40927232/170498071 [00:00<00:01, 96416167.07it/s]
 30%|███       | 51937280/170498071 [00:00<00:01, 101037350.29it/s]
 37%|███▋      | 63176704/170498071 [00:00<00:01, 104698780.47it/s]
 44%|████▎     | 74350592/170498071 [00:00<00:00, 106856711.08it/s]
 50%|████▉     | 85065728/170498071 [00:00<00:00, 96383110.89it/s] 
 57%|█████▋    | 96468992/170498071 [00:01<00:00, 101394194.10it/s]
 63%|██████▎   | 107675648/170498071 [00:01<00:00, 104435629.90it/s]
 70%|██████▉   | 118816768/170498071 [00:01<00:00, 106475657.26it/s]
 76%|███████▋  | 130056192/170498071 [00:01<00:00, 108199692.05it/s]
 83%|████████▎ | 140967936/170498071 [00

[2m[36m(train_cifar pid=934)[0m Extracting /root/ray_results/train_cifar_2023-04-13_12-22-41/train_cifar_e4c7c_00000_0_add_aug=True,increase_data=True,mixup=True_2023-04-13_12-22-45/data/cifar-10-python.tar.gz to /root/ray_results/train_cifar_2023-04-13_12-22-41/train_cifar_e4c7c_00000_0_add_aug=True,increase_data=True,mixup=True_2023-04-13_12-22-45/data
[2m[36m(train_cifar pid=934)[0m Files already downloaded and verified
[2m[36m(train_cifar pid=934)[0m Files already downloaded and verified




Trial name,accuracy,date,done,episodes_total,experiment_id,experiment_tag,hostname,iterations_since_restore,loss,node_ip,pid,should_checkpoint,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
train_cifar_e4c7c_00000,0.73855,2023-04-13_12-30-26,True,,f65f13551bed437e9bd329330ca221e9,"0_add_aug=True,increase_data=True,mixup=True",fa277bb50508,10,0.853711,172.28.0.12,934,True,449.899,43.4501,449.899,1681389026,0,,10,e4c7c_00000,0.00574541
train_cifar_e4c7c_00001,0.7833,2023-04-13_12-33-54,True,,f65f13551bed437e9bd329330ca221e9,"1_add_aug=False,increase_data=True,mixup=True",fa277bb50508,10,0.749648,172.28.0.12,934,True,207.919,19.6706,207.919,1681389234,0,,10,e4c7c_00001,0.00574541
train_cifar_e4c7c_00002,0.6318,2023-04-13_12-38-21,True,,f65f13551bed437e9bd329330ca221e9,"2_add_aug=True,increase_data=False,mixup=True",fa277bb50508,10,1.12405,172.28.0.12,934,True,267.042,24.5928,267.042,1681389501,0,,10,e4c7c_00002,0.00574541
train_cifar_e4c7c_00003,0.7725,2023-04-13_12-41-36,True,,f65f13551bed437e9bd329330ca221e9,"3_add_aug=False,increase_data=False,mixup=True",fa277bb50508,10,0.785627,172.28.0.12,934,True,194.53,18.493,194.53,1681389696,0,,10,e4c7c_00003,0.00574541
train_cifar_e4c7c_00004,0.74665,2023-04-13_12-48-43,True,,f65f13551bed437e9bd329330ca221e9,"4_add_aug=True,increase_data=True,mixup=False",fa277bb50508,10,0.736337,172.28.0.12,934,True,427.006,40.9122,427.006,1681390123,0,,10,e4c7c_00004,0.00574541
train_cifar_e4c7c_00005,0.7485,2023-04-13_12-51-59,True,,f65f13551bed437e9bd329330ca221e9,"5_add_aug=False,increase_data=True,mixup=False",fa277bb50508,10,0.896247,172.28.0.12,934,True,196.125,20.0066,196.125,1681390319,0,,10,e4c7c_00005,0.00574541
train_cifar_e4c7c_00006,0.6362,2023-04-13_12-56-21,True,,f65f13551bed437e9bd329330ca221e9,"6_add_aug=True,increase_data=False,mixup=False",fa277bb50508,10,1.06041,172.28.0.12,934,True,261.965,26.0837,261.965,1681390581,0,,10,e4c7c_00006,0.00574541
train_cifar_e4c7c_00007,0.7549,2023-04-13_12-59-35,True,,f65f13551bed437e9bd329330ca221e9,"7_add_aug=False,increase_data=False,mixup=False",fa277bb50508,10,0.879734,172.28.0.12,934,True,194.417,18.4017,194.417,1681390775,0,,10,e4c7c_00007,0.00574541


[2m[36m(train_cifar pid=934)[0m Finished Training
[2m[36m(train_cifar pid=934)[0m Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /root/ray_results/train_cifar_2023-04-13_12-22-41/train_cifar_e4c7c_00001_1_add_aug=False,increase_data=True,mixup=True_2023-04-13_12-30-26/data/cifar-10-python.tar.gz


[2m[36m(train_cifar pid=934)[0m   0%|          | 0/170498071 [00:00<?, ?it/s]
  0%|          | 327680/170498071 [00:00<00:53, 3194605.42it/s]
  1%|          | 1507328/170498071 [00:00<00:20, 8116929.94it/s]
  2%|▏         | 2719744/170498071 [00:00<00:16, 9906061.53it/s]
  2%|▏         | 3964928/170498071 [00:00<00:15, 10750342.22it/s]
  3%|▎         | 5177344/170498071 [00:00<00:14, 11222828.21it/s]
  4%|▎         | 6389760/170498071 [00:00<00:14, 11519678.14it/s]
  4%|▍         | 7667712/170498071 [00:00<00:13, 11778108.14it/s]
  5%|▌         | 8847360/170498071 [00:00<00:13, 11605990.43it/s]
  6%|▌         | 10027008/170498071 [00:00<00:13, 11593951.21it/s]
  7%|▋         | 11304960/170498071 [00:01<00:13, 11901353.01it/s]
  7%|▋         | 12746752/170498071 [00:01<00:12, 12568812.99it/s]
  8%|▊         | 14057472/170498071 [00:01<00:12, 12711500.31it/s]
  9%|▉         | 15400960/170498071 [00:01<00:12, 12763626.97it/s]
 10%|▉         | 16809984/170498071 [00:01<00:11, 13130628.

[2m[36m(train_cifar pid=934)[0m Extracting /root/ray_results/train_cifar_2023-04-13_12-22-41/train_cifar_e4c7c_00001_1_add_aug=False,increase_data=True,mixup=True_2023-04-13_12-30-26/data/cifar-10-python.tar.gz to /root/ray_results/train_cifar_2023-04-13_12-22-41/train_cifar_e4c7c_00001_1_add_aug=False,increase_data=True,mixup=True_2023-04-13_12-30-26/data
[2m[36m(train_cifar pid=934)[0m Files already downloaded and verified
[2m[36m(train_cifar pid=934)[0m Files already downloaded and verified
[2m[36m(train_cifar pid=934)[0m Finished Training
[2m[36m(train_cifar pid=934)[0m Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /root/ray_results/train_cifar_2023-04-13_12-22-41/train_cifar_e4c7c_00002_2_add_aug=True,increase_data=False,mixup=True_2023-04-13_12-33-54/data/cifar-10-python.tar.gz


[2m[36m(train_cifar pid=934)[0m   0%|          | 0/170498071 [00:00<?, ?it/s]
  0%|          | 458752/170498071 [00:00<00:41, 4074869.76it/s]
  3%|▎         | 5144576/170498071 [00:00<00:05, 27967871.66it/s]
  7%|▋         | 11108352/170498071 [00:00<00:03, 41869247.93it/s]
 10%|█         | 17891328/170498071 [00:00<00:02, 51902171.94it/s]
 15%|█▍        | 24772608/170498071 [00:00<00:02, 57917711.96it/s]
 18%|█▊        | 30998528/170498071 [00:00<00:02, 59007607.55it/s]
 22%|██▏       | 38141952/170498071 [00:00<00:02, 62932158.41it/s]
 27%|██▋       | 45219840/170498071 [00:00<00:01, 65392605.83it/s]
 31%|███       | 53018624/170498071 [00:00<00:01, 69292577.23it/s]
 35%|███▌      | 60424192/170498071 [00:01<00:01, 70746971.97it/s]
 40%|███▉      | 67862528/170498071 [00:01<00:01, 71789778.06it/s]
 44%|████▍     | 75104256/170498071 [00:01<00:01, 71915156.79it/s]
 48%|████▊     | 82345984/170498071 [00:01<00:01, 72000822.89it/s]
 53%|█████▎    | 90210304/170498071 [00:01<00:01, 7

[2m[36m(train_cifar pid=934)[0m Extracting /root/ray_results/train_cifar_2023-04-13_12-22-41/train_cifar_e4c7c_00002_2_add_aug=True,increase_data=False,mixup=True_2023-04-13_12-33-54/data/cifar-10-python.tar.gz to /root/ray_results/train_cifar_2023-04-13_12-22-41/train_cifar_e4c7c_00002_2_add_aug=True,increase_data=False,mixup=True_2023-04-13_12-33-54/data
[2m[36m(train_cifar pid=934)[0m Files already downloaded and verified
[2m[36m(train_cifar pid=934)[0m Files already downloaded and verified
[2m[36m(train_cifar pid=934)[0m Finished Training
[2m[36m(train_cifar pid=934)[0m Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /root/ray_results/train_cifar_2023-04-13_12-22-41/train_cifar_e4c7c_00003_3_add_aug=False,increase_data=False,mixup=True_2023-04-13_12-38-21/data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]
  0%|          | 458752/170498071 [00:00<00:41, 4115053.13it/s]
  3%|▎         | 4849664/170498071 [00:00<00:06, 26227558.34it/s]
  8%|▊         | 13598720/170498071 [00:00<00:04, 37393604.27it/s]
 10%|█         | 17825792/170498071 [00:00<00:03, 39082024.97it/s]
 13%|█▎        | 22118400/170498071 [00:00<00:03, 40203690.64it/s]
 15%|█▌        | 26312704/170498071 [00:00<00:03, 40635191.17it/s]
 18%|█▊        | 30605312/170498071 [00:00<00:03, 41300191.91it/s]
 20%|██        | 34766848/170498071 [00:00<00:03, 41394186.36it/s]
 23%|██▎       | 38928384/170498071 [00:01<00:03, 41319977.03it/s]
 25%|██▌       | 43220992/170498071 [00:01<00:03, 41752783.02it/s]
 28%|██▊       | 47415296/170498071 [00:01<00:02, 41493821.38it/s]
 30%|███       | 51576832/170498071 [00:01<00:03, 39180781.32it/s]
 33%|███▎      | 56295424/170498071 [00:01<00:02, 39729692.94it/s]
 36%|███▌      | 61112320/170498071 [00:01<00:02, 42081634.20it/s]
 38%|███▊      | 653

[2m[36m(train_cifar pid=934)[0m Extracting /root/ray_results/train_cifar_2023-04-13_12-22-41/train_cifar_e4c7c_00003_3_add_aug=False,increase_data=False,mixup=True_2023-04-13_12-38-21/data/cifar-10-python.tar.gz to /root/ray_results/train_cifar_2023-04-13_12-22-41/train_cifar_e4c7c_00003_3_add_aug=False,increase_data=False,mixup=True_2023-04-13_12-38-21/data
[2m[36m(train_cifar pid=934)[0m Files already downloaded and verified
[2m[36m(train_cifar pid=934)[0m Files already downloaded and verified
[2m[36m(train_cifar pid=934)[0m Finished Training
[2m[36m(train_cifar pid=934)[0m Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /root/ray_results/train_cifar_2023-04-13_12-22-41/train_cifar_e4c7c_00004_4_add_aug=True,increase_data=True,mixup=False_2023-04-13_12-41-36/data/cifar-10-python.tar.gz


[2m[36m(train_cifar pid=934)[0m   0%|          | 0/170498071 [00:00<?, ?it/s]
  0%|          | 458752/170498071 [00:00<00:41, 4140199.61it/s]
  4%|▍         | 7372800/170498071 [00:00<00:04, 40716211.06it/s]
 11%|█         | 18808832/170498071 [00:00<00:02, 73450018.34it/s]
 17%|█▋        | 29032448/170498071 [00:00<00:01, 83811031.53it/s]
 23%|██▎       | 39190528/170498071 [00:00<00:01, 90094729.16it/s]
 29%|██▊       | 48824320/170498071 [00:00<00:01, 92128717.01it/s]
 35%|███▌      | 59801600/170498071 [00:00<00:01, 97810629.92it/s]
 41%|████      | 69632000/170498071 [00:00<00:01, 96475251.71it/s]
 47%|████▋     | 79888384/170498071 [00:00<00:00, 98286622.78it/s]
 53%|█████▎    | 90636288/170498071 [00:01<00:00, 101043868.36it/s]
 59%|█████▉    | 100761600/170498071 [00:01<00:00, 97594045.56it/s]
 65%|██████▌   | 111083520/170498071 [00:01<00:00, 99174897.76it/s]
 71%|███████   | 121307136/170498071 [00:01<00:00, 99905261.23it/s]
 77%|███████▋  | 131334144/170498071 [00:01<00:

[2m[36m(train_cifar pid=934)[0m Extracting /root/ray_results/train_cifar_2023-04-13_12-22-41/train_cifar_e4c7c_00004_4_add_aug=True,increase_data=True,mixup=False_2023-04-13_12-41-36/data/cifar-10-python.tar.gz to /root/ray_results/train_cifar_2023-04-13_12-22-41/train_cifar_e4c7c_00004_4_add_aug=True,increase_data=True,mixup=False_2023-04-13_12-41-36/data
[2m[36m(train_cifar pid=934)[0m Files already downloaded and verified
[2m[36m(train_cifar pid=934)[0m Files already downloaded and verified
[2m[36m(train_cifar pid=934)[0m Finished Training
[2m[36m(train_cifar pid=934)[0m Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /root/ray_results/train_cifar_2023-04-13_12-22-41/train_cifar_e4c7c_00005_5_add_aug=False,increase_data=True,mixup=False_2023-04-13_12-48-43/data/cifar-10-python.tar.gz


[2m[36m(train_cifar pid=934)[0m   0%|          | 0/170498071 [00:00<?, ?it/s]
  0%|          | 294912/170498071 [00:00<00:58, 2887959.46it/s]
  1%|▏         | 2424832/170498071 [00:00<00:12, 13492586.93it/s]
  3%|▎         | 5013504/170498071 [00:00<00:08, 18905482.66it/s]
  4%|▍         | 7307264/170498071 [00:00<00:07, 20469339.34it/s]
  6%|▌         | 10223616/170498071 [00:00<00:06, 23507558.77it/s]
  8%|▊         | 13008896/170498071 [00:00<00:06, 24866572.08it/s]
 10%|▉         | 16252928/170498071 [00:00<00:05, 27278462.03it/s]
 11%|█▏        | 19431424/170498071 [00:00<00:05, 28408866.65it/s]
 13%|█▎        | 22511616/170498071 [00:00<00:05, 29015925.21it/s]
 15%|█▌        | 25788416/170498071 [00:01<00:04, 30150766.18it/s]
 17%|█▋        | 28835840/170498071 [00:01<00:04, 29949981.38it/s]
 19%|█▊        | 31850496/170498071 [00:01<00:04, 28173047.20it/s]
 20%|██        | 34701312/170498071 [00:01<00:04, 28236623.74it/s]
 22%|██▏       | 38207488/170498071 [00:01<00:04, 301

[2m[36m(train_cifar pid=934)[0m Extracting /root/ray_results/train_cifar_2023-04-13_12-22-41/train_cifar_e4c7c_00005_5_add_aug=False,increase_data=True,mixup=False_2023-04-13_12-48-43/data/cifar-10-python.tar.gz to /root/ray_results/train_cifar_2023-04-13_12-22-41/train_cifar_e4c7c_00005_5_add_aug=False,increase_data=True,mixup=False_2023-04-13_12-48-43/data
[2m[36m(train_cifar pid=934)[0m Files already downloaded and verified
[2m[36m(train_cifar pid=934)[0m Files already downloaded and verified
[2m[36m(train_cifar pid=934)[0m Finished Training
[2m[36m(train_cifar pid=934)[0m Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /root/ray_results/train_cifar_2023-04-13_12-22-41/train_cifar_e4c7c_00006_6_add_aug=True,increase_data=False,mixup=False_2023-04-13_12-51-59/data/cifar-10-python.tar.gz


[2m[36m(train_cifar pid=934)[0m   0%|          | 0/170498071 [00:00<?, ?it/s]
  0%|          | 458752/170498071 [00:00<00:40, 4163311.85it/s]
  3%|▎         | 5537792/170498071 [00:00<00:05, 30368895.96it/s]
 15%|█▍        | 24870912/170498071 [00:00<00:01, 74855432.95it/s]
 20%|█▉        | 33521664/170498071 [00:00<00:01, 78981884.47it/s]
 26%|██▌       | 44728320/170498071 [00:00<00:01, 90122592.79it/s]
 32%|███▏      | 54296576/170498071 [00:00<00:01, 91796328.53it/s]
 37%|███▋      | 63635456/170498071 [00:00<00:01, 91995616.49it/s]
 43%|████▎     | 73302016/170498071 [00:00<00:01, 93405995.27it/s]
 49%|████▉     | 83820544/170498071 [00:01<00:00, 97018212.53it/s]
 56%|█████▌    | 95715328/170498071 [00:01<00:00, 103680510.26it/s]
 62%|██████▏   | 106102784/170498071 [00:01<00:00, 95513442.50it/s]
 69%|██████▊   | 117080064/170498071 [00:01<00:00, 99462553.72it/s]
 75%|███████▌  | 128057344/170498071 [00:01<00:00, 102345556.97it/s]
 82%|████████▏ | 139198464/170498071 [00:01<00

[2m[36m(train_cifar pid=934)[0m Extracting /root/ray_results/train_cifar_2023-04-13_12-22-41/train_cifar_e4c7c_00006_6_add_aug=True,increase_data=False,mixup=False_2023-04-13_12-51-59/data/cifar-10-python.tar.gz to /root/ray_results/train_cifar_2023-04-13_12-22-41/train_cifar_e4c7c_00006_6_add_aug=True,increase_data=False,mixup=False_2023-04-13_12-51-59/data
[2m[36m(train_cifar pid=934)[0m Files already downloaded and verified
[2m[36m(train_cifar pid=934)[0m Files already downloaded and verified
[2m[36m(train_cifar pid=934)[0m Finished Training
[2m[36m(train_cifar pid=934)[0m Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /root/ray_results/train_cifar_2023-04-13_12-22-41/train_cifar_e4c7c_00007_7_add_aug=False,increase_data=False,mixup=False_2023-04-13_12-56-21/data/cifar-10-python.tar.gz


[2m[36m(train_cifar pid=934)[0m   0%|          | 0/170498071 [00:00<?, ?it/s]
  0%|          | 458752/170498071 [00:00<00:40, 4150756.31it/s]
  3%|▎         | 5210112/170498071 [00:00<00:05, 28502559.13it/s]
  7%|▋         | 11436032/170498071 [00:00<00:03, 43377635.20it/s]
 10%|█         | 17367040/170498071 [00:00<00:03, 49382648.48it/s]
 14%|█▎        | 23134208/170498071 [00:00<00:02, 52120370.00it/s]
 17%|█▋        | 28409856/170498071 [00:00<00:03, 46727768.59it/s]
 19%|█▉        | 33226752/170498071 [00:00<00:03, 42966612.63it/s]
 22%|██▏       | 37650432/170498071 [00:00<00:03, 40786569.91it/s]
 25%|██▍       | 41811968/170498071 [00:01<00:03, 39506581.10it/s]
 27%|██▋       | 45842432/170498071 [00:01<00:03, 38443295.08it/s]
 29%|██▉       | 49741824/170498071 [00:01<00:03, 37769313.91it/s]
 31%|███▏      | 53542912/170498071 [00:01<00:03, 36845221.17it/s]
 34%|███▎      | 57311232/170498071 [00:01<00:03, 37009280.24it/s]
 36%|███▌      | 61046784/170498071 [00:01<00:02, 3

[2m[36m(train_cifar pid=934)[0m Extracting /root/ray_results/train_cifar_2023-04-13_12-22-41/train_cifar_e4c7c_00007_7_add_aug=False,increase_data=False,mixup=False_2023-04-13_12-56-21/data/cifar-10-python.tar.gz to /root/ray_results/train_cifar_2023-04-13_12-22-41/train_cifar_e4c7c_00007_7_add_aug=False,increase_data=False,mixup=False_2023-04-13_12-56-21/data
[2m[36m(train_cifar pid=934)[0m Files already downloaded and verified
[2m[36m(train_cifar pid=934)[0m Files already downloaded and verified


2023-04-13 12:59:35,858	INFO tune.py:798 -- Total run time: 2211.22 seconds (2210.83 seconds for the tuning loop).


[2m[36m(train_cifar pid=934)[0m Finished Training
Best trial config: {'add_aug': False, 'increase_data': True, 'mixup': True}
Best trial final validation loss: 0.7496478553790196
Best trial final validation accuracy: 0.7833
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 49370551.74it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Files already downloaded and verified
Best trial test set accuracy: 0.7619


In [11]:
final_df = results_df.drop(
    columns=[
        "should_checkpoint",
        "timesteps_total",
        "episodes_total",
        "training_iteration",
        "trial_id",
        "experiment_id",
        "date",
        "timestamp",
        "pid",
        "hostname",
        "node_ip",
        "time_since_restore",
        "timesteps_since_restore",
        "iterations_since_restore",
        "warmup_time",
        "logdir",
    ]
)
final_df.sort_values(by=['accuracy'], ascending=False).head(10)

Unnamed: 0,loss,accuracy,time_this_iter_s,done,time_total_s,config/add_aug,config/increase_data,config/mixup
1,0.749648,0.7833,19.670618,False,207.918568,False,True,True
3,0.76632,0.7801,18.949716,False,176.037481,False,False,True
7,0.862819,0.7627,18.696309,False,176.015229,False,False,False
5,0.728118,0.7555,17.991747,False,100.912571,False,True,False
4,0.716579,0.75385,41.54143,False,342.383889,True,True,False
0,0.868907,0.7433,42.727987,False,406.448656,True,True,True
6,1.014857,0.6486,23.890211,False,235.881642,True,False,False
2,1.124053,0.6318,24.592758,False,267.042112,True,False,True


In [12]:
final_df.sort_values(by=['accuracy']).head(10)

Unnamed: 0,loss,accuracy,time_this_iter_s,done,time_total_s,config/add_aug,config/increase_data,config/mixup
2,1.124053,0.6318,24.592758,False,267.042112,True,False,True
6,1.014857,0.6486,23.890211,False,235.881642,True,False,False
0,0.868907,0.7433,42.727987,False,406.448656,True,True,True
4,0.716579,0.75385,41.54143,False,342.383889,True,True,False
5,0.728118,0.7555,17.991747,False,100.912571,False,True,False
7,0.862819,0.7627,18.696309,False,176.015229,False,False,False
3,0.76632,0.7801,18.949716,False,176.037481,False,False,True
1,0.749648,0.7833,19.670618,False,207.918568,False,True,True
