## Install Libraries


In [1]:
# Installs Ray together with its Tune extras into the kernel's environment.
# NOTE(review): the version is unpinned, while the import cell relies on
# ray.air.session and ray.air.checkpoint -- consider pinning Ray to the release
# this notebook was last run with (TODO confirm which one).
%pip install "ray[tune]"
# %pip install torch torchvision torchaudio # Uncomment this to install PyTorch 2.0+ on ilab (required version to run this program)

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


## Import Libraries


In [2]:
from filelock import FileLock
from ray import tune
from ray.air import session
from ray.air.checkpoint import Checkpoint
from torch.utils.data import random_split
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms


## Data loaders


In [3]:
def load_data(data_dir="./data"):
    """Build the CIFAR-10 training and test datasets rooted at ``data_dir``.

    Every image is converted to a tensor and normalized with mean 0.5 and
    standard deviation 0.5 on each of its three channels. Both datasets are
    created with ``download=True`` while a file lock at ``~/.data.lock`` is
    held.

    Args:
        data_dir: Directory passed to torchvision as the dataset root.

    Returns:
        A ``(trainset, testset)`` tuple of ``torchvision.datasets.CIFAR10``.
    """
    normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    transform = transforms.Compose([transforms.ToTensor(), normalize])

    lock_path = os.path.expanduser("~/.data.lock")
    with FileLock(lock_path):
        # The unpacking consumes the generator inside the lock, so the
        # training split is built first and the test split second.
        trainset, testset = (
            torchvision.datasets.CIFAR10(
                root=data_dir, train=is_train, download=True, transform=transform
            )
            for is_train in (True, False)
        )

    return trainset, testset


## Configurable neural network


In [4]:
def flatten(x, start_dim=1, end_dim=-1):
    """Collapse dimensions ``start_dim`` through ``end_dim`` of ``x`` into one.

    Thin functional wrapper around ``x.flatten``. With the defaults, every
    dimension except the first is merged.

    Args:
        x: Object exposing ``flatten(start_dim=..., end_dim=...)``.
        start_dim: First dimension to merge.
        end_dim: Last dimension to merge.

    Returns:
        Whatever ``x.flatten`` returns for those bounds.
    """
    flattened = x.flatten(start_dim=start_dim, end_dim=end_dim)
    return flattened


In [5]:
# Will need to modify the parameters and the network depending on what you are experimenting with
class Net(nn.Module):
    """One-convolution image classifier with a configurable kernel size.

    A 3-to-3 channel convolution with ``"same"`` padding is followed by ReLU,
    flattening, and a linear layer mapping ``3 * 32 * 32`` features to 10
    outputs, so inputs must be 3-channel 32x32 images.

    Args:
        kernel_size: Kernel size handed to ``nn.Conv2d``.
    """

    def __init__(self, kernel_size):
        super().__init__()
        # "same" padding preserves the spatial size, which is what lets the
        # linear layer use a fixed 3 * 32 * 32 input width for any kernel.
        self.conv = nn.Conv2d(
            in_channels=3, out_channels=3, kernel_size=kernel_size, padding="same"
        )
        self.fc = nn.Linear(in_features=3 * 32 * 32, out_features=10)

    def forward(self, x):
        """Return the 10 per-class scores for a batch of images ``x``."""
        activated = F.relu(self.conv(x))
        return self.fc(flatten(activated))


## The train function


In [6]:
def train_cifar(config):
    """Train and validate ``Net`` on CIFAR-10 for a single Ray Tune trial.

    Builds the network from ``config["kernel_size"]``, restores model and
    optimizer state if the session provides a checkpoint, then trains with
    SGD (lr=1e-3) for 10 epochs on a random 80/20 split of the CIFAR-10
    training set. After every epoch the model and optimizer state are saved
    to ``my_model/checkpoint.pt`` (relative to the current working directory)
    and the validation loss and accuracy are reported via ``session.report``.

    Args:
        config: Trial hyperparameters; must contain ``"kernel_size"``.
    """
    net = Net(
        config["kernel_size"]
    )  # Will need to modify the parameters depending on what you are experimenting with

    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if torch.cuda.device_count() > 1:
            # NOTE(review): a DataParallel wrapper saves state_dict keys with a
            # "module." prefix, which a bare Net (as built in test_best_model)
            # would presumably reject -- confirm before running multi-GPU.
            net = nn.DataParallel(net)
    net.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=1e-3)

    # Do not modify any code below!
    loaded_checkpoint = session.get_checkpoint()
    if loaded_checkpoint:
        with loaded_checkpoint.as_directory() as loaded_checkpoint_dir:
            # map_location lets a checkpoint written on a GPU be restored on a
            # CPU-only worker (and vice versa).
            model_state, optimizer_state = torch.load(
                os.path.join(loaded_checkpoint_dir, "checkpoint.pt"),
                map_location=device,
            )
            net.load_state_dict(model_state)
            optimizer.load_state_dict(optimizer_state)

    data_dir = os.path.abspath("./data")
    trainset, testset = load_data(data_dir)

    # Hold out 20% of the training set for validation.
    test_abs = int(len(trainset) * 0.8)
    train_subset, val_subset = random_split(
        trainset, [test_abs, len(trainset) - test_abs]
    )

    trainloader = torch.utils.data.DataLoader(
        train_subset, batch_size=64, shuffle=True, num_workers=8
    )
    valloader = torch.utils.data.DataLoader(
        val_subset, batch_size=64, shuffle=True, num_workers=8
    )

    for epoch in range(10):
        # Explicit mode switching keeps training correct if Net is extended
        # with layers such as dropout or batch norm.
        net.train()
        running_loss = 0.0
        epoch_steps = 0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            epoch_steps += 1
            if i % 2000 == 1999:
                print(
                    f"[{epoch + 1}, {i + 1:>5}] loss: {running_loss / epoch_steps:.3f}"
                )
                # Reset both accumulators so the next printed value is the
                # mean over the following window only.
                running_loss = 0.0
                epoch_steps = 0

        val_loss = 0.0
        val_steps = 0
        total = 0
        correct = 0
        net.eval()
        with torch.no_grad():
            for data in valloader:
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = net(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                # .item() keeps val_loss a plain Python float, matching how
                # the training loss is accumulated above.
                val_loss += criterion(outputs, labels).item()
                val_steps += 1

        os.makedirs("my_model", exist_ok=True)
        torch.save((net.state_dict(), optimizer.state_dict()), "my_model/checkpoint.pt")
        checkpoint = Checkpoint.from_directory("my_model")
        session.report(
            {"loss": (val_loss / val_steps), "accuracy": correct / total},
            checkpoint=checkpoint,
        )
    print("Finished Training")


## Test set accuracy


In [7]:
def test_best_model(best_result):
    """Evaluate the best trial's checkpointed model on the CIFAR-10 test set.

    Rebuilds ``Net`` from ``best_result.config["kernel_size"]``, loads the
    model weights from ``checkpoint.pt`` inside the trial's checkpoint, and
    prints the fraction of correctly classified test images.

    Args:
        best_result: Result of the winning trial; must expose ``config``
            (containing ``"kernel_size"``) and ``checkpoint``.
    """
    best_trained_model = Net(
        best_result.config["kernel_size"]
    )  # Will need to modify the parameters depending on what you are experimenting with

    # Do not modify any code below!
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    best_trained_model.to(device)

    # as_directory() is the accessor train_cifar already uses when restoring;
    # unlike to_directory() with no path, it does not hand back a directory
    # that this function would then have to clean up itself.
    with best_result.checkpoint.as_directory() as checkpoint_dir:
        # Only the model weights are needed here; the optimizer state is
        # discarded. map_location allows loading on a CPU-only machine.
        model_state, _ = torch.load(
            os.path.join(checkpoint_dir, "checkpoint.pt"), map_location=device
        )
    best_trained_model.load_state_dict(model_state)
    # Inference mode matters once Net gains dropout or batch-norm layers.
    best_trained_model.eval()

    _, testset = load_data()

    testloader = torch.utils.data.DataLoader(
        testset, batch_size=4, shuffle=False, num_workers=2
    )

    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = best_trained_model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f"Best trial test set accuracy: {correct / total}")


## Configuring the search space


In [8]:
# Hyperparameter search space -- this is the part to experiment with.
# Wrap the candidate values of a hyperparameter in tune.grid_search so that
# every listed value is tried.
kernel_size_candidates = [1, 2, 3, 5, 10, 15, 32]
config = {"kernel_size": tune.grid_search(kernel_size_candidates)}


In [9]:
# Do not modify any code below!
def main(config):
    """Run the Tune search over ``config`` and evaluate the best trial.

    Each trial runs ``train_cifar`` and requests every CPU reported by
    ``os.cpu_count()`` and no GPU. When the search finishes, the trial with
    the highest reported ``"accuracy"`` is printed and scored on the test set
    through ``test_best_model``.

    Args:
        config: Search space passed to the tuner as ``param_space``.

    Returns:
        The frame produced by ``get_dataframe("accuracy", "max")`` on the
        tuner's results.
    """
    trainable = tune.with_resources(
        tune.with_parameters(train_cifar),
        resources={"cpu": os.cpu_count(), "gpu": 0},
    )
    result_grid = tune.Tuner(trainable, param_space=config).fit()

    best_result = result_grid.get_best_result("accuracy", "max")
    best_metrics = best_result.metrics

    print(f"Best trial config: {best_result.config}")
    print(f"Best trial final validation loss: {best_metrics['loss']}")
    print(f"Best trial final validation accuracy: {best_metrics['accuracy']}")

    test_best_model(best_result)

    return result_grid.get_dataframe("accuracy", "max")


# Runs the whole grid search and keeps the per-trial summary frame for the
# next cell. The run recorded in this notebook's output took about an hour.
results_df = main(config)


2023-04-03 13:06:33,742	INFO worker.py:1553 -- Started a local Ray instance.


0,1
Current time:,2023-04-03 14:10:08
Running for:,01:03:32.10
Memory:,12.7/287.9 GiB

Trial name,status,loc,kernel_size,iter,total time (s),loss,accuracy
train_cifar_e3fb8_00000,TERMINATED,128.6.4.123:4163727,1,10,38.2241,1.97179,0.3192
train_cifar_e3fb8_00001,TERMINATED,128.6.4.123:4163727,2,10,37.8177,1.78083,0.3833
train_cifar_e3fb8_00002,TERMINATED,128.6.4.123:4163727,3,10,40.3099,1.77341,0.3864
train_cifar_e3fb8_00003,TERMINATED,128.6.4.123:4163727,5,10,53.7996,1.77219,0.3815
train_cifar_e3fb8_00004,TERMINATED,128.6.4.123:4163727,10,10,156.818,1.68718,0.4139
train_cifar_e3fb8_00005,TERMINATED,128.6.4.123:4163727,15,10,642.429,1.63899,0.4228
train_cifar_e3fb8_00006,TERMINATED,128.6.4.123:4163727,32,10,2835.44,1.62559,0.4328


[2m[36m(train_cifar pid=4163727)[0m Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /common/home/vig4/ray_results/train_cifar_2023-04-03_13-06-30/train_cifar_e3fb8_00000_0_kernel_size=1_2023-04-03_13-06-36/data/cifar-10-python.tar.gz


0it [00:00, ?it/s]far pid=4163727)[0m 
  0%|          | 0/170498071 [00:00<?, ?it/s]
  1%|          | 868352/170498071 [00:00<00:21, 7820584.58it/s]
  6%|▌         | 10649600/170498071 [00:00<00:02, 58452636.62it/s]
 13%|█▎        | 22159360/170498071 [00:00<00:01, 83690851.43it/s]
 20%|█▉        | 33841152/170498071 [00:00<00:01, 96568109.29it/s]
 27%|██▋       | 45481984/170498071 [00:00<00:01, 103612221.85it/s]
 33%|███▎      | 57057280/170498071 [00:00<00:01, 107684053.53it/s]
 40%|████      | 68681728/170498071 [00:00<00:00, 110462986.35it/s]
 47%|████▋     | 80240640/170498071 [00:00<00:00, 112039876.58it/s]
 54%|█████▍    | 91881472/170498071 [00:01<00:00, 113361086.74it/s]
 61%|██████    | 103464960/170498071 [00:01<00:00, 114100770.21it/s]
 67%|██████▋   | 115064832/170498071 [00:01<00:00, 114669098.74it/s]
 74%|███████▍  | 126664704/170498071 [00:01<00:00, 115067743.63it/s]
 81%|████████  | 138272768/170498071 [00:01<00:00, 115352391.39it/s]
 95%|█████████▍| 161423360/170498

[2m[36m(train_cifar pid=4163727)[0m Files already downloaded and verified


[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(

Trial name,accuracy,date,done,episodes_total,experiment_id,experiment_tag,hostname,iterations_since_restore,loss,node_ip,pid,should_checkpoint,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
train_cifar_e3fb8_00000,0.3192,2023-04-03_13-07-21,True,,07c13cc96e354f88a0156711233faa3d,0_kernel_size=1,jupyter.cs.rutgers.edu,10,1.97179,128.6.4.123,4163727,True,38.2241,3.21397,38.2241,1680541641,0,,10,e3fb8_00000,0.00789785
train_cifar_e3fb8_00001,0.3833,2023-04-03_13-07-59,True,,07c13cc96e354f88a0156711233faa3d,1_kernel_size=2,jupyter.cs.rutgers.edu,10,1.78083,128.6.4.123,4163727,True,37.8177,3.29976,37.8177,1680541679,0,,10,e3fb8_00001,0.00789785
train_cifar_e3fb8_00002,0.3864,2023-04-03_13-08-39,True,,07c13cc96e354f88a0156711233faa3d,2_kernel_size=3,jupyter.cs.rutgers.edu,10,1.77341,128.6.4.123,4163727,True,40.3099,3.69137,40.3099,1680541719,0,,10,e3fb8_00002,0.00789785
train_cifar_e3fb8_00003,0.3815,2023-04-03_13-09-33,True,,07c13cc96e354f88a0156711233faa3d,3_kernel_size=5,jupyter.cs.rutgers.edu,10,1.77219,128.6.4.123,4163727,True,53.7996,4.84924,53.7996,1680541773,0,,10,e3fb8_00003,0.00789785
train_cifar_e3fb8_00004,0.4139,2023-04-03_13-12-10,True,,07c13cc96e354f88a0156711233faa3d,4_kernel_size=10,jupyter.cs.rutgers.edu,10,1.68718,128.6.4.123,4163727,True,156.818,15.1751,156.818,1680541930,0,,10,e3fb8_00004,0.00789785
train_cifar_e3fb8_00005,0.4228,2023-04-03_13-22-52,True,,07c13cc96e354f88a0156711233faa3d,5_kernel_size=15,jupyter.cs.rutgers.edu,10,1.63899,128.6.4.123,4163727,True,642.429,59.2988,642.429,1680542572,0,,10,e3fb8_00005,0.00789785
train_cifar_e3fb8_00006,0.4328,2023-04-03_14-10-08,True,,07c13cc96e354f88a0156711233faa3d,6_kernel_size=32,jupyter.cs.rutgers.edu,10,1.62559,128.6.4.123,4163727,True,2835.44,279.448,2835.44,1680545408,0,,10,e3fb8_00006,0.00789785


[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(

[2m[36m(train_cifar pid=4163727)[0m Finished Training
[2m[36m(train_cifar pid=4163727)[0m Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /common/home/vig4/ray_results/train_cifar_2023-04-03_13-06-30/train_cifar_e3fb8_00001_1_kernel_size=2_2023-04-03_13-07-21/data/cifar-10-python.tar.gz


0it [00:00, ?it/s]far pid=4163727)[0m 
  0%|          | 40960/170498071 [00:00<07:37, 372281.69it/s]
  1%|▏         | 2359296/170498071 [00:00<00:12, 13257509.04it/s]
  8%|▊         | 13475840/170498071 [00:00<00:02, 56952665.82it/s]
 15%|█▍        | 25059328/170498071 [00:00<00:01, 79845928.25it/s]
 21%|██▏       | 36601856/170498071 [00:00<00:01, 92542176.87it/s]
 28%|██▊       | 48193536/170498071 [00:00<00:01, 100418470.43it/s]
 35%|███▌      | 59727872/170498071 [00:00<00:01, 105272210.50it/s]
 42%|████▏     | 71303168/170498071 [00:00<00:00, 108597875.75it/s]
 49%|████▊     | 82845696/170498071 [00:00<00:00, 110677024.94it/s]
 55%|█████▌    | 94380032/170498071 [00:01<00:00, 112087157.89it/s]
 62%|██████▏   | 105938944/170498071 [00:01<00:00, 113156510.09it/s]
 69%|██████▉   | 117555200/170498071 [00:01<00:00, 114065920.40it/s]
 76%|███████▌  | 128983040/170498071 [00:01<00:00, 113770289.19it/s]
 82%|████████▏ | 140574720/170498071 [00:01<00:00, 114410191.24it/s]
 89%|████████▉ 

[2m[36m(train_cifar pid=4163727)[0m Files already downloaded and verified


[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
170500096it [00:05, 30261411.54it/s]                                
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
170500096it [00:05, 30133526.16it/s]               

[2m[36m(train_cifar pid=4163727)[0m Finished Training
[2m[36m(train_cifar pid=4163727)[0m Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /common/home/vig4/ray_results/train_cifar_2023-04-03_13-06-30/train_cifar_e3fb8_00002_2_kernel_size=3_2023-04-03_13-07-59/data/cifar-10-python.tar.gz


0it [00:00, ?it/s]far pid=4163727)[0m 
  0%|          | 40960/170498071 [00:00<07:39, 371223.06it/s]
  1%|          | 2105344/170498071 [00:00<00:14, 11780349.46it/s]
  8%|▊         | 13131776/170498071 [00:00<00:02, 55664830.74it/s]
 14%|█▍        | 24576000/170498071 [00:00<00:01, 78517051.96it/s]
 21%|██        | 36036608/170498071 [00:00<00:01, 91394764.95it/s]
 27%|██▋       | 46661632/170498071 [00:00<00:01, 96388913.59it/s]
 34%|███▍      | 58089472/170498071 [00:00<00:01, 102180476.97it/s]
 40%|████      | 68755456/170498071 [00:00<00:00, 103594570.60it/s]
 47%|████▋     | 80093184/170498071 [00:00<00:00, 106637230.28it/s]
 53%|█████▎    | 90824704/170498071 [00:01<00:00, 106817648.83it/s]
 60%|█████▉    | 102252544/170498071 [00:01<00:00, 109087848.54it/s]
 67%|██████▋   | 113647616/170498071 [00:01<00:00, 110552537.83it/s]
 73%|███████▎  | 125100032/170498071 [00:01<00:00, 111731722.28it/s]
 80%|███████▉  | 136323072/170498071 [00:01<00:00, 111849516.10it/s]
 87%|████████▋ |

[2m[36m(train_cifar pid=4163727)[0m Files already downloaded and verified


[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
170500096it [00:06, 28041005.66it/s]                                
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
170500096it [00:06, 27904990.75it/s]               

[2m[36m(train_cifar pid=4163727)[0m Finished Training
[2m[36m(train_cifar pid=4163727)[0m Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /common/home/vig4/ray_results/train_cifar_2023-04-03_13-06-30/train_cifar_e3fb8_00003_3_kernel_size=5_2023-04-03_13-08-39/data/cifar-10-python.tar.gz


0it [00:00, ?it/s]far pid=4163727)[0m 
  0%|          | 40960/170498071 [00:00<07:31, 377870.21it/s]
  1%|▏         | 2252800/170498071 [00:00<00:13, 12737955.48it/s]
  8%|▊         | 13443072/170498071 [00:00<00:02, 57162918.09it/s]
 14%|█▍        | 24117248/170498071 [00:00<00:01, 76495152.59it/s]
 21%|██        | 35643392/170498071 [00:00<00:01, 90356868.04it/s]
 27%|██▋       | 46350336/170498071 [00:00<00:01, 96007048.34it/s]
 34%|███▍      | 57843712/170498071 [00:00<00:01, 102140265.50it/s]
 41%|████      | 69173248/170498071 [00:00<00:00, 105671406.27it/s]
 47%|████▋     | 80068608/170498071 [00:00<00:00, 106695063.28it/s]
 53%|█████▎    | 91078656/170498071 [00:01<00:00, 107740010.63it/s]
 60%|█████▉    | 101924864/170498071 [00:01<00:00, 107938167.92it/s]
 66%|██████▋   | 113016832/170498071 [00:01<00:00, 108834874.08it/s]
 73%|███████▎  | 123912192/170498071 [00:01<00:00, 108522892.70it/s]
 79%|███████▉  | 135045120/170498071 [00:01<00:00, 109344463.45it/s]
 86%|████████▌ |

[2m[36m(train_cifar pid=4163727)[0m Files already downloaded and verified


[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
170500096it [00:06, 28267572.03it/s]                                
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
170500096it [00:06, 28152013.19it/s]               

[2m[36m(train_cifar pid=4163727)[0m Finished Training
[2m[36m(train_cifar pid=4163727)[0m Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /common/home/vig4/ray_results/train_cifar_2023-04-03_13-06-30/train_cifar_e3fb8_00004_4_kernel_size=10_2023-04-03_13-09-33/data/cifar-10-python.tar.gz


0it [00:00, ?it/s]far pid=4163727)[0m 
  0%|          | 40960/170498071 [00:00<07:35, 374482.45it/s]
  1%|          | 1802240/170498071 [00:00<00:16, 10130103.20it/s]
  6%|▌         | 9445376/170498071 [00:00<00:04, 39688907.75it/s]
 10%|█         | 17104896/170498071 [00:00<00:02, 54051064.93it/s]
 13%|█▎        | 22790144/170498071 [00:00<00:02, 54969534.81it/s]
 17%|█▋        | 28704768/170498071 [00:00<00:02, 56285525.27it/s]
 21%|██        | 34979840/170498071 [00:00<00:02, 58379828.00it/s]
 24%|██▍       | 41000960/170498071 [00:00<00:02, 58948956.38it/s]
 28%|██▊       | 47620096/170498071 [00:00<00:02, 61201146.25it/s]
 32%|███▏      | 53755904/170498071 [00:01<00:01, 59234592.12it/s]
 35%|███▌      | 59703296/170498071 [00:01<00:01, 59032343.49it/s]
 42%|████▏     | 71712768/170498071 [00:01<00:01, 59495028.89it/s]
 46%|████▌     | 78028800/170498071 [00:01<00:01, 60358585.56it/s]
 50%|████▉     | 84500480/170498071 [00:01<00:01, 61658591.62it/s]
 53%|█████▎    | 90677248/170

[2m[36m(train_cifar pid=4163727)[0m Files already downloaded and verified


[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
170500096it [00:07, 23995057.53it/s]                               
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
170500096it [00:07, 23869440.02it/s]                               
170500096it [00:07, 23851817.09it/s]                               
[2m[36m(train_cifar pid=

[2m[36m(train_cifar pid=4163727)[0m Finished Training
[2m[36m(train_cifar pid=4163727)[0m Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /common/home/vig4/ray_results/train_cifar_2023-04-03_13-06-30/train_cifar_e3fb8_00005_5_kernel_size=15_2023-04-03_13-12-10/data/cifar-10-python.tar.gz


0it [00:00, ?it/s]far pid=4163727)[0m 
  0%|          | 40960/170498071 [00:00<08:05, 351055.92it/s]
  1%|          | 1056768/170498071 [00:00<00:45, 3710580.36it/s]
  1%|          | 2105344/170498071 [00:00<00:29, 5637745.51it/s]
  2%|▏         | 3153920/170498071 [00:00<00:25, 6677412.67it/s]
  2%|▏         | 4202496/170498071 [00:00<00:22, 7242765.13it/s]
  3%|▎         | 5251072/170498071 [00:00<00:21, 7576706.13it/s]
  4%|▎         | 6299648/170498071 [00:00<00:21, 7805597.29it/s]
  4%|▍         | 7348224/170498071 [00:01<00:20, 7975413.79it/s]
  5%|▍         | 8396800/170498071 [00:01<00:20, 8094680.46it/s]
  6%|▌         | 9445376/170498071 [00:01<00:19, 8176101.50it/s]
  6%|▌         | 10493952/170498071 [00:01<00:19, 8218776.97it/s]
  7%|▋         | 11542528/170498071 [00:01<00:19, 8158692.00it/s]
  7%|▋         | 12591104/170498071 [00:01<00:18, 8454904.69it/s]
  8%|▊         | 13639680/170498071 [00:01<00:18, 8659333.03it/s]
  9%|▊         | 14688256/170498071 [00:01<00:17,

[2m[36m(train_cifar pid=4163727)[0m Files already downloaded and verified


[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
170500096it [00:43, 3922277.96it/s][0m 
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
170500096it [00:43, 3920619.35it/s][0m 
170500096it [00:43, 3920593.28it/s][0m 


[2m[36m(train_cifar pid=4163727)[0m Finished Training
[2m[36m(train_cifar pid=4163727)[0m Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /common/home/vig4/ray_results/train_cifar_2023-04-03_13-06-30/train_cifar_e3fb8_00006_6_kernel_size=32_2023-04-03_13-22-52/data/cifar-10-python.tar.gz


0it [00:00, ?it/s]far pid=4163727)[0m 
  0%|          | 40960/170498071 [00:00<07:38, 371381.95it/s]
  1%|          | 1056768/170498071 [00:00<00:42, 3983666.78it/s]
  1%|          | 2105344/170498071 [00:00<00:27, 6074172.09it/s]
  2%|▏         | 3153920/170498071 [00:00<00:23, 7139230.69it/s]
  2%|▏         | 4202496/170498071 [00:00<00:21, 7877697.00it/s]
  3%|▎         | 5251072/170498071 [00:00<00:19, 8387652.07it/s]
  4%|▎         | 6299648/170498071 [00:00<00:18, 8711352.05it/s]
  4%|▍         | 7348224/170498071 [00:00<00:18, 8896429.35it/s]
  5%|▍         | 8396800/170498071 [00:01<00:17, 9139841.51it/s]
  6%|▌         | 9445376/170498071 [00:01<00:17, 9226160.13it/s]
  6%|▌         | 10493952/170498071 [00:01<00:16, 9536379.71it/s]
  7%|▋         | 12591104/170498071 [00:01<00:15, 10235714.20it/s]
  9%|▊         | 14688256/170498071 [00:01<00:14, 10441941.98it/s]
 10%|▉         | 16785408/170498071 [00:01<00:14, 10528633.70it/s]
 10%|█         | 17833984/170498071 [00:01<00:

[2m[36m(train_cifar pid=4163727)[0m Files already downloaded and verified


170500096it [00:42, 4057622.20it/s][0m 
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
[2m[36m(train_cifar pid=4163727)[0m   img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
170500096it [00:42, 4053315.07it/s][0m 
170500096it [00:42, 4053094.78it/s][0m 


[2m[36m(train_cifar pid=4163727)[0m Finished Training
Best trial config: {'kernel_size': 32}
Best trial final validation loss: 1.62558657880042
Best trial final validation accuracy: 0.4328
Files already downloaded and verified
Files already downloaded and verified


  img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
  img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
  return F.conv2d(input, weight, bias, self.stride,


Best trial test set accuracy: 0.436


In [10]:
# Hide Ray Tune's bookkeeping columns so that only the metrics, timings and
# the searched hyperparameter are displayed. errors="ignore" keeps this cell
# from raising KeyError when one of the listed columns is not present in the
# frame; columns that do exist are dropped exactly as before.
results_df.drop(
    columns=[
        "should_checkpoint",
        "timesteps_total",
        "episodes_total",
        "training_iteration",
        "trial_id",
        "experiment_id",
        "date",
        "timestamp",
        "pid",
        "hostname",
        "node_ip",
        "time_since_restore",
        "timesteps_since_restore",
        "iterations_since_restore",
        "warmup_time",
        "logdir",
    ],
    errors="ignore",
)


Unnamed: 0,loss,accuracy,time_this_iter_s,done,time_total_s,config/kernel_size
0,1.971795,0.3192,3.213966,False,38.224074,1
1,1.780833,0.3833,3.299755,False,37.817678,2
2,1.781364,0.3867,3.627258,False,36.618532,3
3,1.772187,0.3815,4.849235,False,53.799612,5
4,1.687177,0.4139,15.175148,False,156.8179,10
5,1.638986,0.4228,59.298784,False,642.428981,15
6,1.625587,0.4328,279.4482,False,2835.435086,32
