<a href="https://colab.research.google.com/github/amitpanwarIndia/DLOps/blob/main/M22AIE202_DLOps_ClassAssignment_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
!pip install ray



In [5]:
import os
import tempfile

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader
from ray import train
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from torch.utils.tensorboard import SummaryWriter

In [6]:
# FashionMNIST images are single-channel, but the ResNet-18 used in this
# notebook has a 3-channel first convolution. Replicating the grayscale
# channel three times lets the images flow through the network unchanged.
fashion_transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),
    transforms.ToTensor(),
])

train_set = torchvision.datasets.FashionMNIST(
    root="./data", train=True, download=True, transform=fashion_transform
)
test_set = torchvision.datasets.FashionMNIST(
    root="./data", train=False, download=True, transform=fashion_transform
)

In [7]:
# ImageNet-pretrained ResNet-18 with its classification head replaced by a
# 10-way linear layer (FashionMNIST has 10 classes).
# `weights=` replaces the deprecated `pretrained=True` flag; IMAGENET1K_V1 is
# the weight set that `pretrained=True` used to select.
model = torchvision.models.resnet18(
    weights=torchvision.models.ResNet18_Weights.IMAGENET1K_V1
)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 10)

In [8]:
def train_model(config):
    """Ray Tune trainable: fine-tune the module-level ``model`` on ``train_set``.

    Args:
        config: Hyperparameter dict supplied by Ray Tune. Reads
            ``config["batch_size"]`` and ``config["lr"]``. An optional
            ``config["epochs"]`` overrides the default of 5 epochs.

    Side effects:
        * Moves ``model`` to the selected device and updates its weights.
        * Writes per-epoch "Loss/train" and "Accuracy/train" scalars through a
          ``SummaryWriter``.
        * After every epoch, reports ``loss`` and ``accuracy`` to Ray Tune
          together with a checkpoint directory containing ``model.pt``
          (the model's ``state_dict``).
    """
    train_loader = DataLoader(train_set, batch_size=config["batch_size"], shuffle=True)

    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config["lr"])

    writer = SummaryWriter()
    try:
        for epoch in range(config.get("epochs", 5)):
            model.train()
            running_loss = 0.0
            correct = 0
            total = 0

            for inputs, labels in train_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                optimizer.zero_grad()

                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                _, predicted = outputs.max(1)
                total += labels.size(0)
                correct += predicted.eq(labels).sum().item()

            # Mean of the per-batch losses, and accuracy as a percentage.
            train_loss = running_loss / len(train_loader)
            train_accuracy = 100. * correct / total

            writer.add_scalar("Loss/train", train_loss, epoch)
            writer.add_scalar("Accuracy/train", train_accuracy, epoch)

            # Report metrics as a dict and attach the current weights so the
            # best trial can be reloaded after tuning. Ray copies the directory
            # during report(), so a temporary directory is sufficient.
            metrics = {"loss": train_loss, "accuracy": train_accuracy}
            with tempfile.TemporaryDirectory() as checkpoint_dir:
                torch.save(model.state_dict(), os.path.join(checkpoint_dir, "model.pt"))
                train.report(
                    metrics,
                    checkpoint=train.Checkpoint.from_directory(checkpoint_dir),
                )
    finally:
        # Flush and release the event file even if a trial fails or is stopped.
        writer.close()

In [9]:
!pip install -U tensorboardx



In [None]:
# Hyperparameter search space: log-uniform learning rate and a small set of
# batch sizes.
config = {
    "lr": tune.loguniform(1e-6, 1e-1),
    "batch_size": tune.choice([4, 8, 16, 32])
}

# ASHA stops under-performing trials early, judged on the reported loss.
scheduler = ASHAScheduler(metric="loss", mode="min")

reporter = CLIReporter(
    parameter_columns=["lr", "batch_size"],
    metric_columns=["loss", "accuracy", "training_iteration"])

# train_model falls back to the CPU when CUDA is unavailable, so only request
# a GPU when one exists; otherwise every trial would stay PENDING forever.
# Reserving one CPU per trial bounds concurrency on CPU-only machines.
analysis = tune.run(
    train_model,
    resources_per_trial={"cpu": 1, "gpu": 1 if torch.cuda.is_available() else 0},
    config=config,
    num_samples=10,
    scheduler=scheduler,
    progress_reporter=reporter)

2024-03-31 10:44:10,725	INFO tune.py:622 -- [output] This will use the new output engine with verbosity 2. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949


+--------------------------------------------------------------------+
| Configuration for experiment     train_model_2024-03-31_10-44-12   |
+--------------------------------------------------------------------+
| Search algorithm                 BasicVariantGenerator             |
| Scheduler                        AsyncHyperBandScheduler           |
| Number of trials                 10                                |
+--------------------------------------------------------------------+

View detailed results here: /root/ray_results/train_model_2024-03-31_10-44-12
To visualize your results with TensorBoard, run: `tensorboard --logdir /tmp/ray/session_2024-03-31_10-40-29_059336_1654/artifacts/2024-03-31_10-44-12/train_model_2024-03-31_10-44-12/driver_artifacts`

Trial status: 10 PENDING
Current time: 2024-03-31 10:44:51. Total running time: 39s
Logical resource usage: 0/2 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:T4)
+-----------------------------------------------------------------

In [None]:
# Evaluate the best trial (highest last-reported training accuracy) on the
# FashionMNIST test split and print its top-5 accuracy.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
best_trial = analysis.get_best_trial("accuracy", "max", "last")
best_checkpoint = best_trial.checkpoint if best_trial is not None else None
if best_checkpoint is None:
    raise RuntimeError(
        "The best trial has no checkpoint; the trainable must report a "
        "checkpoint containing 'model.pt' for evaluation to work."
    )

# The checkpoint is expected to hold a state_dict saved as 'model.pt'.
# map_location keeps loading working when tuning ran on a different device.
with best_checkpoint.as_directory() as checkpoint_dir:
    state_dict = torch.load(os.path.join(checkpoint_dir, "model.pt"), map_location=device)

# Rebuild the architecture used for training (ResNet-18 with a 10-way head)
# before restoring the weights, then place it on the same device as the data.
best_model = torchvision.models.resnet18(num_classes=10)
best_model.load_state_dict(state_dict)
best_model.to(device)
best_model.eval()

test_loader = DataLoader(test_set, batch_size=32, shuffle=False)
top5_correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = best_model(inputs)
        _, predicted = outputs.topk(5, dim=1)
        # A sample counts as correct if its label is among the 5 top-scoring classes.
        top5_correct += predicted.eq(labels.view(-1, 1)).any(dim=1).sum().item()
        total += labels.size(0)

top5_accuracy = 100. * top5_correct / total
print(f"Top-5 Test Accuracy: {top5_accuracy:.2f}%")
