In [4]:
!pip install ray

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting ray
  Downloading ray-2.2.0-cp38-cp38-manylinux2014_x86_64.whl (57.4 MB)
[K     |████████████████████████████████| 57.4 MB 1.1 MB/s 
Collecting virtualenv>=20.0.24
  Downloading virtualenv-20.17.1-py3-none-any.whl (8.8 MB)
[K     |████████████████████████████████| 8.8 MB 75.2 MB/s 
Collecting distlib<1,>=0.3.6
  Downloading distlib-0.3.6-py2.py3-none-any.whl (468 kB)
[K     |████████████████████████████████| 468 kB 81.8 MB/s 
Installing collected packages: distlib, virtualenv, ray
Successfully installed distlib-0.3.6 ray-2.2.0 virtualenv-20.17.1


In [5]:
import pandas as pd 
import numpy as np
import tensorflow as tf 
import os
import time
import torch.nn as nn 
import torch.optim as optim
import torchvision
from torchvision import datasets, models, transforms
from torchvision.io import read_image, ImageReadMode
import copy
import pathlib
import random
import torch
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler

In [6]:
# Mount Google Drive: the dataset (image_dir) and the saved checkpoints used
# further down both live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
def get_file_paths_and_labels(data_root, seed=0):
  """
  Index the ``<class>/<image>.jpg`` files below ``data_root``.

  Args:
    data_root: Dataset root (``pathlib.Path`` or path string). Every immediate
      sub-directory is treated as one class.
    seed: Seed for shuffling the file list. The train/val/test split further
      down is taken by position in this list, so a fixed seed keeps the split
      identical across sessions (e.g. when a checkpoint is re-evaluated
      later). Pass ``None`` to get a different order on every call.

  Returns:
    labels_df: ``pandas.DataFrame`` with the columns ``path`` (image path
      relative to ``data_root``) and ``label`` (integer class index), in
      shuffled order.
    label_to_index: dict mapping each class-directory name to its integer
      index (names sorted alphabetically).
  """
  data_root = pathlib.Path(data_root)
  # relative_to() works for any root directory; the previous split on "jpg/"
  # only worked because the dataset folder name happens to end in "jpg".
  image_paths = sorted(str(path.relative_to(data_root)) for path in data_root.glob('*/*.jpg'))
  # A private RNG keeps the shuffle reproducible without touching the global random state.
  random.Random(seed).shuffle(image_paths)
  label_names = sorted(item.name for item in data_root.iterdir() if item.is_dir())
  label_to_index = {name: index for index, name in enumerate(label_names)}
  labels = [label_to_index[pathlib.Path(path).parent.name] for path in image_paths]
  labels_df = pd.DataFrame({"path": image_paths, "label": labels})
  return labels_df, label_to_index

In [8]:
# Dataset root on the mounted Google Drive. It is kept as a string (joined with
# the relative image paths inside CustomImageDataset) and as a pathlib.Path
# (globbed by get_file_paths_and_labels).
# NOTE(review): model_name, num_classes and num_epochs are not referenced by any
# later cell shown here; initialize_model() and main() use their own defaults.
image_dir = "/content/drive/MyDrive/Tobacco3482-jpg/"
data_dir = pathlib.Path(image_dir)
model_name = "inception"
num_classes = 10
num_epochs = 15 

In [9]:
# Build the (path, label) table for the whole dataset plus the class-name -> index mapping.
labels_df, label_to_index = get_file_paths_and_labels(data_dir)

In [10]:
def initialize_model(model_name="inception", num_classes=10, use_pretrained=True):
    """
    Build an Inception v3 classifier whose output layers predict ``num_classes`` classes.

    Args:
        model_name: Architecture to build. Only ``"inception"`` is implemented.
        num_classes: Output size of both replaced linear layers.
        use_pretrained: Forwarded to ``models.inception_v3(pretrained=...)``.

    Returns:
        Tuple ``(model, input_size)``: the model and the input resolution (299)
        that the notebook's transforms crop/resize to.

    Raises:
        ValueError: If ``model_name`` is anything other than ``"inception"``.
    """
    # Fail loudly instead of silently returning Inception for another architecture name.
    if model_name != "inception":
        raise ValueError(
            f"Unsupported model_name {model_name!r}; only 'inception' is implemented")

    model_ft = models.inception_v3(pretrained=use_pretrained)
    # Handle the auxiliary net: replace its classifier layer.
    num_ftrs = model_ft.AuxLogits.fc.in_features
    model_ft.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)
    # Handle the primary net: replace its classifier layer.
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Linear(num_ftrs, num_classes)
    input_size = 299

    return model_ft, input_size

In [11]:
# Instantiate once at notebook level to obtain input_size for the transforms below
# (this also triggers the pretrained-weight download shown in the output).
net, input_size = initialize_model()

Downloading: "https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth" to /root/.cache/torch/hub/checkpoints/inception_v3_google-0cc3c7bd.pth


  0%|          | 0.00/104M [00:00<?, ?B/s]

In [12]:
# Per-channel mean / std passed to Normalize in both pipelines
# (presumably the ImageNet statistics expected by the pretrained weights).
_norm_mean = [0.485, 0.456, 0.406]
_norm_std = [0.229, 0.224, 0.225]

# Training pipeline: random crop + horizontal flip as augmentation, then tensor + normalisation.
_train_steps = [
    transforms.ToPILImage(),
    transforms.RandomResizedCrop(input_size),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
]
transforms_train = transforms.Compose(_train_steps)

# Evaluation pipeline: deterministic resize + centre crop, then tensor + normalisation.
_test_steps = [
    transforms.ToPILImage(),
    transforms.Resize(input_size),
    transforms.CenterCrop(input_size),
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
]
transforms_test = transforms.Compose(_test_steps)

In [13]:
class CustomImageDataset(Dataset):
    """
    Map-style dataset that loads the images listed in a DataFrame from disk.

    Column 0 of the frame holds the image path relative to ``image_dir`` and
    column 1 the label. Each item is returned as ``(transformed_image, label)``.
    The ``labels_df`` / ``image_dir`` defaults are bound to the notebook-level
    variables at the time this class is defined.
    """

    def __init__(self, transform, labels_df=labels_df, image_dir=image_dir):
        self.transform = transform
        self.image_dir = image_dir
        self.img_labels = labels_df

    def __len__(self):
        # One sample per row of the label table.
        return len(self.img_labels)

    def __getitem__(self, idx):
        relative_path = self.img_labels.iloc[idx, 0]
        full_path = os.path.join(self.image_dir, relative_path)
        # Always decode as RGB so every image has three channels.
        pixels = read_image(full_path, mode=ImageReadMode.RGB)
        transformed = self.transform(pixels)
        target = self.img_labels.iloc[idx, 1]
        return transformed, target

In [14]:
# Two views over the same files: augmenting transforms for training, deterministic
# transforms for validation/testing. Which rows each loader actually draws is
# decided by the index samplers defined in the next cell.
train_dataset = CustomImageDataset(transform=transforms_train)
test_dataset = CustomImageDataset(transform=transforms_test)
val_split_rel = 0.1   # fraction of the dataset held out for validation
test_split_rel = 0.1  # fraction of the dataset intended for testing

In [15]:
# Carve the (already shuffled) sample table into contiguous index ranges:
# [0, val_split) -> validation, [val_split, test_split) -> test, the rest -> training.
dataset_size = labels_df.shape[0]
indices = list(range(dataset_size))
val_split = int(np.floor(val_split_rel * dataset_size))
# Honour test_split_rel instead of assuming the test share equals the validation share.
test_split = val_split + int(np.floor(test_split_rel * dataset_size))
val_indices = indices[:val_split]
test_indices = indices[val_split:test_split]
train_indices = indices[test_split:]

# Creating PT data samplers and loaders:
train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)
test_sampler = SubsetRandomSampler(test_indices)

In [16]:
def train_inception(config):
    """
    Ray Tune trainable: fine-tune Inception v3 and report validation metrics after every epoch.

    Args:
        config: dict with the keys ``"batch_size"``, ``"lr"``, ``"mom"``,
            ``"weight_decay"`` and ``"num_epochs"``.

    Side effects:
        After each epoch, saves ``(model state_dict, optimizer state_dict)`` to a
        file named ``checkpoint`` inside the directory handed out by
        ``tune.checkpoint_dir`` and reports ``loss`` / ``accuracy`` on the
        validation split through ``tune.report``.
    """
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=config["batch_size"],
                                               sampler=train_sampler)
    # Validation samples come from test_dataset, i.e. without training augmentation.
    val_loader = torch.utils.data.DataLoader(test_dataset, batch_size=config["batch_size"],
                                             sampler=val_sampler)

    net, _ = initialize_model()
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if torch.cuda.device_count() > 1:
            net = nn.DataParallel(net)
    net.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=config["lr"], momentum=config["mom"],
                          weight_decay=config["weight_decay"])

    for epoch in range(config["num_epochs"]):  # loop over the dataset multiple times
        # Training mode: dropout active, BatchNorm uses batch statistics, and the
        # model returns (main logits, auxiliary logits).
        net.train()
        running_loss = 0.0
        epoch_steps = 0
        for i, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize; the auxiliary head's loss is weighted by 0.4
            outputs, aux_outputs = net(inputs)
            loss = criterion(outputs, labels) + 0.4 * criterion(aux_outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            epoch_steps += 1
            if i % 8000 == 7999:  # print every 8000 mini-batches
                print("[%d, %5d] loss: %.3f" % (epoch + 1, i + 1,
                                                running_loss / epoch_steps))
                # Reset both accumulators so the next printed average covers a fresh window.
                running_loss = 0.0
                epoch_steps = 0

        # Validation: switch to eval mode so dropout is disabled and BatchNorm uses
        # (and does not update) its running statistics.
        net.eval()
        val_loss = 0.0
        val_steps = 0
        total = 0
        correct = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = net(inputs)
                # Eval mode yields a plain logits tensor; tolerate a tuple-style
                # output (main logits first) as well.
                if isinstance(outputs, tuple):
                    outputs = outputs[0]
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                val_loss += criterion(outputs, labels).item()
                val_steps += 1

        with tune.checkpoint_dir(epoch) as checkpoint_dir:
            path = os.path.join(checkpoint_dir, "checkpoint")
            print(f"The path is: {path}")
            torch.save((net.state_dict(), optimizer.state_dict()), path)

        tune.report(loss=(val_loss / val_steps), accuracy=correct / total)
    print("Finished Training")

In [17]:
def test_accuracy(net, config, device="cuda"):
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=config["batch_size"],
                                                sampler=test_sampler)
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs, aux_outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return correct / total

In [30]:
def main(num_samples=10, max_num_epochs=15, gpus_per_trial=1):
    """
    Run an ASHA-scheduled Ray Tune search over the SGD hyper-parameters of
    train_inception, then evaluate the best trial's last checkpoint on the test split.

    Args:
        num_samples: Number of configurations sampled from the search space.
        max_num_epochs: Epochs each trial trains for at most (also the ASHA ``max_t``).
        gpus_per_trial: GPUs reserved for every trial.
    """
    config = {
        "lr": tune.loguniform(1e-4, 1e-1),
        "batch_size": tune.choice([4, 8, 16]),
        "mom": tune.choice([0.1, 0.2, 0.3, 0.4, 0.5, 0.6]),
        "weight_decay": tune.loguniform(0.001, 0.1),
        # Tie the training length to the argument instead of a second hard-coded 15.
        "num_epochs": max_num_epochs,
    }

    scheduler = ASHAScheduler(
        metric="loss",
        mode="min",
        max_t=max_num_epochs,
        # The grace period must not exceed max_t when a short run is requested.
        grace_period=min(5, max_num_epochs),
        reduction_factor=2)

    reporter = CLIReporter(
        metric_columns=["loss", "accuracy", "training_iteration"])

    result = tune.run(
        train_inception,
        resources_per_trial={"cpu": 2, "gpu": gpus_per_trial},
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter)

    best_trial = result.get_best_trial("loss", "min", "last")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(
        best_trial.last_result["loss"]))
    print("Best trial final validation accuracy: {}".format(
        best_trial.last_result["accuracy"]))

    best_trained_model, _ = initialize_model()
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if gpus_per_trial > 1:
            best_trained_model = nn.DataParallel(best_trained_model)
    best_trained_model.to(device)

    best_checkpoint_dir = best_trial.checkpoint.dir_or_data
    print(f"The checkpoint directory is: {best_checkpoint_dir}")

    # map_location lets a checkpoint written on a GPU be restored on whatever
    # device this process ended up with.
    model_state, optimizer_state = torch.load(
        os.path.join(best_checkpoint_dir, "checkpoint"), map_location=device)
    best_trained_model.load_state_dict(model_state)

    # Use the best trial's resolved config: the search-space dict above holds
    # sampler objects (e.g. tune.choice for batch_size), not concrete values.
    test_acc = test_accuracy(best_trained_model, best_trial.config, device)
    print("Best trial test set accuracy: {}".format(test_acc))


In [None]:
# Launch the hyper-parameter search: 15 sampled configurations, at most 15 epochs each, one GPU per trial.
main(num_samples=15, max_num_epochs=15, gpus_per_trial=1)



== Status ==
Current time: 2022-12-27 19:37:08 (running for 00:00:00.27)
Memory usage on this node: 2.9/25.5 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 10.000: None | Iter 5.000: None
Resources requested: 2.0/4 CPUs, 1.0/1 GPUs, 0.0/15.06 GiB heap, 0.0/7.53 GiB objects (0.0/1.0 accelerator_type:T4)
Result logdir: /root/ray_results/train_inception_2022-12-27_19-37-08
Number of trials: 15/15 (14 PENDING, 1 RUNNING)
+-----------------------------+----------+-------------------+--------------+-------------+-------+----------------+
| Trial name                  | status   | loc               |   batch_size |          lr |   mom |   weight_decay |
|-----------------------------+----------+-------------------+--------------+-------------+-------+----------------|
| train_inception_d9e64_00000 | RUNNING  | 172.28.0.12:43314 |            8 | 0.0425003   |   0.4 |     0.0191314  |
| train_inception_d9e64_00001 | PENDING  |                   |           16 | 0.0106584   |   0.2 |    



== Status ==
Current time: 2022-12-27 19:37:16 (running for 00:00:07.92)
Memory usage on this node: 3.9/25.5 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 10.000: None | Iter 5.000: None
Resources requested: 2.0/4 CPUs, 1.0/1 GPUs, 0.0/15.06 GiB heap, 0.0/7.53 GiB objects (0.0/1.0 accelerator_type:T4)
Result logdir: /root/ray_results/train_inception_2022-12-27_19-37-08
Number of trials: 15/15 (14 PENDING, 1 RUNNING)
+-----------------------------+----------+-------------------+--------------+-------------+-------+----------------+
| Trial name                  | status   | loc               |   batch_size |          lr |   mom |   weight_decay |
|-----------------------------+----------+-------------------+--------------+-------------+-------+----------------|
| train_inception_d9e64_00000 | RUNNING  | 172.28.0.12:43314 |            8 | 0.0425003   |   0.4 |     0.0191314  |
| train_inception_d9e64_00001 | PENDING  |                   |           16 | 0.0106584   |   0.2 |    

Trial name,accuracy,date,done,episodes_total,experiment_id,hostname,iterations_since_restore,loss,node_ip,pid,should_checkpoint,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
train_inception_d9e64_00000,0.479885,2022-12-27_20-12-04,False,,810f943de15c4ec3a2d12c2e1d6c52c4,61ede8adcf3c,10,1.50707,172.28.0.12,43314,True,2092.7,210.356,2092.7,1672171924,0,,10,d9e64_00000,0.00335097


[1;30;43mDie letzten 5000 Zeilen der Streamingausgabe wurden abgeschnitten.[0m
| train_inception_d9e64_00010 | PENDING  |                   |            4 | 0.000146019 |   0.5 |     0.00570456 |         |            |                      |
| train_inception_d9e64_00011 | PENDING  |                   |            8 | 0.000892713 |   0.5 |     0.0371054  |         |            |                      |
| train_inception_d9e64_00012 | PENDING  |                   |           16 | 0.0248975   |   0.1 |     0.00208436 |         |            |                      |
| train_inception_d9e64_00013 | PENDING  |                   |           16 | 0.0231243   |   0.2 |     0.00554563 |         |            |                      |
| train_inception_d9e64_00014 | PENDING  |                   |            4 | 0.0259445   |   0.3 |     0.0180473  |         |            |                      |
+-----------------------------+----------+-------------------+--------------+-------------+-------+-----

In [31]:
!pip install lime 

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting lime
  Downloading lime-0.2.0.1.tar.gz (275 kB)
[K     |████████████████████████████████| 275 kB 4.9 MB/s 
Building wheels for collected packages: lime
  Building wheel for lime (setup.py) ... [?25l[?25hdone
  Created wheel for lime: filename=lime-0.2.0.1-py3-none-any.whl size=283857 sha256=70587b12c76b6057b41b1744e094e575da476433c7e3037e74139d324206e4a3
  Stored in directory: /root/.cache/pip/wheels/e6/a6/20/cc1e293fcdb67ede666fed293cb895395e7ecceb4467779546
Successfully built lime
Installing collected packages: lime
Successfully installed lime-0.2.0.1


In [21]:

# Re-evaluate the best checkpoint of an earlier tuning run on the held-out test split.
# test_accuracy() only reads config["batch_size"]; the other entries are carried along unchanged.
config = {
    "lr": tune.loguniform(1e-4, 1e-1),
    "batch_size": 16,
    "mom": tune.choice([0.1, 0.2, 0.3, 0.4, 0.5]),
    "weight_decay": tune.choice([0.025, 0.05, 0.075, 0.1, 0.2, 0.3, 0.4]),
    "num_epochs": 15
}
best_checkpoint_dir = "/root/ray_results/train_inception_2022-12-27_11-08-45/train_inception_d467a_00000_0_batch_size=16,lr=0.0040,mom=0.4000,num_epochs=15,weight_decay=0.0250_2022-12-27_11-08-45/checkpoint_000014"
best_trained_model, input_size = initialize_model()
device = "cpu"
if torch.cuda.is_available():
    device = "cuda:0"
best_trained_model.to(device)
# map_location lets a checkpoint written on a GPU be restored on a CPU-only runtime too.
model_state, optimizer_state = torch.load(os.path.join(
    best_checkpoint_dir, "checkpoint"), map_location=device)
best_trained_model.load_state_dict(model_state)

test_acc = test_accuracy(best_trained_model, config, device)
print("Best trial test set accuracy: {}".format(test_acc))



Best trial test set accuracy: 0.7758620689655172


In [180]:
import shutil
# Persist the best checkpoint directory to Google Drive so it outlives the Colab runtime.
# dirs_exist_ok=True (Python 3.8+) keeps this cell re-runnable once the destination exists.
shutil.copytree("/root/ray_results/train_inception_2022-12-27_11-08-45/train_inception_d467a_00000_0_batch_size=16,lr=0.0040,mom=0.4000,num_epochs=15,weight_decay=0.0250_2022-12-27_11-08-45/checkpoint_000014", "/content/drive/MyDrive/models/27-12-v2", dirs_exist_ok=True)

'/content/drive/MyDrive/models/27-12-v2'

In [16]:
# Restore the checkpoint that was copied to Google Drive into a CPU-resident model.
config = {
    "lr": tune.loguniform(1e-4, 1e-1),
    "batch_size": 16,
    "mom": tune.choice([0.1, 0.2, 0.3, 0.4, 0.5]),
    "weight_decay": tune.choice([0.025, 0.05, 0.075, 0.1, 0.2, 0.3, 0.4]),
    "num_epochs": 15
}
best_checkpoint_dir = "/content/drive/MyDrive/models/27-12-v2/"
best_trained_model, input_size = initialize_model()
device = "cpu"
best_trained_model.to(device)
# The checkpoint may hold GPU tensors; map_location remaps them so loading also
# works on a runtime without CUDA.
model_state, optimizer_state = torch.load(os.path.join(
    best_checkpoint_dir, "checkpoint"), map_location=device)
best_trained_model.load_state_dict(model_state)


<All keys matched successfully>

In [17]:
# Keep inference on the CPU for the cells below (repeats the assignment made
# when the checkpoint was restored).
device = "cpu"

In [18]:
# Smoke test: classify a single image from the test split with the restored model.
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1,
                                            sampler=test_sampler)
correct = 0
total = 0
# Switch to inference mode once, before iterating, instead of on every batch.
# In this mode the model call below returns a single logits tensor.
best_trained_model.eval()
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        print(images.shape)
        images, labels = images.to(device), labels.to(device)
        outputs = best_trained_model(images)
        print(outputs.data)
        _, predicted = torch.max(outputs.data, 1)
        print(predicted)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        # One sample is enough here; `images` is reused by the explanation cells below.
        break

torch.Size([1, 3, 299, 299])
tensor([[-2.1643, -0.6423, -0.4608,  0.0468,  0.7049, -2.9165, -2.4240,  0.2104,
          6.2781,  0.3802]])
tensor([8])


In [19]:

# Install the pinned visualime release (the log below shows it replacing an installed 0.0.3).
!pip install visualime==0.0.2

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting visualime==0.0.2
  Downloading visualime-0.0.2-py3-none-any.whl (14 kB)
Installing collected packages: visualime
  Attempting uninstall: visualime
    Found existing installation: visualime 0.0.3
    Uninstalling visualime-0.0.3:
      Successfully uninstalled visualime-0.0.3
Successfully installed visualime-0.0.2


In [22]:
from visualime.explain import explain_classification, render_explanation
from visualime.lime import create_segments

In [90]:
def preprocessing(images):
  """
  Turn a one-image tensor batch into a NumPy array of shape (1, 299, 299, 3).

  NOTE(review): ``reshape`` only re-labels the axes over the same flat element
  order; it does not move the channel axis the way a transpose would, so for a
  (1, 3, 299, 299) input the result is not a true channels-last image.
  predict_fn() applies the mirror-image reshape, so model predictions are
  unaffected, but anything that inspects the array as an image (e.g.
  segmentation) sees scrambled pixels. Consider
  ``np.transpose(arr, (0, 2, 3, 1))`` here together with the matching inverse
  transpose in predict_fn().
  """
  return images.numpy().reshape(1, 299, 299, 3)

In [91]:
# Array form of the sample left in `images` by the single-sample check above.
image_preprocessed = preprocessing(images)

In [92]:
# Sanity check of the array shape that is later handed to explain_classification.
image_preprocessed.shape

(1, 299, 299, 3)

In [93]:
# Float tensor copy of the array.
# NOTE(review): not referenced by the later cells shown here - predict_fn builds its own local image_tensor.
image_tensor = torch.Tensor(image_preprocessed)

In [94]:
def predict_fn(image_preprocessed):
  """
  Model wrapper for the explainer: map image array(s) in the layout produced by
  preprocessing() to the model's raw logits.

  Args:
    image_preprocessed: NumPy array holding one or more images, e.g. of shape
      (N, 299, 299, 3); anything reshapeable to (N, 3, 299, 299) is accepted.

  Returns:
    NumPy array with one row of logits per image.
  """
  best_trained_model.eval()
  # -1 leaves the batch dimension free, so a stack of images (presumably what the
  # explainer passes in for its perturbed samples - confirm against the visualime
  # docs) works as well as the single image the previous hard-coded 1 allowed.
  # reshape (not transpose) deliberately mirrors what preprocessing() does.
  batch = torch.Tensor(image_preprocessed.reshape(-1, 3, 299, 299))
  # Run on whatever device the model lives on.
  batch = batch.to(next(best_trained_model.parameters()).device)
  # Inference only: skip autograd bookkeeping.
  with torch.no_grad():
    outputs = best_trained_model(batch)
  return outputs.cpu().numpy()

In [95]:
# Round-trip check: the logits should match those printed by the single-sample check above.
predict_fn(image_preprocessed)

array([[-2.164348  , -0.6422686 , -0.46083874,  0.04683117,  0.7049142 ,
        -2.9165063 , -2.4240427 ,  0.21043095,  6.278126  ,  0.38016188]],
      dtype=float32)

In [96]:
# Explain the sample's classification with visualime, using predict_fn as the model wrapper.
# NOTE(review): this call raised the ValueError shown below. image_preprocessed carries a
# leading batch axis (shape (1, 299, 299, 3)); presumably explain_classification expects a
# single (H, W, 3) image and a predict_fn that accepts a batch of images - confirm against
# the visualime documentation for the pinned version.
segment_mask, segment_weights = explain_classification(image=image_preprocessed, predict_fn=predict_fn)

ValueError: ignored