# Mount Google Drive

In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:

# Work from the project folder on Drive so the relative paths used below
# (requirements.txt, config.yaml, data/, saved_models/) resolve.
%cd /content/drive/MyDrive/project

# Install requirements

In [3]:
!pip install -r requirements.txt



# INFERENCE BASELINE

In [4]:

import torch
import torchvision.transforms as transforms
import torch.optim as optim
import torchvision.transforms.functional as FT
from tqdm import tqdm
from torch.utils.data import DataLoader
import random
import sys
from models import Yolov1
from loss_func import LossFunc
from utility_funcs import *
from IPython.display import Image

from data_loader import VOCDataset
config = load_config("config.yaml")
print(config)

# Seed torch's global RNG so repeated runs are reproducible.
seed = config['models_config']['seed']
torch.manual_seed(seed)

# DEVICE decides whether the model and data live on the GPU or the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('DEVICE - >', DEVICE)

# Hyperparameters. float() also copes with values that reach us as strings
# (e.g. "2e-5"). Weight decay is a fractional coefficient, so it is parsed as a
# float as well: int() would truncate 0.0005 to 0 and raise on "5e-4".
LEARNING_RATE = float(config['models_config']['baseline']['learning_rate'])
WEIGHT_DECAY = float(config['models_config']['weight_decay'])
BATCH_SIZE = int(config['models_config']['test_batch_size'])

NUM_WORKERS = 2
# Pinned host memory only speeds up host-to-GPU copies; on a CPU-only runtime
# it just triggers a DataLoader warning.
PIN_MEMORY = DEVICE.type == 'cuda'
LOAD_MODEL = True

# Path (relative to the project folder on Drive) of the trained checkpoint
# that is loaded for predictions.
LOAD_MODEL_FILE= r'saved_models/model_full_data_80_tensor(0.9161).pth.tar'

# NOTE: an image and its label file share the same filename; only the
# extension differs.
# Directory containing all the images.
IMG_DIR = "data/data_200/images_200"
# Directory containing the label files for those images.
LABEL_DIR = "data/data_200/labels_200"

def plot_image(image, boxes):
    """Show an image with its bounding boxes and class labels drawn on top.

    Args:
        image: Image data that ``np.array`` turns into a
            ``(height, width, channels)`` array.
        boxes: Iterable of boxes. Entry 0 of each box is the class id, entry 1
            is not used here (presumably the confidence score — confirm
            against ``non_max_suppression``), and the remaining four entries
            are ``x_mid, y_mid, width, height``. The coordinates are multiplied
            by the image width/height, so they are expected as fractions of
            the image size.

    Raises:
        AssertionError: If a box does not carry exactly four coordinate values
            after its first two entries.
    """
    im = np.array(image)
    height, width, _ = im.shape

    # One figure/axes pair per image; the image is the background layer.
    fig, ax = plt.subplots(1)
    ax.imshow(im)

    # Class id -> display name, following the Pascal VOC class order
    # (index 13 is the motorbike class, index 16 is the sheep class).
    class_names = {
        0: "airplane",
        1: "bicycle",
        2: "bird",
        3: "boat",
        4: "bottle",
        5: "bus",
        6: "car",
        7: "cat",
        8: "chair",
        9: "cow",
        10: "table",
        11: "dog",
        12: "horse",
        13: "motorbike",
        14: "person",
        15: "pot",
        16: "sheep",
        17: "sofa",
        18: "train",
        19: "tv",
    }

    for box in boxes:
        class_id = int(box[0])
        # Fall back to the raw id instead of crashing on an unexpected class.
        label = class_names.get(class_id, str(class_id))

        # Drop the class id and the second entry; what is left is the geometry.
        coords = box[2:]
        assert len(coords) == 4, "Got more values than in x, y, w, h, in a box!"
        x_mid, y_mid, box_w, box_h = coords

        # Convert the midpoint format to the upper-left corner in pixels,
        # which is what patches.Rectangle expects.
        left = (x_mid - box_w / 2) * width
        top = (y_mid - box_h / 2) * height

        rect = patches.Rectangle(
            (left, top),
            box_w * width,
            box_h * height,
            linewidth=1,
            edgecolor="r",
            facecolor="none",
        )
        ax.add_patch(rect)

        # Class label just inside the upper-left corner of the box.
        ax.text(
            left + 5,
            top + 5,
            label,
            color="white",
            # semi-transparent red background behind the text
            bbox=dict(facecolor="red", alpha=0.5, pad=0.5),
        )

    plt.show()
    # This function is called once per image in a loop; release the figure so
    # open figures do not pile up in memory.
    plt.close(fig)

class Compose(object):
    """Chain image transforms while passing the bounding boxes through untouched."""

    def __init__(self, transforms):
        # Ordered callables; each one receives only the image.
        self.transforms = transforms

    def __call__(self, img, bboxes):
        """Apply every transform to ``img`` in order and return ``(img, bboxes)``."""
        for step in self.transforms:
            img = step(img)
        return img, bboxes

# Inference preprocessing: scale every image to 448x448, then convert it to a
# tensor for GPU/CPU processing.
transform = Compose(
    [
        transforms.Resize((448, 448)),
        transforms.ToTensor(),
    ]
)


def main():
    """Load the trained baseline YOLOv1 checkpoint and plot predictions for one test batch.

    Builds the model, restores its weights from ``LOAD_MODEL_FILE`` (when
    ``LOAD_MODEL`` is set), takes the first batch of the test loader, runs a
    single forward pass and plots up to eight images with their boxes after
    non-max suppression.
    """
    # Upper bound on how many images of the batch are plotted.
    max_images_to_plot = 8

    print(LOAD_MODEL_FILE)
    model = Yolov1(split_size=7, num_boxes=2, num_classes=20).to(DEVICE)

    # load_checkpoint is called with an optimizer, so one is built here even
    # though no training step is taken in this cell.
    optimizer = optim.Adam(
        model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY
    )

    if LOAD_MODEL:
        # Map the checkpoint onto whichever device the model lives on, so the
        # same line works on both GPU and CPU runtimes.
        checkpoint = torch.load(LOAD_MODEL_FILE, map_location=DEVICE)
        load_checkpoint(checkpoint, model, optimizer)

    # Inference mode: layers such as batch norm / dropout must not behave as
    # they do during training.
    model.eval()

    test_dataset = VOCDataset(
        "data/data_200/test_data_100.csv",
        transform=transform,
        img_dir=IMG_DIR,
        label_dir=LABEL_DIR,
    )

    test_loader = DataLoader(
        dataset=test_dataset,
        batch_size=BATCH_SIZE,
        num_workers=NUM_WORKERS,
        pin_memory=PIN_MEMORY,
        shuffle=True,
        drop_last=True,
    )

    # Only the first batch is visualised.
    for x, _ in test_loader:
        x = x.to(DEVICE)

        # One forward pass for the whole batch; no gradients are needed.
        with torch.no_grad():
            batch_bboxes = cellboxes_to_boxes(model(x))

        # Never index past the end of the batch when it holds fewer than
        # max_images_to_plot images.
        for idx in range(min(max_images_to_plot, x.shape[0])):
            bboxes = non_max_suppression(
                batch_bboxes[idx],
                iou_threshold=0.5,
                threshold=0.4,
                box_format="midpoint",
            )
            # plot_image expects (height, width, channels) data on the CPU.
            plot_image(x[idx].permute(1, 2, 0).to("cpu"), bboxes)
        break


if __name__ == "__main__":
    main()

Output hidden; open in https://colab.research.google.com to view.

# INFERENCE IMPROVED MODEL

In [5]:
import torch
import torchvision.transforms as transforms
import torch.optim as optim
import torchvision.transforms.functional as FT
from tqdm import tqdm
from torch.utils.data import DataLoader
import random
import sys
from utility_funcs import *

from models import Resnet50TL
from loss_func import LossFunc
from data_loader import VOCDataset
config = load_config("config.yaml")
print(config)

# Seed torch's global RNG so repeated runs are reproducible.
seed = config['models_config']['seed']
torch.manual_seed(seed)

# DEVICE decides whether the model and data live on the GPU or the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('DEVICE - >', DEVICE)

# Hyperparameters. float() also copes with values that reach us as strings
# (e.g. "2e-5"). Weight decay is a fractional coefficient, so it is parsed as a
# float as well: int() would truncate 0.0005 to 0 and raise on "5e-4".
LEARNING_RATE = float(config['models_config']['improved']['learning_rate'])
WEIGHT_DECAY = float(config['models_config']['weight_decay'])

BATCH_SIZE = int(config['models_config']['test_batch_size'])

LOAD_MODEL = True
NUM_WORKERS = 2
# Pinned host memory only speeds up host-to-GPU copies; on a CPU-only runtime
# it just triggers a DataLoader warning.
PIN_MEMORY = DEVICE.type == 'cuda'

# Path (relative to the project folder on Drive) of the trained checkpoint
# that is loaded for predictions.
LOAD_MODEL_FILE= r'saved_models/model_full_data_TL68_tensor(0.9391).pth.tar'

# Directories holding the images and their label files.
IMG_DIR = "data/data_200/images_200"
LABEL_DIR = "data/data_200/labels_200"

class Compose(object):
    """Chain image transforms while passing the bounding boxes through untouched.

    NOTE: this duplicates the Compose class defined in the baseline inference
    cell; it is repeated here so this cell does not depend on that one.
    """

    def __init__(self, transforms):
        # Ordered callables; each one receives only the image.
        self.transforms = transforms

    def __call__(self, img, bboxes):
        """Apply every transform to ``img`` in order and return ``(img, bboxes)``."""
        for step in self.transforms:
            img = step(img)
        return img, bboxes

# Inference preprocessing: scale every image to 448x448, then convert it to a
# tensor for GPU/CPU processing.
transform = Compose(
    [
        transforms.Resize((448, 448)),
        transforms.ToTensor(),
    ]
)

def main():
    """Load the trained Resnet50TL checkpoint and plot predictions for one test batch.

    Builds the model, restores its weights from ``LOAD_MODEL_FILE`` (when
    ``LOAD_MODEL`` is set), takes the first batch of the test loader, runs a
    single forward pass and plots up to eight images with their boxes after
    non-max suppression. Plotting uses ``plot_image``, which is defined in the
    baseline inference cell.
    """
    # Upper bound on how many images of the batch are plotted.
    max_images_to_plot = 8

    print(LOAD_MODEL_FILE)
    model = Resnet50TL().to(DEVICE)

    # load_checkpoint is called with an optimizer, so one is built here even
    # though no training step is taken in this cell.
    optimizer = optim.Adam(
        model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY
    )

    if LOAD_MODEL:
        # Map the checkpoint onto whichever device the model lives on, so the
        # same line works on both GPU and CPU runtimes.
        checkpoint = torch.load(LOAD_MODEL_FILE, map_location=DEVICE)
        load_checkpoint(checkpoint, model, optimizer)

    # Inference mode: layers such as batch norm / dropout must not behave as
    # they do during training.
    model.eval()

    test_dataset = VOCDataset(
        "data/data_200/test_data_100.csv",
        transform=transform,
        img_dir=IMG_DIR,
        label_dir=LABEL_DIR,
    )

    test_loader = DataLoader(
        dataset=test_dataset,
        batch_size=BATCH_SIZE,
        num_workers=NUM_WORKERS,
        pin_memory=PIN_MEMORY,
        shuffle=True,
        drop_last=True,
    )

    # Only the first batch is visualised.
    for x, _ in test_loader:
        x = x.to(DEVICE)

        # One forward pass for the whole batch; no gradients are needed.
        with torch.no_grad():
            batch_bboxes = cellboxes_to_boxes(model(x))

        # Never index past the end of the batch when it holds fewer than
        # max_images_to_plot images.
        for idx in range(min(max_images_to_plot, x.shape[0])):
            bboxes = non_max_suppression(
                batch_bboxes[idx],
                iou_threshold=0.5,
                threshold=0.4,
                box_format="midpoint",
            )
            # plot_image expects (height, width, channels) data on the CPU.
            plot_image(x[idx].permute(1, 2, 0).to("cpu"), bboxes)
        break


if __name__ == "__main__":
    main()

Output hidden; open in https://colab.research.google.com to view.