In [1]:
import os 
import torch
import cv2
import numpy as np
from torch.utils.data import Dataset, DataLoader

In [2]:
NO_OF_ANCHOR_BOX = N = 5
S = 13
NO_OF_CLASS = C =  1
HEIGHT = H = 416
WIDTH = W = 416
SCALE = 32

DEVICE =device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
BATCH_SIZE = batch_size = 32


ANCHOR_BOXES = A = [[1, 1],
 [ 1,  1.1],
 [1, 1.4],
 [1, 1.3],
 [1, 1.2]]

In [3]:
def convert_to_corners(boxes):

    x_center, y_center, width, height = boxes.unbind(1)
    x_min = x_center - width / 2
    y_min = y_center - height / 2
    x_max = x_center + width / 2
    y_max = y_center + height / 2
    return torch.stack((x_min, y_min, x_max, y_max), dim=1)

def match_anchor_box(bbox_w, bbox_h, to_exclude = [], anchor_boxes =ANCHOR_BOXES):
    iou = []
    for i, box in enumerate(anchor_boxes):
        if i in to_exclude:
            iou.append(0)
            continue
        intersection_width = min(box[0], bbox_w)  # Scale up as h, w in range 0-13
        intersection_height = min(box[1], bbox_h)  
        I = intersection_width * intersection_height
        IOU = I / (bbox_w * bbox_h + box[0] * box[1] - I)
        iou.append(IOU)
    
    iou = torch.tensor(iou)
    return torch.argmax(iou, dim = 0).item()
def intersection_over_union(bb1, bb2):
    bb1 = bb1.to(DEVICE)
    bb2 = bb2.to(DEVICE)
    bboxes = torch.vstack((bb1, bb2))
    # Convert center-width-height format to top-left and bottom-right format
    bboxes = convert_to_corners(bboxes)
    bb1_x1, bb1_y1, bb1_x2, bb1_y2 = bboxes[0]
    bb2_x1, bb2_y1, bb2_x2, bb2_y2 = bboxes[1]
    if  bb1_x1 > bb1_x2 or bb1_y1 > bb1_y2 or bb2_x1 > bb2_x2 or bb2_y1 > bb2_y2:
        return 0
    x_left = max(bb1_x1, bb2_x1)
    y_top = max(bb1_y1, bb2_y1)
    x_right = min(bb1_x2, bb2_x2)
    y_bottom = min(bb1_y2, bb2_y2)

    if x_right < x_left or y_bottom < y_top:
        return 0.0
    intersection_area = (x_right - x_left) * (y_bottom - y_top)
    bb1_area = (bb1_x2 - bb1_x1) * (bb1_y2 - bb1_y1)
    bb2_area = (bb2_x2 - bb2_x1) * (bb2_y2 - bb2_y1)
    iou = intersection_area / float(bb1_area + bb2_area - intersection_area)

    return iou
           

In [4]:
from torchvision.transforms import v2
from torchvision.io import read_image
from torchvision import tv_tensors

class GDITAerialDataset(Dataset):
    def __init__(self, rootdir, transform = None):
        self.root_dir = rootdir
        self.image_paths = []
        self.transform = transform
        
        class_names = os.listdir(self.root_dir)
        for directory in class_names:
            files = os.listdir(os.path.join(self.root_dir, directory))
    
            self.image_paths+= [os.path.join(directory, file) for file in files if os.path.splitext(file)[1] =='.jpg']
    
    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, idx):
        
        sample = self._make_sample(idx)
        img, labels, bboxes = sample['image'], sample['labels'],sample['bbox']
        _, height, width, = img.size()

        target = self._make_target(bboxes, labels, width, height)        
        
        return img, target

    def _make_sample(self, idx):
        img_path = os.path.join(self.root_dir, self.image_paths[idx])
        target_path = os.path.splitext(img_path)[0]+'.txt'
        img = read_image(img_path)
        _, height, width, = img.size()
        bbox = []
        labels = [] 
    
        with open(target_path, 'r') as f:
            data = f.readlines()
            for line in data:
                values = line.split()
                labels.append(int(values[0]))
                
                temp_bbox = [float(val) for val in values[1:]]
                
                x, y = temp_bbox[0] * width, temp_bbox[1] * height  # center of the bounding box
                box_width, box_height = temp_bbox[2] * width, temp_bbox[3] * height
                bbox+=[[x,y,box_width,box_height]]

        bboxes = tv_tensors.BoundingBoxes(bbox, format="CXCYWH", canvas_size=img.shape[-2:])
        
        sample = {
            'image':img,
            'labels': torch.tensor(labels),
            'bbox': bboxes
        }
        
        if self.transform is not None:
            sample = self.transform(sample)

        return sample

    def _make_target(self, bboxes, labels, height=H, width=W):
        to_exclude = []
        target = torch.zeros(S, S, NO_OF_ANCHOR_BOX, 1+4+C )
        for bbox, label in zip(bboxes, labels):
            cx, cy = bbox[0]/SCALE, bbox[1]/SCALE 
            pos = (int(cx), int(cy))
            pos = min(pos[0], 12), min(pos[1], 12)
            
            bx, by = cx - int(cx), cy - int(cy)
            box_widht, box_height = bbox[2]/ SCALE, bbox[3]/SCALE
            assigned_anchor_box = match_anchor_box(box_widht, box_height, to_exclude)
            anchor_box = ANCHOR_BOXES[assigned_anchor_box] 
            
            bw_by_Pw, bh_by_ph = box_widht/anchor_box[0], box_height/anchor_box[1]
            target[pos[0], pos[1],assigned_anchor_box, 0:5] = torch.tensor([1, bx, by, bw_by_Pw, bh_by_ph])
            target[pos[0], pos[1],assigned_anchor_box, 5+int(label)] = 1
            
            to_exclude.append(assigned_anchor_box) 
            
        return target
    def inverse_target(self, ground_truth, S=S, SCALE=SCALE, anchor_boxes=ANCHOR_BOXES):
        device = ground_truth.device
        cx = cy = torch.tensor([i for i in range(13)], device=device)
        ground_truth = ground_truth.permute(0, 3, 4, 2, 1)
        ground_truth[..., 1:2, :, :] += cx
        ground_truth = ground_truth.permute(0, 1, 2, 4, 3)
        ground_truth[..., 2:3, :, :] += cy                             
        ground_truth = ground_truth.permute(0, 3, 4, 1, 2)
    
        ground_truth[..., 1:3] *= 32
        ground_truth[..., 3:5] *= torch.tensor(anchor_boxes, device=device)  
        ground_truth[..., 3:5] *= 32 
    
        bbox = ground_truth[ground_truth[..., 0] == 1][..., 1:5]
        if ground_truth[ground_truth[..., 0] == 1][..., 5:].numel() % 4 == 0:
            _, labels = torch.max(ground_truth[ground_truth[..., 0] == 1][..., 5:].view(-1, 4), dim=-1)
        else:
            _, labels = torch.max(ground_truth[ground_truth[..., 0] == 1][..., 5:], dim=-1)
    
        return bbox, labels


In [5]:
transforms = v2.Compose([
    v2.RandomResizedCrop(size=(416, 416), scale=(0.9,1), antialias=True),
    v2.RandomPhotometricDistort(p=0.2),
    v2.RandomHorizontalFlip(p=0.2),
    v2.RandomZoomOut(p=0.2, side_range=(1.0,1.5), fill={tv_tensors.Image: (0, 100, 0), "others": 0}),
    v2.RandomIoUCrop(min_scale = 0.9, max_scale = 1, max_aspect_ratio=1.25, min_aspect_ratio=0.75),
    v2.Resize((416,416), antialias=True),
    v2.ToDtype(torch.float32, scale=True),  # Normalize expects float input
    v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    v2.SanitizeBoundingBoxes(),
])

tests_transforms = v2.Compose([
            v2.ToDtype(torch.float32, scale=True),  
            v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),]
            )


rev_transform = v2.Compose([
     v2.Normalize(mean = [ 0., 0., 0. ], std = [ 1/0.229, 1/0.224, 1/0.225 ]),
    v2.Normalize(mean = [ -0.485, -0.456, -0.406 ], std = [ 1., 1., 1. ])
])

In [6]:
## Intatntiate dataset 
data = GDITAerialDataset('/kaggle/input/gdit-0-1', transform = transforms)
device = DEVICE
batch_size = BATCH_SIZE

In [7]:
train_data, test_data = torch.utils.data.random_split(data, [0.9, 0.1])

In [8]:
train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=batch_size, 
        shuffle = True,
        num_workers=0,  
        pin_memory=True,
        drop_last=False,
    )

val_loader = torch.utils.data.DataLoader(
        test_data,
        batch_size=batch_size, 
        shuffle = False,
        num_workers=0,  
        pin_memory=True,
        drop_last=False,
    )

print('The ImageNet train set is ready. Size : {}'.format(len(train_loader)*batch_size))

The ImageNet train set is ready. Size : 704


In [9]:
dataloaders = {}

dataloaders['train'] = train_loader
dataloaders['val'] = val_loader

dataset_sizes = {'train': len(train_loader)*batch_size,
                'val': len(val_loader)*batch_size}
print(dataset_sizes)

{'train': 704, 'val': 96}


In [10]:
import torch
import torch.nn as nn


# tuple -> (out_channels, kernel_size)
# M -> MaxPool, stage1 and stage2 are backbone
DARKNET_BACKBONE = {
    "stage1_conv": [
        (32, 3),
        "M",
        (64, 3),
        "M",
        (128, 3),
        (64, 1),
        (128, 1),
        "M",
        (256, 3),
        (128, 1),
        (256, 3),
        "M",
        (512, 3),
        (256, 1),
        (512, 3),
        (256, 1),
        (512, 3),
    ],
    "stage2_conv": ["M", (1024, 3), (512, 1), (1024, 3), (512, 3), (1024, 3)],
    "fcn_layer_in_channel": 3072,
    "fcn_layers": [(1024, 3), (1024, 3), (1024, 3)],
}


def make_conv_layers(arch_config, in_channels=3):
    layers = []
    in_channels = in_channels
    for value in arch_config:
        if type(value) == tuple:
            out_channels, kernel_size = value
            layers += [
                nn.Conv2d(
                    in_channels, out_channels, kernel_size, padding="same", bias=False
                ),  # not using bias as batchnorm
                nn.BatchNorm2d(value[0]),
                nn.LeakyReLU(negative_slope=0.1),
            ]

            in_channels = out_channels

        elif value == "M":
            layers += [nn.MaxPool2d(kernel_size=2)]

    return nn.Sequential(*layers)

DARKNET_BACKBONE = {
    "stage1_conv": [
        (32, 3),
        "M",
        (64, 3),
        "M",
        (128, 3),
        (64, 1),
        (128, 1),
        "M",
        (256, 3),
        (128, 1),
        (256, 3),
        "M",
        (512, 3),
        (256, 1),
        (512, 3),
        (256, 1),
        (512, 3),
    ],
    "stage2_conv": ["M", (1024, 3), (512, 1), (1024, 3), (512, 3), (1024, 3)],
    "fcn_layer_in_channel": 3072,
    "fcn_layers": [(1024, 3), (1024, 3), (1024, 3)],
}



class YOLOv2(nn.Module):
    def __init__(
        self, backbone_config=DARKNET_BACKBONE, no_of_classes=C, no_of_anchor_box=N
    ):
        super().__init__()
        self.in_channels = 3
        self.arch_config = backbone_config
        self.no_of_anchor_box = no_of_anchor_box
        self.no_of_classes = no_of_classes
        self.output_layer_in_channels = self.arch_config["fcn_layers"][-1][0]

        # no of anchor boxes * (4 bb + 4 class prob + object confidence score)*13*13 for dataset with 4 classes
        self.output_channels = self.no_of_anchor_box * (self.no_of_classes + 1 + 4)

        # Conv Layers
        self.stage1_conv_layers = make_conv_layers(self.arch_config["stage1_conv"])
        self.stage2_conv_layers = make_conv_layers(
            self.arch_config["stage2_conv"],
            in_channels=self.arch_config["stage1_conv"][-1][0],
        )
        self.fcn_layers = make_conv_layers(
            self.arch_config["fcn_layers"],
            in_channels=self.arch_config["fcn_layer_in_channel"],
        )

        self.ouput_layer = nn.Conv2d(
            in_channels=self.output_layer_in_channels,
            out_channels=self.output_channels,
            kernel_size=1,
            padding="same",
        )

    def forward(self, x):
        x1 = self.stage1_conv_layers(x)
        x2 = self.stage2_conv_layers(x1)
        _, _, height, width = x1.size()

        part1 = x1[:, :, : height // 2, : width // 2]
        part2 = x1[:, :, : height // 2, width // 2 :]
        part3 = x1[:, :, height // 2 :, : width // 2]
        part4 = x1[:, :, height // 2 :, width // 2 :]
        residual = torch.cat((part1, part2, part3, part4), dim=1)
        x_concat = torch.cat((x2, residual), dim=1)
        x3 = self.fcn_layers(x_concat)
        out = self.ouput_layer(x3)
        new_out = out.permute(0,2,3,1).contiguous()
        
        return new_out.view(new_out.size(0),new_out.size(1),new_out.size(2), self.no_of_anchor_box, 5+self.no_of_classes)

In [11]:
from torch.nn import CrossEntropyLoss, MSELoss, BCEWithLogitsLoss

class YoloV2_Loss(torch.nn.Module):
    def __init__(self, C=1 ):
        super().__init__()
        self.lambda_no_obj = torch.tensor(1.3, device=DEVICE)
        self.lambda_obj = torch.tensor(1.0, device=DEVICE)
#         self.lambda_prior = torch.tensor(1.0, device=DEVICE)
        self.lambda_class = torch.tensor(1.0, device=DEVICE)
        self.lambda_bb_cord = torch.tensor(5.0, device=DEVICE)
        self.C = C
        self.binary_loss = (
            BCEWithLogitsLoss()
        ) 
        self.logistic_loss = CrossEntropyLoss() 
        self.regression_loss = MSELoss()

    def forward(self, pred, ground_truth):
        obj = ground_truth[..., 0] == 1
        no_obj = ground_truth[..., 0] == 0
        no_obj_loss = self.binary_loss(
            pred[no_obj][[..., 0]], ground_truth[no_obj][[..., 0]]
        )
        obj_loss = self.binary_loss(pred[obj][[..., 0]], ground_truth[obj][[..., 0]])
        pred_bb = torch.cat(
            (torch.sigmoid(pred[obj][..., 1:3]), torch.exp(pred[obj][..., 3:5])), dim=-1  # B*S*S*A, 4
        )  
        gt_bb = ground_truth[obj][..., 1:5]
        bb_cord_loss = self.regression_loss(pred_bb, gt_bb)
        pred_prob = pred[obj][..., 5:] 
    
        class_loss = self.logistic_loss(
            pred_prob, ground_truth[obj][..., 5:]
        )
    
        total_loss = (
            self.lambda_bb_cord * bb_cord_loss
            + self.lambda_no_obj * no_obj_loss
            + self.lambda_obj * obj_loss
            + self.lambda_class * class_loss
        )
        
        return total_loss

In [12]:
g = torch.Generator().manual_seed(0)

pred = torch.randn((32, 13,13,5,6),generator=g)
ground_truth = torch.randn((32, 13,13,5,6),generator=g)
ground_truth[..., 0] = torch.empty_like(ground_truth[..., 0], dtype=torch.long).random_(2)
ground_truth[...,5:] = torch.empty_like(ground_truth[..., 5:], dtype=torch.long).random_(2)
loss = YoloV2_Loss()
print(loss(pred, ground_truth))

tensor(25.8758, device='cuda:0')


In [13]:
model = YOLOv2()

In [14]:
img, target = data[0]
target = target.unsqueeze(dim=0)
pred = model(img.unsqueeze_(dim=0))
loss(pred, target)

tensor(2.4562, device='cuda:0', grad_fn=<AddBackward0>)

In [15]:
darknet19_wts = torch.load('/kaggle/input/darknet-19/pytorch/default/1/darknet_19_state.pt')
yolo_state = model.state_dict()

In [16]:
match_keys =[] 

for key, value in yolo_state.items():
    if key.startswith('stage'):
        match_keys.append(key)

print('Total Layers Matched:', len(match_keys)/6)

print('To Verify, Before:', yolo_state[match_keys[0]].sum())

Total Layers Matched: 18.0
To Verify, Before: tensor(2.8270)


In [17]:
with torch.no_grad():
    for des_key, src_key in zip(match_keys, darknet19_wts.keys()):
        if yolo_state[des_key].shape == darknet19_wts[src_key].shape:
            yolo_state[des_key] = darknet19_wts[src_key]
        else:
            print('Weight Transfer Failed')
            break
        
print('To Verify, After:', yolo_state[match_keys[0]].sum())
model.load_state_dict(yolo_state)

print('Weight transfer compelte')

To Verify, After: tensor(-13.6342)
Weight transfer compelte


In [18]:
!pip install torcheval
from torcheval.metrics import AUC
def mean_average_precision(predictions, targets, iou_thres_nms = 0.4, iou_thres_for_corr_predn =0.4, C=C,):
    ep = 1e-6
    processed_preds = process_preds(predictions).clone()
    pr_matrix = torch.empty(9,C, 2) 

    for thres in range(1, 10, 1):
        
        ground_truth = targets.clone()
        
        conf_thres = thres/10
        
        local_pr_matrix = torch.zeros(C, 3)  
        
        
        for i in range(processed_preds.size(0)):
            preds = processed_preds[i]
            obj = preds[...,0] > conf_thres

            bboxes = torch.flatten(preds[obj][...,1:5], end_dim = -2)
            scores = torch.flatten(preds[obj][...,0])
            _, ind = torch.max(preds[obj][..., 5:],dim = -1)
            classes = torch.flatten(ind)


            best_boxes = non_max_suppression(bboxes, scores, iou_thres_nms)



            filtered_bbox = bboxes[best_boxes]
            filtered_classes = classes[best_boxes]
            gt_bboxes, labels = data.inverse_target(ground_truth[i].unsqueeze(0)) 
            tracker = torch.zeros_like(labels)   # to keep track of matched boxes
             
            for c in range(C):
                total_preds = torch.sum(filtered_classes==c)
                corr_preds = 0
                actual_count = torch.sum(labels==c)
                for box in filtered_bbox[filtered_classes==c]:
                    best_iou = 0
                    for index, value in enumerate(labels) :
                        if c == value:

                            iou = intersection_over_union(box, gt_bboxes[index])  #format is cx,cy, w,h

                            if iou > best_iou and tracker[index]==0:
                                best_iou = iou
                                temp = index
                            
                    if best_iou > iou_thres_for_corr_predn:
                        tracker[temp] = 1
                        corr_preds+=1


                local_pr_matrix[c] += torch.tensor([corr_preds, total_preds, actual_count])
            
            precision , recall = local_pr_matrix[:,0]/(local_pr_matrix[:,1]+ep), local_pr_matrix[:,0]/(local_pr_matrix[:,2]+ep) 

            pr_matrix[thres-1] = torch.cat((precision.view(-1,1), recall.view(-1,1)), dim=1) 
        
    pr_matrix = pr_matrix.permute(1,0,2) 

    metric = AUC(n_tasks=C)
    metric.update(pr_matrix[...,0], pr_matrix[...,1])
    average_precision = metric.compute()
    average_precision = metric.compute()

    return average_precision.mean()          
    for thres in range(1, 10, 1):
        
        ground_truth = targets.clone()
        
        conf_thres = thres/10
        
        local_pr_matrix = torch.zeros(C, 3) 
        
        
        for i in range(processed_preds.size(0)): 
            preds = processed_preds[i]
            obj = preds[...,0] > conf_thres

            bboxes = torch.flatten(preds[obj][...,1:5], end_dim = -2)
            scores = torch.flatten(preds[obj][...,0])
            _, ind = torch.max(preds[obj][..., 5:],dim = -1)
            classes = torch.flatten(ind)

            best_boxes = non_max_suppression(bboxes, scores, iou_thres_nms)

            filtered_bbox = bboxes[best_boxes]
            filtered_classes = classes[best_boxes]

            gt_bboxes, labels = data.inverse_target(ground_truth[i].unsqueeze(0))
            tracker = torch.zeros_like(labels)
             
            for c in range(C):
                total_preds = torch.sum(filtered_classes==c)
                corr_preds = 0
                actual_count = torch.sum(labels==c)
                for box in filtered_bbox[filtered_classes==c]:
                    best_iou = 0
                    for index, value in enumerate(labels) :
                        if c == value:

                            iou = intersection_over_union(box, gt_bboxes[index])  #format is cx,cy, w,h

                            if iou > best_iou and tracker[index]==0:
                                best_iou = iou
                                temp = index
                            
                    if best_iou > iou_thres_for_corr_predn:
                        tracker[temp] = 1
                        corr_preds+=1


                local_pr_matrix[c] += torch.tensor([corr_preds, total_preds, actual_count])

            
            pr_matrix[thres-1] = torch.cat((precision.view(-1,1), recall.view(-1,1)), dim=1) 

    pr_matrix = pr_matrix.permute(1,0,2)  
    metric = AUC(n_tasks=C)
    metric.update(pr_matrix[...,0], pr_matrix[...,1])
    average_precision = metric.compute()
    average_precision = metric.compute()

    return average_precision.mean()          



In [19]:
pred = model(img)
loss(pred, target)

tensor(3.6153, device='cuda:0', grad_fn=<AddBackward0>)

In [20]:
def check_model_accuracy(preds, targets ,thres = 0.5 ):
    total_class, class_corr = 0,0
    total_obj, obj_corr = 0,0
    total_no_obj, no_obj_corr = 0, 0
    sig = torch.nn.Sigmoid()
    
    obj = targets[..., 0] == 1 
    no_obj = targets[..., 0] == 0 
    
    preds[..., 0] = sig(preds[..., 0])
    
    
    class_corr = torch.sum((torch.argmax(preds[obj][..., 5:], dim=-1) == torch.argmax(targets[obj][...,5:],dim=-1)))

    total_class = torch.sum(obj)
    obj_corr = torch.sum(preds[obj][..., 0]>thres)
    total_obj = torch.sum(obj)+ 1e-6  
    
    no_obj_corr = torch.sum(preds[no_obj][..., 0]<thres)
   
    total_no_obj = torch.sum(no_obj)
    
  
    return torch.tensor([total_class, class_corr, total_obj, obj_corr, total_no_obj, no_obj_corr])

def cal_epoch_acc(total_class_pred, class_corr, total_obj_prd, obj_corr, total_no_obj, no_obj_corr):
    print('Class Score (R)', 100*class_corr/total_class_pred)
    print('Object Score (R)', 100*obj_corr/total_obj_prd)
    print('No object Score (R)', 100*no_obj_corr/total_no_obj)

In [21]:
def process_preds(preds, anchor_boxes= ANCHOR_BOXES, device = DEVICE):
    sig = torch.nn.Sigmoid()
    preds[..., 0:1] = sig(preds[..., 0:1] ) 
    preds[..., 1:3] = sig(preds[..., 1:3]) 
    
    cx = cy = torch.tensor([i for i in range(S)], device=device)
    preds = preds.permute((0,3,4, 2,1)) 
    
    preds[...,1:2,:,:] += cx
    preds = preds.permute(0,1,2,4,3)
    preds[...,2:3,:,:] += cy                             
    preds = preds.permute((0,3,4, 1,2)) 
    
    preds[..., 1:3]*=32 
    preds[..., 3:5] = torch.exp(preds[..., 3:5])  
    preds[...,3:5]*=torch.tensor(anchor_boxes, device=device)  
    preds[..., 3:5] = preds[..., 3:5]*32  # back to pixel values
    
    
    return preds

In [22]:
import torchvision


def non_max_suppression(boxes, scores, io_threshold = 0.4):
    boxes = convert_to_corners(boxes)
    keep = torchvision.ops.nms(boxes, scores, io_threshold)

    return keep

In [23]:
import time
from tqdm.auto import tqdm
import shutil
import math

def train_model(model, criterion, optimizer, scheduler, num_epochs=5):
    since = time.time()
   
    best_map = 0
    
    tempdir = '/kaggle/working/temp'
    os.makedirs(tempdir, exist_ok=True)
    best_model_params_path = os.path.join(tempdir, 'best_model_params.pt')
    one_batch_data = {'train': next(iter(dataloaders['train'])),
                     'val':next(iter(dataloaders['val']))
                     }
    
    
    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-' * 10)
        
        store_preds = 0   #this w
        
        
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()
                ##For mAP Calculations
                no_of_batches = math.ceil(dataset_sizes[phase]/batch_size)
                all_preds = torch.zeros((no_of_batches, batch_size, S, S, N, C+5))
                all_targets = torch.zeros((no_of_batches, batch_size, S, S, N, C+5))
            
            
            running_loss = 0.0
            running_acc = torch.zeros(6)
            
            i = 0
            inputs, targets = one_batch_data[phase]
            for inputs, targets in tqdm(dataloaders[phase], leave=False):
                inputs = inputs.to(DEVICE)
                targets = targets.to(DEVICE)

                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    loss = criterion(outputs, targets)
                    
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                   
                    else:
                        #for mAP
                        try:
                            all_preds[i] = outputs.detach().to(DEVICE)
                            all_targets[i] = targets.detach().to(DEVICE)

                        except:
                            pass
                  
                        i+=1
                        

                running_loss += loss.item() * inputs.size(0)

                running_acc = running_acc + check_model_accuracy(outputs.detach(), targets.detach() )
                
                

            if phase == 'train':
                scheduler.step()
                
        
            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_acc / dataset_sizes[phase]
            
            print(f'{phase} Loss: {epoch_loss:.4f}')
            if phase =='val':
                all_preds = all_preds.view(-1, 13,13,5,6)
                all_targets = all_targets.view((-1, 13,13,5,6))
                mAP = mean_average_precision(all_preds.to("cuda"), all_targets.to("cuda") )
                print('Mean Average Precision : ' , mAP.item())    
            
                if epoch % 1 == 0:
                    cal_epoch_acc(*(running_acc.tolist()))

                # Also saving model associated with best val loss
                if mAP>best_map:
                    best_map = mAP
                    torch.save(model.state_dict(), best_model_params_path)
                
        print()

    time_elapsed = time.time() - since
    print('Mode with Best mAP: ', best_map)
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    
    model.load_state_dict(torch.load(best_model_params_path))


    return model

In [24]:
print(f'Training \n')

torch.cuda.empty_cache()

# model = torch.load('/kaggle/working/yolo_100_epo.pth')

model = model.to(device)
criterion = YoloV2_Loss()

optimizer_ft = torch.optim.Adam(model.parameters(), lr=0.00005)

exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_ft, step_size=10, gamma=0.1)

Training 



In [25]:
#model = model.to(device)
#model = train_model(model, criterion, optimizer_ft, exp_lr_scheduler,
#                   num_epochs=0)

In [26]:
torch.save(model, 'YoloModel.pth')

In [27]:
import torch
import math

# Create an instance of your model
model = YOLOv2()

# Load the state dictionary
state_dict = torch.load('/kaggle/input/bestparams/pytorch/default/1/best_model_params.pt')
model.load_state_dict(state_dict)

# Save the model
torch.save(model, 'YoloModel.pth')

# Move the model to the appropriate device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.eval()

# Define phase and calculate number of batches
phase = 'val'
no_of_batches = math.ceil(dataset_sizes[phase] / batch_size)

# Initialize all_preds and all_targets
all_preds = torch.empty((no_of_batches, batch_size, S, S, N, C+5), device=device)
all_targets = torch.empty((no_of_batches, batch_size, S, S, N, C+5), device=device)

# Iterate through the dataloader
for i, (imge, target) in enumerate(dataloaders[phase]):
    imge = imge.to(device)
    preds = model(imge)
    
    # Handle the last batch size
    if preds.size(0) != batch_size:
        all_preds = all_preds[:i * batch_size]
        all_targets = all_targets[:i * batch_size]
        break
    
    try:
        all_preds[i] = preds.detach()
        all_targets[i] = target.detach().to(device)
    except Exception as e:
        print(f'Error in batch {i}: {e}')
        print('Last batch has shape', preds.shape)

# Reshape all_preds and all_targets
all_preds = all_preds.view(-1, 13, 13, 5, 6)
all_targets = all_targets.view(-1, 13, 13, 5, 6)

print(all_preds.shape, all_targets.shape)


torch.Size([96, 13, 13, 5, 6]) torch.Size([96, 13, 13, 5, 6])


In [None]:
mean_average_precision(all_preds.to(device), all_targets)