In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pathlib
import os
import pandas as pd
from pathlib import Path
import cv2
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision
from torchvision import datasets, transforms
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.transforms import functional as F
from torchvision.transforms import ToTensor
from torchvision.datasets import ImageFolder
from PIL import Image

In [None]:
!pip install tqdm
from tqdm import tqdm

In [None]:
class myDataset(Dataset):
    """Object-detection dataset read from a folder tree under ``main_d``.

    Layout read by this class::

        <main_d>/train/<class_folder>/<image>   one sub-folder per class
        <main_d>/test/<image>                   flat list of images
        <main_d>/train.csv                      columns: image, x1, y1, x2, y2

    The ``image`` column of ``train.csv`` is matched against
    ``"<class_folder>/<image file name>"``. Class folders are sorted by name
    and numbered from 1 (0 is left free for the detector's background class).

    Args:
        main_d: Dataset root (``str`` or ``Path``; a trailing slash is optional).
        train: Read the ``train`` split when true, otherwise ``test``.
        mask: Load the bounding boxes from ``train.csv`` (train split only).
            Indexing a training dataset built with ``mask=False`` raises
            ``RuntimeError`` because no boxes are available.
        transform: Optional albumentations-style callable. It is called with
            ``image=``, ``bboxes=`` and ``labels=`` keywords for the train
            split and with ``image=`` only for the test split.

    Items:
        train: ``(image, target)`` where ``target`` has the keys ``boxes``
            (float32, shape ``(N, 4)``, ``[xmin, ymin, xmax, ymax]``),
            ``labels`` (int64), ``area``, ``iscrowd`` and ``image_id``.
        test: ``(image, file_name)``.

        Without a transform the image is a float32 RGB array scaled to
        ``[0, 1]``; with one it is whatever the transform returns.
    """

    def __init__(self, main_d, train=True,mask=False, transform=None):
        self.split = "train" if train else "test"
        root = Path(main_d)
        self.dset_dir = root/self.split
        self.transform = transform
        self.files = []
        self.mask = mask
        self.df_mask = pd.DataFrame()
        folders = sorted(os.listdir(self.dset_dir))

        if self.split == "train":
            if mask:
                # Path join instead of string concatenation so that main_d
                # works with or without a trailing separator.
                self.df_mask = pd.read_csv(root/"train.csv")
            for class_idx, folder in enumerate(folders):
                folder_dir = self.dset_dir/folder
                self.files += [
                    {"mask": folder+"/"+x, "file": folder_dir/x, "class": class_idx+1}
                    for x in os.listdir(folder_dir)
                ]
        else:
            # In the test split the directory entries are the image files.
            self.file = folders
            self.files = [self.dset_dir/name for name in folders]

    def __len__(self):
        return len(self.files)

    @staticmethod
    def _load_image(path):
        """Read ``path`` as a float32 RGB array scaled to [0, 1]."""
        img = cv2.imread(str(path))
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError(f"Could not read image file: {path}")
        return cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0

    def _lookup_boxes(self, image_key):
        """Return the ``[xmin, ymin, xmax, ymax]`` boxes listed for ``image_key``."""
        if "image" not in self.df_mask.columns:
            raise RuntimeError(
                "Bounding boxes are not loaded; build the training dataset with mask=True."
            )
        rows = self.df_mask[self.df_mask["image"] == image_key]
        if rows.empty:
            raise KeyError(f"No bounding box found for {image_key!r} in train.csv")
        # Each box is shrunk by one pixel on every side, as in the original
        # implementation (presumably to keep boxes strictly inside the image
        # for the augmentation library's validity checks -- TODO confirm).
        coords = rows[["x1", "y1", "x2", "y2"]].to_numpy()
        return [
            [int(x1) + 1, int(y1) + 1, int(x2) - 1, int(y2) - 1]
            for x1, y1, x2, y2 in coords
        ]

    def _get_test_item(self, i):
        """Return ``(image, file_name)`` for the i-th test image."""
        image = self._load_image(self.files[i])
        if self.transform:
            image = self.transform(image=image)["image"]
        return image, self.file[i]

    def __getitem__(self, i):
        if self.split != "train":
            return self._get_test_item(i)

        item = self.files[i]
        img_res = self._load_image(item["file"])
        boxes = self._lookup_boxes(item["mask"])
        labels = [item["class"]] * len(boxes)

        if self.transform:
            # Plain Python lists are handed to the transform; boxes and labels
            # are read back together so they stay aligned if a box is dropped.
            sample = self.transform(image=img_res, bboxes=boxes, labels=labels)
            img_res = sample["image"]
            boxes = sample["bboxes"]
            labels = sample["labels"]

        # torchvision detection models document float boxes; casting to int
        # would also truncate the sub-pixel coordinates produced by rotation.
        # reshape keeps a well-formed (0, 4) tensor when no box survives.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64).reshape(-1)

        target = {
            "boxes": boxes,
            "labels": labels,
            # Computed after augmentation so it matches the returned boxes.
            "area": (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]),
            # No instance is treated as a crowd.
            "iscrowd": torch.zeros((boxes.shape[0],), dtype=torch.int64),
            "image_id": torch.tensor([i]),
        }
        return img_res, target
        

In [None]:
# Dataset root directory passed to myDataset (relative to the notebook's working directory).
main_d = "../input/aiunict-2023/"

In [None]:
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
from albumentations.core.transforms_interface import ImageOnlyTransform

def train_transf():
    """Build the training augmentation pipeline.

    Random flip, brightness/contrast, blur and a rotation of up to 15 degrees,
    followed by conversion to a tensor. Boxes are declared in ``pascal_voc``
    format and travel with the ``labels`` field.
    """
    augmentations = [
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.GaussianBlur(p=0.3),
        A.Rotate(limit=15, p=0.5),
        ToTensorV2(p=1.0),
    ]
    bbox_settings = {'format': 'pascal_voc', 'label_fields': ['labels']}
    return A.Compose(augmentations, bbox_params=bbox_settings)

def test_transf():
    """Build the inference pipeline: tensor conversion only, no augmentation."""
    steps = [ToTensorV2(p=1.0)]
    return A.Compose(steps)

In [None]:
# Training split: images plus boxes from train.csv, with augmentation applied.
train = myDataset(
    main_d,
    train=True,
    mask=True,
    transform=train_transf(),
)
def collate_fn(batch):
    """Regroup a list of samples into one tuple per field.

    ``[(img0, tgt0), (img1, tgt1)]`` becomes ``((img0, img1), (tgt0, tgt1))``;
    nothing is stacked, so every sample keeps its own image and target.
    """
    columns = zip(*batch)
    return tuple(columns)
# Shuffled training batches of 8; collate_fn keeps samples as tuples.
train_dl = DataLoader(
    train,
    batch_size=8,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
    collate_fn=collate_fn,
)

# Test split: no boxes, tensor conversion only.
test_set = myDataset(
    main_d,
    train=False,
    transform=test_transf(),
)

In [None]:
import random

# Preview up to 8 augmented training images with their ground-truth boxes.
images, targets = next(iter(train_dl))

n_rows, n_cols = 2, 4
# Never ask for more samples than the batch holds (random.sample would raise).
n_show = min(n_rows * n_cols, len(images))
indices = random.sample(range(len(images)), n_show)
fig, axs = plt.subplots(n_rows, n_cols, figsize=(15, 10))

# Hide every axis up front so unused panels stay blank.
for ax in axs.flat:
    ax.axis('off')

# Plot each sampled image and overlay its boxes.
for ax, i in zip(axs.flat, indices):
    # Tensors are channel-first; imshow expects channel-last.
    image = images[i].permute(1, 2, 0).numpy()
    ax.imshow(image)

    for bbox in targets[i].get("boxes", []):
        xmin, ymin, xmax, ymax = bbox.tolist()
        rect = patches.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, linewidth=1, edgecolor='r', facecolor='none')
        ax.add_patch(rect)

plt.tight_layout()
plt.show()


In [None]:
from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Faster R-CNN (ResNet-50 FPN v2) initialised with the default weights.
model = fasterrcnn_resnet50_fpn_v2(weights='DEFAULT')

# Replace the box predictor so the head outputs this task's class count.
num_classes = 9  # 1 background + 8 object classes
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Move the model to the device
model.to(device)


In [None]:
from torch.optim.lr_scheduler import OneCycleLR
# Optimise only the parameters that are not frozen.
params = [p for p in model.parameters() if p.requires_grad]
# NOTE: OneCycleLR drives the learning rate itself, so the lr given here is
# overridden by the schedule (it ramps up to max_lr and anneals back down).
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
# The schedule length (steps_per_epoch * epochs) has to cover the whole run:
# the training cell uses num_epochs = 6, so the cycle is sized for 6 epochs.
# OneCycleLR is designed to be stepped once per batch, not once per epoch.
lr_scheduler = OneCycleLR(optimizer, max_lr=0.01, steps_per_epoch=len(train_dl), epochs=6)


In [None]:
class Averager:
    """Running arithmetic mean of the values passed to :meth:`send`."""

    def __init__(self):
        # Start from the same empty state that reset() produces.
        self.reset()

    def send(self, value):
        """Add one observation to the running total."""
        self.current_total += value
        self.iterations += 1

    @property
    def value(self):
        """Mean of the observations so far, or 0 when nothing was sent."""
        if self.iterations != 0:
            return 1.0 * self.current_total / self.iterations
        return 0

    def reset(self):
        """Forget every observation."""
        self.current_total, self.iterations = 0.0, 0.0

In [None]:
import time
num_epochs = 6
loss_hist = Averager()      # mean loss of the current epoch
best_loss = float('inf')    # lowest epoch-mean loss seen so far
itr = 1                     # global batch counter (1-based)
lossHistoryiter = []        # loss of every batch
lossHistoryepoch = []       # mean loss of every epoch

start = time.time()

for epoch in range(num_epochs):
    loss_hist.reset()
    model.train()

    # Wrap the data loader with tqdm to add a progress bar
    for images, targets in tqdm(train_dl, desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch"):
        # Move images and targets to the training device
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # In training mode the model returns a dict of losses; optimise their sum.
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        loss_value = losses.item()

        # Feed the epoch average; without this the per-epoch loss is always 0.
        loss_hist.send(loss_value)
        lossHistoryiter.append(loss_value)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        # OneCycleLR is a per-batch schedule: its length is
        # steps_per_epoch * epochs, so it must advance after every optimizer
        # step. Stepping past its configured length raises, so stop advancing
        # once the cycle is complete and keep the final learning rate.
        if lr_scheduler is not None:
            scheduler_len = getattr(lr_scheduler, "total_steps", None)
            if scheduler_len is None or lr_scheduler.last_epoch < scheduler_len:
                lr_scheduler.step()

        if itr % 50 == 0:
            print(f"Iteration #{itr} loss: {loss_value}")
        itr += 1

    lossHistoryepoch.append(loss_hist.value)
    best_loss = min(best_loss, loss_hist.value)
    print(f"Epoch #{epoch} loss: {loss_hist.value}")

# Saves the whole pickled model object (not just the state_dict), so loading
# it later requires the same class definitions to be importable.
torch.save(model, "/kaggle/working/model2.pth")

end = time.time()
hours, rem = divmod(end-start, 3600)
minutes, seconds = divmod(rem, 60)
print(f"Time taken to Train the model: {int(hours):0>2}:{int(minutes):0>2}:{seconds:05.2f}")

In [None]:
def apply_nms(orig_prediction, iou_thresh=0.3):
    """Filter one image's detections with non-maximum suppression.

    Args:
        orig_prediction: Dict with at least the keys ``boxes``, ``scores`` and
            ``labels`` (one entry per detection). It is left unmodified.
        iou_thresh: Boxes overlapping a higher-scoring box by more than this
            IoU are discarded.

    Returns:
        A new dict with ``boxes``, ``scores`` and ``labels`` restricted to the
        kept detections; any other key is carried over unchanged.
    """
    keep = torchvision.ops.nms(orig_prediction['boxes'], orig_prediction['scores'], iou_thresh)

    # Shallow copy: assigning into the caller's dict would silently replace
    # its unfiltered tensors.
    final_prediction = dict(orig_prediction)
    for key in ('boxes', 'scores', 'labels'):
        final_prediction[key] = orig_prediction[key][keep]

    return final_prediction

In [None]:
# Predict one class per test image: the label of the best detection left
# after NMS (torchvision's nms returns indices sorted by decreasing score).
image_id = []
image_class = []

# Label used when the detector returns nothing for an image, so the
# submission still gets a row instead of the loop crashing on an empty
# result. Label 1 maps to submitted class 0; adjust if another default is
# more appropriate.
FALLBACK_LABEL = 1

# Switching to eval mode once is enough; it does not change between images.
model.eval()
with torch.no_grad():
    for idx in range(len(test_set)):
        img, name_file = test_set[idx]
        prediction = model([img.to(device)])[0]
        nms_prediction = apply_nms(prediction, iou_thresh=0.2)

        labels = nms_prediction['labels']
        if len(labels) > 0:
            pred = int(labels[0].item())
        else:
            pred = FALLBACK_LABEL

        image_id.append(name_file)
        # Dataset labels start at 1 (0 is background); the submission is 0-based.
        image_class.append(pred-1)

d = {'image': image_id, 'class': image_class}
df = pd.DataFrame(data=d)

In [None]:
# Write the predictions (columns: image, class) to the working directory, without the index column.
df.to_csv("submission.csv",index=False)