In [52]:
import os

# MAIN CONFIGURATIONS
create_log_file = True
save_to_drive = False
model_id = '0'  # We will train multiple models with the same settings. Keep it as a string!
num_epochs = 10
batch_size = 32
data_augmentation_type = 'noaug'  # Which data augmentation tecnique are we using?
                                  # 'noaug':     no data augmentation

# !tree # Prints folder structure

test_only = False # when true it doesn't train the model, but it just tests an existing one

model_filepath = f"model_epochs{str(num_epochs)}_{data_augmentation_type}_id{model_id}"
model_filepath = os.path.join("models", model_filepath)
print(f"{model_filepath = }")

os.makedirs(model_filepath, exist_ok=True)

model_filepath = 'models/model_epochs10_noaug_id0'


In [53]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data
import torch.nn.functional as F
import torchvision
import torchvision.models as models
from PIL import Image
import matplotlib.pyplot as plt
import glob
import pandas as pd
from torchvision.io import read_image
from torchvision.transforms.functional import rotate
import numpy as np

# !pip install torchsummary
# from torchsummary import summary

In [54]:
import logging
from datetime import datetime

if create_log_file:
    logger = logging.getLogger('RootLogger')
    log_filepath = datetime.now().strftime("%m-%d_%H.%M.%S")
    log_filepath = os.path.join(model_filepath, f"log_{log_filepath}" + ".txt")
    print(f"{log_filepath = }")
    
    logging.basicConfig(filename=log_filepath,
                        filemode='a',
                        format='%(asctime)s %(levelname)s %(message)s',
                        level=logging.DEBUG,
                        datefmt='%m-%d %H:%M:%S',
                        force=True)
else:
    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
                        level=logging.DEBUG,
                        datefmt='%m-%d %H:%M:%S',
                        force=True)

log_filepath = 'models/model_epochs10_noaug_id0/log_05-19_08.30.26.txt'


In [55]:
# TRANSFORMATIONS

from torchvision.transforms import v2

img_dimensions = 224

# Normalize to the ImageNet mean and standard deviation
# Could calculate it for the cats/dogs data set, but the ImageNet
# values give acceptable results here.
img_train_transforms = v2.Compose([
    # transforms.RandomRotation(50),
    # transforms.RandomAffine(degrees = 0, translate = (0.2, 0.2)),
    # transforms.RandomHorizontalFlip(p=0.5),
    v2.Resize((img_dimensions, img_dimensions)),
    # transforms.ToTensor(),
    # transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225] )
    ])

img_validation_transforms = v2.Compose([
    v2.Resize((img_dimensions, img_dimensions)),
    # transforms.ToTensor(),
    # transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225] )
    ])

In [56]:
import torch
import matplotlib.pyplot as plt
import torchvision.transforms.functional as F

def rl_decode(rl_str, height, length):
  mask = np.zeros(shape=(1,height,length))
  couples = rl_str.split()
  for i in range(0, len(couples)-1, 2):
    # print(i)
    el = int(couples[i])
    qty = int(couples[i+1])
    r,c = np.unravel_index(el,(height,length))
    for j in range(qty):
      # mask[0, r, c+j] = 1
      mask[0, c+j-1, r-1] = 1

    # print(torch.Tensor(mask))
  return torch.Tensor(mask).reshape((768, 768)).gt(0)

def show(imgs, rotation=None):

    if rotation:
          imgs = rotate(imgs, rotation)

    if not isinstance(imgs, list):
        imgs = [imgs]
    fix, axs = plt.subplots(ncols=len(imgs), squeeze=False)
    for i, img in enumerate(imgs):
        img = img.detach()
        img = F.to_pil_image(img)
        axs[0, i].imshow(np.asarray(img))
        axs[0, i].set(xticklabels=[], yticklabels=[], xticks=[], yticks=[])

class ShipsDataset(torch.utils.data.Dataset):
    def __init__(self, file_list, targets, transforms = None, target_transforms = None):
        self.file_list = file_list
        self.targets = targets
        self.transform = transforms

    def __len__(self):
        self.filelength = len(self.file_list)
        return self.filelength

    def __getitem__(self, idx):
        image = read_image(self.file_list[idx])    # numpy tensor

        image = F.convert_image_dtype(image)
        # Added this line to fix this problem (ENF) during training
        # TypeError: Expected input images to be of floating type (in range [0, 1]), but found type torch.uint8 instead

        # try:
        label = self.targets[idx]       # dictionary {"boxes": , "label": }
        label['boxes'] = torch.Tensor(label['boxes'])
        label['labels'] = torch.Tensor(label['labels']).to(dtype=torch.int64).reshape((-1,))
        # except IndexError as e:
        #     Warning(f'Errore con {idx = }')
        #     plt.imshow(image.permute(1, 2, 0))
        #     plt.show()

        if self.transform:
            image = self.transform(image, label)

            # prova ad indagare da qui
            # image = self.transform(image)
            # image = image.numpy()
            # return image, label
            # print(f"{image = }")
            # print(f"{label = }")

        return image, label

In [57]:
from sklearn.model_selection import train_test_split
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision import tv_tensors

# DATASET_DIR = os.path.join(".")
TRAIN_DIR = "/kaggle/input/airbus-ship-detection/train_v2"
TEST_DIR = "/kaggle/input/airbus-ship-detection/test_v2"
# print(DATASET_DIR, TRAIN_DIR, TEST_DIR)

train_list = glob.glob(os.path.join(TRAIN_DIR,'*.jpg'))
train_list, test_list = train_test_split(train_list, test_size = 0.99)
train_list, val_list = train_test_split(train_list, test_size = 0.2)
test_list, _ = train_test_split(test_list, test_size = 0.7)


# train_data = ShipsDataset(train_list, transforms = img_train_transforms, targets=np.load('rcnn_targets.npy', allow_pickle='TRUE'))
# # test_data = ShipsDataset(train_list, transforms = img_train_transforms)
# val_data = ShipsDataset(val_list, transforms = img_validation_transforms,targets=np.load('rcnn_targets.npy', allow_pickle='TRUE') )

train_data = ShipsDataset(train_list, transforms = img_train_transforms, targets=np.load('/kaggle/input/rcnn-dataset-py/rcnn_targets.npy', allow_pickle='TRUE'))
test_data = ShipsDataset(test_list, transforms = img_train_transforms, targets=np.load('/kaggle/input/rcnn-dataset-py/rcnn_targets.npy', allow_pickle='TRUE'))
val_data = ShipsDataset(val_list, transforms = img_train_transforms,targets=np.load('/kaggle/input/rcnn-dataset-py/rcnn_targets.npy', allow_pickle='TRUE') )

train_loader = torch.utils.data.DataLoader(dataset = train_data, batch_size = batch_size, shuffle = True, collate_fn=lambda x: x)
val_loader = torch.utils.data.DataLoader(dataset = val_data, batch_size = batch_size, shuffle = True, collate_fn=lambda x: x)
test_loader = torch.utils.data.DataLoader(dataset = test_data, batch_size = batch_size, shuffle = True, collate_fn=lambda x: x)

print(len(train_data),len(train_loader))
print(len(val_data), len(val_loader))

model_rcnn = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights='DEFAULT')

# https://pytorch.org/vision/main/models/generated/torchvision.models.detection.fasterrcnn_resnet50_fpn.html#torchvision.models.detection.fasterrcnn_resnet50_fpn
# La documentazione non è chiara sulla posizione dei punti per le ground-truth!
# /Users/ludovicamazzucco/Library/Python/3.9/lib/python/site-packages/torchvision/models/detection/generalized_rcnn.py"

1540 49
385 13


In [58]:
## STEP 1. freeze backbone layers, add final layers and train the network

for name, param in model_rcnn.named_parameters():
      param.requires_grad = False

num_classes = 2 # background, ship
in_features = model_rcnn.roi_heads.box_predictor.cls_score.in_features
model_rcnn.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)


In [59]:

# How to save in google drive something else
# if save_to_drive:
#   with open('/content/drive/MyDrive/MLVM_project/file.txt', 'w') as f:
#     f.write('content')

print(f"{model_filepath = }")

def save_checkpoint(epoch, model, optimizer, train_loss, val_loss=0, model_name="model.tar"):
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'train_loss': train_loss,
        'val_loss': val_loss
    }, os.path.join(model_filepath, model_name))
    print("Saved model")

model_filepath = 'models/model_epochs10_noaug_id0'


In [60]:
# TRAIN
import gc

def train(model, optimizer, loss_fn, train_loader, val_loader, epochs=1, device="cpu"):
 
    for epoch in range(epochs):
        training_loss = 0.0
        batch_cumsum = 0
        model.train()

        for i, batch in enumerate(train_loader):
            logger.info(f"E: {str(epoch)} B: {str(i)}")
            print(f"epoch {epoch} batch {i}")
            batch_cumsum += len(batch) # needed to compute the training loss later
            optimizer.zero_grad()
            # inputs, targets = batch
            """ inputs = [img for i,el in enumerate(batch)]
            targets = [lab for img,lab in batch] """

            # filtering out empty images (model does not accept empty targets)
            inputs = []
            targets = []
            for el in batch:       # el = (image,labels)
                if el[1]['boxes'].size()[0] != 0:
                    inputs.append(el[0][0])
                    targets.append(el[0][1])
                    # print(f"{el = }")
                    # Example el
                    # el = (tensor([[[0.1006, 0.1249, 0.1552,  ..., 0.1552, 0.1395, 0.1321],
                    #          [0.1224, 0.1331, 0.1243,  ..., 0.1218, 0.1260, 0.1410],
                    #          [0.0948, 0.1149, 0.1300,  ..., 0.1381, 0.1356, 0.1356],
                    #          ...,
                    #          [0.1789, 0.1738, 0.1818,  ..., 0.1401, 0.1428, 0.1169],
                    #          [0.1591, 0.1532, 0.1752,  ..., 0.1555, 0.1481, 0.1131],
                    #          [0.1664, 0.1698, 0.1564,  ..., 0.1268, 0.1538, 0.1393]],

                    #         [[0.2291, 0.2504, 0.2689,  ..., 0.2807, 0.2650, 0.2576],
                    #          [0.2510, 0.2586, 0.2380,  ..., 0.2473, 0.2515, 0.2664],
                    #          [0.2234, 0.2404, 0.2437,  ..., 0.2636, 0.2611, 0.2611],
                    #          ...,
                    #          [0.2966, 0.2914, 0.2995,  ..., 0.2460, 0.2486, 0.2228],
                    #          [0.2768, 0.2709, 0.2928,  ..., 0.2613, 0.2540, 0.2190],
                    #          [0.2840, 0.2874, 0.2741,  ..., 0.2327, 0.2596, 0.2452]],

                    #         [[0.2880, 0.3092, 0.3317,  ..., 0.3396, 0.3238, 0.3164],
                    #          [0.3098, 0.3174, 0.3007,  ..., 0.3062, 0.3103, 0.3253],
                    #          [0.2822, 0.2993, 0.3064,  ..., 0.3224, 0.3199, 0.3199],
                    #          ...,
                    #          [0.3358, 0.3306, 0.3387,  ..., 0.2813, 0.2918, 0.2659],
                    #          [0.3160, 0.3101, 0.3320,  ..., 0.2966, 0.2971, 0.2622],
                    #          [0.3232, 0.3266, 0.3133,  ..., 0.2680, 0.3028, 0.2883]]]), {'boxes': tensor([[0.3932, 0.8464, 0.5208, 0.8776],
                    #         [0.2331, 0.2643, 0.3268, 0.3060],
                    #         [0.2435, 0.2995, 0.4062, 0.3724],
                    #         [0.7188, 0.6198, 0.8281, 0.6784],
                    #         [0.2279, 0.3229, 0.4154, 0.4128]]), 'labels': tensor([1, 1, 1, 1, 1])})
                    # el = (tensor([[[0.0880, 0.0855, 0.0718,  ..., 0.0976, 0.1902, 0.0165],
            if len(inputs) == 0:
                continue
            
           # inputs = inputs.to(device)
           # targets = targets.to(device)
            output = model(inputs,targets)  # NOTE: output is a dict with already computed losses within!

            """ EXAMPLE :
            {'loss_classifier': tensor(1.0206, grad_fn=<NllLossBackward0>),
             'loss_box_reg': tensor(0.0071, grad_fn=<DivBackward0>),
             'loss_objectness': tensor(1.8541), 'loss_rpn_box_reg': tensor(1.8591)} """
          
            loss = sum(loss for loss in output.values())
            #train_loss_list.append(loss.detach().cpu().numpy())
            loss.backward()
            optimizer.step()
            training_loss += loss.data.item() * len(inputs)
            
            del inputs
            del targets
            gc.collect()
            
            if i % 5 == 0:
                save_checkpoint(epoch, model, optimizer, training_loss/batch_cumsum)
        
        # VALIDATION
           
        model.train()
        num_correct = 0
        num_examples = 0
        valid_loss = 0

        for i,batch in enumerate(val_loader):
            print("batch", i)
            inputs = []
            targets = []

            for el in batch:       # el = (image,labels)
                if el[1]['boxes'].size()[0] != 0:
                    inputs.append(el[0][0])
                    targets.append(el[0][1])
            
            if len(inputs) == 0:
                continue
            # inputs = inputs.to(device)
            output = model(inputs, targets)
            # print(f"{output = }")
            # targets = targets.to(device)
            loss = output['loss_box_reg']

            valid_loss += loss.data.item() 

           # correct = torch.eq(torch.max(F.softmax(output, dim=1), dim=1)[1], targets).view(-1)
           # num_correct += torch.sum(correct).item()
           # num_examples += correct.shape[0]

            del inputs
            del targets
            gc.collect()

        valid_loss /= len(val_loader.dataset)
        print( 'Epoch: {}, Training Loss: {:.4f}, Validation Loss: {:.4f}, accuracy = {:.4f}'.format(epoch, training_loss,
        valid_loss, 1))

        logger.info('Epoch: {}, Training Loss: {:.4f}, Validation Loss: {:.4f}, accuracy = {:.4f}'.format(epoch, training_loss,
        valid_loss, 1))
        
        
# TEST

# from torchvision.utils import draw_bounding_boxes
# score_threshold = .5

In [61]:
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [62]:
model = model_rcnn.to(device)
torch.compile(model)
optimizer = optim.Adam(params = model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

In [63]:
# START MODEL TRAINING
if not test_only:
    train(model, optimizer, torch.nn.CrossEntropyLoss(), train_loader, val_loader, epochs=num_epochs, device=device)
    torch.save(model.state_dict(), 'model_state_dict')

batch 0
Saved model
batch 1
batch 2
batch 3
batch 4
batch 5
Saved model
batch 6
batch 7
batch 8
batch 9
batch 10
Saved model
batch 11
batch 12
batch 13
batch 14
batch 15
Saved model
batch 16
batch 17
batch 18
batch 19
batch 20
Saved model
batch 21
batch 22
batch 23
batch 24
batch 25
Saved model
batch 26
batch 27
batch 28
batch 29
batch 30
Saved model
batch 31
batch 32
batch 33
batch 34
batch 35
Saved model
batch 36
batch 37
batch 38
batch 39
batch 40
Saved model
batch 41
batch 42
batch 43
batch 44
batch 45
Saved model
batch 46
batch 47
batch 48
batch 0
batch 1
batch 2
batch 3
batch 4
batch 5
batch 6
batch 7
batch 8
batch 9
batch 10
batch 11
batch 12
Epoch: 0, Training Loss: 1625.3812, Validation Loss: 0.0002, accuracy = 1.0000
batch 0
Saved model
batch 1
batch 2
batch 3
batch 4
batch 5
Saved model
batch 6
batch 7
batch 8
batch 9
batch 10
Saved model
batch 11
batch 12
batch 13
batch 14
batch 15
Saved model
batch 16
batch 17
batch 18
batch 19
batch 20
Saved model
batch 21
batch 22
batch 

In [68]:
# Per scaricare il contenuto di kaggle/working (e quindi recuperare i modelli)
# Crea lo zip della cartella che è stata creata contenente il modello e i log

if not test_only:
    from IPython.display import FileLink
    !zip -r file.zip {model_filepath}
    FileLink(r'file.zip')

updating: models/model_epochs10_noaug_id0/ (stored 0%)
updating: models/model_epochs10_noaug_id0/log_05-19_08.30.26.txt (deflated 82%)
updating: models/model_epochs10_noaug_id0/model.tar (deflated 7%)


In [65]:
def test(model, test_loader, device="cpu"):   
    model.eval()
    num_correct = 0
    num_examples = 0
    test_loss = 0
    
    for i,batch in enumerate(val_loader):
        print("batch", i)
        
        inputs = []
        targets = []
        
        for el in batch:       # el = (image,labels)
            if el[1]['boxes'].size()[0] != 0:
                inputs.append(el[0][0])
                targets.append(el[0][1])
        
        # if len(inputs) == 0:
            # continue
        
        # inputs = inputs.to(device)
        output = model(inputs)
        output = output[0]
        targets = targets[0]
        
        #     # Example output
        #     {'boxes': tensor([[ 0.3801,  0.3060,  3.5638,  3.0348],
        #     [ 0.2037,  0.6570,  1.9538,  4.9389],
        #     [ 0.4993,  0.7045,  5.1531,  5.5368],
        #     [ 0.7172,  0.0860,  8.0819,  3.2724],
        #     [ 0.3548,  1.4842,  3.9183,  9.8673],
        #     [ 0.9226,  0.4096, 11.7943,  6.0310]], grad_fn=<StackBackward0>),
        #     labels': tensor([1, 1, 1, 1, 1, 1]), 'scores': tensor([0.9762, 0.9498, 0.9188, 0.8941, 0.3722, 0.2909],
        #     grad_fn=<IndexBackward0>)},
        
        # loss = sum(output['scores'])
        # print(f"{output = }")
        # test_loss += loss * len(inputs)
        # print(f"{test_loss = }")

        # print(f"{output['scores'] = }")
        
        # res_softmax = torch.softmax(torch.FloatTensor(output['scores']), dim=-1)
        # print(f"{res_softmax = }")
        # print(f"{targets = }")
        
        # correct = torch.eq(torch.max(torch.softmax(torch.FloatTensor(output['scores']), dim=0), dim=0)[1], targets).view(-1)
        # num_correct += torch.sum(correct).item()
        # num_examples += correct.shape[0]
                                    
        
        
        del inputs
        del targets
        gc.collect()
        
    valid_loss /= len(test_loader.dataset)
    print( 'Test Loss: {:.4f}, accuracy = {:.4f}'.format(
    test_loss, 1))

In [66]:
# START MODEL TEST
# checkpoint = torch.load(os.path.join(model_filepath, "model.tar"))
# model.load_state_dict(checkpoint['model_state_dict'])
# test(model, test_loader)

In [67]:
# correct = 0
# total = 0
# with torch.no_grad():
#     for data in val_loader:
#         images, labels = data[0].to(device), data[1].to(device)
#         predictions = torch.argmax(model(images),dim=1)

#         total += labels.size(0)
#         correct += (predictions == labels).sum().item()

# print('accuracy = {:f}'.format(correct / total))
# print('correct: {:d}  total: {:d}'.format(correct, total))