In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# Any results you write to the current directory are saved as output.

In [None]:
import pandas as pd
import numpy as np
import os
from tqdm import tqdm

import matplotlib.image as mpimg
import json

from skimage.color import rgb2gray
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA


import matplotlib.pyplot as plt


# Directory holding the individual image files.
dataFolder = "/kaggle/input/ships-in-satellite-imagery/shipsnet/shipsnet/"


# Load the dataset's JSON metadata into a DataFrame.
with open('/kaggle/input//ships-in-satellite-imagery/shipsnet.json') as data_file:
    data = json.load(data_file)
labelDf = pd.DataFrame(data)


# os.listdir returns entries in arbitrary order, while later cells split
# imgFiles purely by position. Sorting and then shuffling with a fixed seed
# makes those splits reproducible and mixes the files regardless of how the
# filesystem happened to order them.
imgFiles = sorted(os.listdir(dataFolder))
np.random.RandomState(42).shuffle(imgFiles)

In [None]:
import torch
import time
import copy
import torchvision
import torchvision.transforms as transforms
import torch
import torch.nn as nn
import torch.nn.functional as func
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from torchvision.transforms import ToTensor,Resize

from torchvision import models

from torch.optim import lr_scheduler

from PIL import Image

In [None]:
# Preprocessing / augmentation pipeline applied to every PIL image:
# random horizontal flip -> tensor conversion -> per-channel normalisation.
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

In [None]:
#Defining the PyTorch Dataset for retrieving the ships data
class ShipDataset(torch.utils.data.Dataset):
  """Dataset yielding ``(image, label)`` pairs for files under ``dataFolder``.

  Args:
    imgFiles: sequence of image file names (relative to the module-level
      ``dataFolder``). The integer before the first underscore of each name
      is used as the class label.
    transform: callable applied to the RGB PIL image, or ``None`` to return
      the PIL image unchanged.
    index: unused; kept so existing callers that pass it keep working.
  """

  def __init__(self, imgFiles, transform, index=None):
    self.imgFiles = imgFiles
    self.transform = transform

  def __len__(self):
    """Number of image files in this dataset."""
    return len(self.imgFiles)

  def __getitem__(self, index):
    """Load the image at ``index`` and return ``(image, label)``."""
    filename = self.imgFiles[index]

    img = Image.open(os.path.join(dataFolder, filename)).convert('RGB')

    # The label is encoded as the leading integer of the file name.
    label = int(filename.split("_")[0])

    if self.transform is not None:
      img = self.transform(img)

    # No extra division by 255 here: the notebook's transform already ends in
    # ToTensor (which rescales pixel values) followed by Normalize, so dividing
    # again would shrink the normalised inputs by a further factor of 255. The
    # division also raised a TypeError on a PIL image when transform is None.
    return img, label

In [None]:
# Use the GPU when one is available; fall back to CPU so the notebook still
# runs in sessions without an accelerator.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

#Splitting data for train/validation
# The first 3500 files are used for training, the remainder for validation.
# NOTE(review): both splits share `transform`, so the random horizontal flip
# is also applied to validation images - confirm that this is intended.
trainDataset = ShipDataset(imgFiles[:3500], transform)
testDataset = ShipDataset(imgFiles[3500:], transform)

#Defining dataloaders
trainLoader = torch.utils.data.DataLoader(trainDataset, batch_size=30, 
                                          shuffle=True, num_workers=2)


testLoader = torch.utils.data.DataLoader(testDataset, batch_size=40, 
                                          shuffle=False, num_workers=2)


#Define the loss functions
criteria = nn.CrossEntropyLoss()



In [None]:
# Per-phase lookups read by train_model: dataloaders and dataset sizes.
image_datasets = dict(train=trainDataset, val=testDataset)

dataloaders = dict(train=trainLoader, val=testLoader)

dataset_sizes = {phase: len(dataset) for phase, dataset in image_datasets.items()}


In [None]:
#Defining model architecture. Transfer Learning using Resnet34
net = models.resnet34(pretrained=True)
# Swap the final fully connected layer for one with two outputs.
net.fc = nn.Linear(512,2)


net.to(device)

criterion = nn.CrossEntropyLoss()

# StepLR must be attached to an existing optimizer. This cell previously
# referenced `optimizer` before any cell had defined it, which raises a
# NameError on a fresh kernel.
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, net.parameters()), 1e-3)

#Defining Learning Rate scheduler
# Multiplies the learning rate by 0.1 every 2 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)

In [None]:
def freezeLayers(net):
    """Freeze every child module of ``net`` except the last one.

    Sets ``requires_grad`` to False on the parameters of all direct children,
    then re-enables it on the parameters of the final child.
    Returns the same ``net`` object.
    """
    for module in net.children():
        module.requires_grad_(False)

    # Re-enable gradients for the last child only
    head = list(net.children())[-1]
    head.requires_grad_(True)

    return net
    
    
def unfreezeLayers(net):
    """Make the parameters of every child module of ``net`` trainable again.

    Sets ``requires_grad`` to True on the parameters of all direct children
    and returns the same ``net`` object.
    """
    for module in net.children():
        module.requires_grad_(True)

    return net

In [None]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``model`` and return it with its best validation weights loaded.

    Each epoch runs a 'train' phase followed by a 'val' phase. The batches,
    dataset sizes and target device are read from the module-level
    ``dataloaders``, ``dataset_sizes`` and ``device`` variables.

    Args:
        model: network to train; modified in place.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: object providing ``zero_grad()`` and ``step()``.
        scheduler: object providing ``step()``; called once per epoch after
            the training phase.
        num_epochs: number of epochs to run.

    Returns:
        ``(model, best_acc)`` where ``best_acc`` is the highest validation
        accuracy seen (a 0-dim tensor) and ``model`` carries the weights from
        the epoch that achieved it.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    # Kept as a tensor so the return type is the same whether or not any
    # epoch improves on it (callers use ``.cpu()`` on the result).
    best_acc = torch.tensor(0.0, dtype=torch.float64)

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                # loss is a batch mean, so weight it by the batch size
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    # Return the accuracy that belongs to the weights just loaded. The last
    # epoch's accuracy can be lower and is undefined when num_epochs is 0.
    return model, best_acc

In [None]:
#Freezing the backbone and training
net = freezeLayers(net)
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, net.parameters()), 1e-3)
# A StepLR only adjusts the optimizer it was constructed with, so build one
# for the optimizer that is actually used in this stage.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)

train_model(net,criterion, optimizer, exp_lr_scheduler, num_epochs = 6)

#Training the whole network
net = unfreezeLayers(net)
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, net.parameters()), 1e-4)
# New optimizer for fine-tuning, so it needs its own scheduler as well.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)

train_model(net,criterion, optimizer, exp_lr_scheduler, num_epochs = 6)

In [None]:
def runTest(net, criterion, optimizer,exp_lr_scheduler ):
    """Run the two-stage training on ``net`` and return the final accuracy.

    Stage 1 trains with the backbone frozen, using the optimizer and scheduler
    passed in. Stage 2 unfreezes all layers and fine-tunes with a fresh Adam
    optimizer (lr 1e-4) and a StepLR scheduler built for that optimizer.

    Args:
        net: model to train; modified in place.
        criterion: loss function forwarded to ``train_model``.
        optimizer: optimizer used for stage 1.
        exp_lr_scheduler: scheduler used for stage 1.

    Returns:
        The accuracy value returned by the stage 2 ``train_model`` call.
    """
    net = freezeLayers(net)
    model, acc = train_model(net,criterion, optimizer, exp_lr_scheduler, num_epochs = 6)

    net = unfreezeLayers(net)
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, net.parameters()), 1e-4)
    # The scheduler passed in is bound to the stage 1 optimizer, so stepping it
    # would not change this optimizer's learning rate. Build a matching one
    # (same step_size / gamma as used elsewhere in the notebook).
    fineTuneScheduler = lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)

    model, acc = train_model(net,criterion, optimizer, fineTuneScheduler, num_epochs = 6)

    print(f"Best Accuracy : {acc}")

    return acc

In [None]:
#Performing 10-Fold Cross Validation
# Convert to an array so a list of indices can be used to select files.
imgFiles = np.array(imgFiles)

accList = []

numFolds = 10
# `setSize` was never defined, which made this cell fail with a NameError.
# Derive the fold size from the data instead of assuming 4000 files; when the
# count is not divisible by numFolds the leftover files stay in every
# training split.
setSize = len(imgFiles) // numFolds


for i in tqdm(range(numFolds)):
    
    # Fold i is held out for validation, everything else is used for training.
    test = list(range(i*setSize, (i+1)*setSize))
    heldOut = set(test)
    train = [j for j in range(len(imgFiles)) if j not in heldOut]
    
    trainDataset = ShipDataset(imgFiles[train], transform)
    testDataset = ShipDataset(imgFiles[test], transform)


    trainLoader = torch.utils.data.DataLoader(trainDataset, batch_size=30, 
                                              shuffle=True, num_workers=2)


    testLoader = torch.utils.data.DataLoader(testDataset, batch_size=40, 
                                              shuffle=False, num_workers=2)
    
    
    # train_model reads these module-level dicts, so rebind them per fold.
    image_datasets = {'train':trainDataset, 'val':testDataset}
    dataloaders = {"train":trainLoader, "val":testLoader}
    dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
    
    
    # Start every fold from a fresh pretrained network.
    net = models.resnet34(pretrained=True)
    net.fc = nn.Linear(512,2)
    net.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, net.parameters()), 1e-3)
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)
    
    
    acc = runTest(net, criterion, optimizer,exp_lr_scheduler)
    accList.append(acc)

    
    
    

In [None]:
# Display the per-fold accuracies collected by the cross-validation loop.
accList

In [None]:
# Average accuracy over all folds; each entry is moved to the CPU and
# converted to NumPy before averaging.
foldAccuracies = [foldAcc.cpu().numpy() for foldAcc in accList]
np.mean(foldAccuracies)