In [1]:
# Mount Google Drive into the Colab runtime; the mount point is /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
cd "/content/drive/MyDrive/deep"

In [None]:
import os 
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.image as mpimg
import torch
import torchvision.models as models
import torch.nn as nn
from torch.utils.data.sampler import SubsetRandomSampler
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader 
import torchvision
import torchvision.transforms as transforms
from torchsummary import summary
from PIL import Image
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Helper Functions

In [None]:
# Checkpoints are written to ./models by train(); create the folder if it is missing.
# exist_ok=True makes the cell safe to re-run without a separate existence check.
os.makedirs("models", exist_ok=True)

In [None]:
def train(net, trainloader, optimizer, scheduler, num_epochs, start_epoch=0,
          patience=15, checkpoint_file='./models/saved_params.pt'):
    """Train `net`, validate after every epoch and checkpoint the best model.

    The loss function and the validation loader are NOT parameters: they are
    read from the module-level names `criterion` and `valloader`, which must be
    defined before this function is called.

    Args:
        net: model to optimise; moved to the GPU when CUDA is available.
        trainloader: iterable yielding (inputs, labels) training batches.
        optimizer: optimizer stepped once per training batch.
        scheduler: learning-rate scheduler stepped once per epoch.
        num_epochs: exclusive upper bound of the epoch counter.
        start_epoch: first epoch index (use the checkpointed epoch + 1 to resume).
        patience: number of consecutive epochs without a new best validation
            loss after which training stops early.
        checkpoint_file: where the best-so-far checkpoint is written.

    Returns:
        dict with keys 'train' and 'val', each a list holding the mean loss of
        every completed epoch. The dict is returned on early stopping as well.
    """
    best_val_loss = np.inf
    saturate_count = 0
    loss_stats = {
        'train': [],
        "val": []
    }
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        net.cuda()

    for e in range(start_epoch, num_epochs):
        # ---- training pass ----
        net.train()
        running_loss = 0.0
        running_count = 0
        for inputs, labels in trainloader:
            # reset the gradients accumulated by the previous batch
            optimizer.zero_grad()
            if use_cuda:
                inputs = inputs.cuda()
                labels = labels.cuda()
            outs = net(inputs)
            # labels are cast to float before being handed to the criterion
            loss = criterion(outs, labels.float())
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            running_count += 1
        # mean of the per-batch losses for this epoch
        train_loss = running_loss / running_count
        loss_stats['train'].append(train_loss)
        # advance the learning-rate schedule once per epoch
        scheduler.step()

        # ---- validation pass ----
        net.eval()
        running_loss = 0.0
        running_count = 0
        # no parameter update happens here, so skip building autograd graphs
        with torch.no_grad():
            for inputs, labels in valloader:
                if use_cuda:
                    inputs = inputs.cuda()
                    labels = labels.cuda()
                outs = net(inputs)
                loss = criterion(outs, labels.float())
                running_loss += loss.item()
                running_count += 1
        val_loss = running_loss / running_count
        loss_stats['val'].append(val_loss)

        print(f'Epoch {e+1:2d}/{num_epochs:d} : train_loss = {train_loss:.4f}, val_loss = {val_loss:.4f}')

        if val_loss < best_val_loss:
            # new best model: reset the patience counter and checkpoint it
            best_val_loss = val_loss
            saturate_count = 0
            torch.save({
                'epoch': e,
                'train_loss': train_loss,
                'val_loss': val_loss,
                'model_state_dict': net.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'scheduler_state_dict': scheduler.state_dict()
            }, checkpoint_file)
        else:
            saturate_count += 1
            if saturate_count >= patience:
                print('Early stopping!')
                # stop iterating but still hand the history back to the caller
                break
    return loss_stats

In [None]:
def evaluate(net, testloader):
    """Measure classification accuracy of `net` on `testloader`.

    Both the network outputs and the targets are reduced with an arg-max over
    dimension 1, so targets are expected to be one-hot (or score) rows.

    Args:
        net: model to evaluate; switched to eval mode and moved to the GPU when
            CUDA is available (the batches are moved there as well).
        testloader: iterable of (inputs, targets) batches exposing a `.dataset`
            whose length is the total number of samples.

    Returns:
        list of floats, one per batch: the percentage of the WHOLE dataset that
        has been classified correctly after that batch (a cumulative curve, so
        the last entry is the final accuracy). Empty when there are no batches.
    """
    accuracy_stats = []

    use_cuda = torch.cuda.is_available()
    net.eval()
    if use_cuda:
        # keep the model on the same device as the batches moved below
        net.cuda()

    total = len(testloader.dataset)
    running_corrects = 0
    # stays 0.0 when the loader yields nothing, so the final print cannot fail
    accuracy = 0.0

    # inference only: no gradients are needed for any batch
    with torch.no_grad():
        for inputs, targets in testloader:
            if use_cuda:
                inputs = inputs.cuda()
                targets = targets.cuda()

            outputs = net(inputs)
            # predicted / actual class = index of the largest entry in each row
            _, predicted = torch.max(outputs, 1)
            _, actual = torch.max(targets, 1)
            running_corrects += (actual == predicted).sum().item()

            accuracy = 100.0 * running_corrects / total
            accuracy_stats.append(accuracy)

    print('Accuracy = {:.2f}%'.format(accuracy))

    return accuracy_stats

# Prepare the dataset (.csv) at file level
The 6252 labelled images are used as our dataset.<br>
Dataset --> Train (0.8), Test (0.2)<br>
Train --> Train (0.9), Validation (0.1), i.e. the validation set is taken from the 80% training portion<br>


In [None]:
# Full label table; later cells read its 'image' and 'category' columns.
df = pd.read_csv('./train/train.csv')

In [None]:
# Fraction of rows kept for training; the remainder becomes the test set.
train_size=0.8
# Row position at which the table is cut into train / test.
split_csv=round(len(df)*train_size)
print(split_csv)

In [None]:
# Positional split: the first `split_csv` rows go to training, the rest to test,
# and both parts are written next to the original CSV.
# NOTE(review): rows are not shuffled before the cut, so if train.csv is ordered
# (e.g. by category) the two files will have different class mixes — confirm.
train_file=df[:split_csv]
test_file=df[split_csv:]
train_file.to_csv('./train/train_file.csv', index=False)
test_file.to_csv('./train/test_file.csv', index=False)

In [None]:
# Read the two files back; unlike the `test_file` slice, these frames are indexed from 0.
df_train=pd.read_csv('./train/train_file.csv')
df_test=pd.read_csv('./train/test_file.csv')

In [None]:
# Quick look at the first training rows.
df_train.head()

# Data Visualization

In [None]:
# Report how many rows ended up in each split file.
print(f'Number of training set: {len(df_train)}')
print(f'Number of test sample: {len(df_test)}')

In [None]:
# Human-readable names; later cells look them up as ship[category - 1].
ship = ['Cargo', 'Military', 'Carrier', 'Cruise', 'Tankers']

# Bar chart of how often each category occurs in the training rows.
# countplot returns a matplotlib Axes, which is what gets labelled below.
ax = sns.countplot(x=train_file['category'].values)
ax.set_title('Count of each ship type')
ax.set_xlabel('Category')
# NOTE(review): the tick labels are applied in bar order, so this assumes the bars
# appear in the same order as `ship` — confirm.
ax.set_xticklabels(ship)
plt.show()

In [None]:
# display image
# Row of the training table to preview.
idx = 4

# Unpacking into two names requires the table to have exactly two columns.
image_name, category = train_file.iloc[idx]
imgFile = mpimg.imread(f'./train/images/{image_name}')
plt.imshow(imgFile)
# `ship` is indexed with category - 1.
print('Category =',category , ' Ship =', ship[category-1])

# Create dataset class

In [None]:
class ShipDataset(Dataset):
  """Image-classification dataset described by a CSV file.

  The CSV must provide an 'image' column (file names relative to `imgFolder`)
  and a 'category' column (class label per image). Labels are exposed as
  one-hot int8 rows in `self.y`.

  Args:
    filename: path of the CSV file.
    transform: optional callable applied to every loaded PIL image.
    imgFolder: directory containing the image files.
    categories: optional ordered sequence of all class labels. Column i of the
      one-hot encoding corresponds to categories[i]. When omitted, the sorted
      distinct labels found in this CSV are used, so the label width then
      depends on which classes the file happens to contain; pass the full list
      to keep the encoding identical across different CSV files.

  Raises:
    ValueError: if the CSV contains a label that is not in `categories`.
  """

  def __init__(self, filename, transform=None, imgFolder='./train/images', categories=None):
    CSVfile = pd.read_csv(filename)
    self.data = CSVfile['image'].tolist() # image file names
    self.transform = transform
    self.imgFolder = imgFolder

    labels = CSVfile['category'].tolist()
    if categories is None:
      classes = sorted(set(labels))
    else:
      classes = list(categories)
    self.categories = classes

    # label -> column position in the one-hot encoding
    column_of = {label: position for position, label in enumerate(classes)}
    try:
      columns = [column_of[label] for label in labels]
    except KeyError as err:
      raise ValueError(
        'category {!r} in {} is not one of {}'.format(err.args[0], filename, classes)
      ) from err

    # picking rows of the identity matrix yields one one-hot row per sample
    self.y = np.eye(len(classes), dtype='int8')[np.asarray(columns, dtype=np.intp)]

  def __len__(self):
    return len(self.data)

  def __getitem__(self, idx):
    """Return (image, one-hot label) for sample `idx`."""
    # load the image and force three colour channels
    path = os.path.join(self.imgFolder, self.data[idx])
    image = Image.open(path).convert('RGB')

    # perform transformation
    if self.transform is not None:
      image = self.transform(image)

    # get the label
    label = self.y[idx]

    # return sample
    return image, label

In [None]:
# Augmentations
# Training pipeline: resize to 256x256, take a random 224x224 crop, flip
# horizontally at random, convert to a tensor and normalise each channel with
# mean 0.5 / std 0.5.
# NOTE(review): the models below are created with pretrained=True; torchvision's
# pretrained weights are normally paired with ImageNet mean/std normalisation —
# confirm that 0.5/0.5 is intended.
train_transform = transforms.Compose([
  transforms.Resize((256,256)),
  transforms.RandomCrop(224),
  transforms.RandomHorizontalFlip(),
  transforms.ToTensor(),
  transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Evaluation pipeline: deterministic resize straight to 224x224, same normalisation.
test_transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

In [None]:
# 90-10 train-validation split
# Stratified on the category column with a fixed seed, so the split is reproducible.
tr, val = train_test_split(train_file.category, stratify=train_file.category, test_size=0.1, random_state=42)
# The samplers receive pandas index labels, which the DataLoader then uses as
# dataset positions. This lines up with train_file.csv because `train_file` is the
# leading slice df[:split_csv], whose labels start at 0.
train_sampler = SubsetRandomSampler(list(tr.index)) 
valid_sampler = SubsetRandomSampler(list(val.index))

In [None]:
# dataloader
# Batch size shared by the train, validation and test loaders.
BS = 49

# Both datasets read the same CSV; they differ only in the transform, and the two
# samplers restrict each loader to its own subset of rows.
trainset = ShipDataset('./train/train_file.csv', transform=train_transform)
trainloader = DataLoader(trainset, batch_size=BS,sampler=train_sampler,num_workers=2)

# `valloader` is read as a module-level name inside train().
valset = ShipDataset('./train/train_file.csv', transform=test_transform)
valloader =DataLoader(valset, batch_size=BS,sampler=valid_sampler,num_workers=2)

In [None]:
# Pull a single batch to sanity-check the loader output.
x, y = next(iter(trainloader))

In [None]:
# Shapes of the image batch and of its label batch.
print(x.shape)
print(y.shape)

# Train Model

## EfficientNet-B0

In [None]:
# EfficientNet-B0 initialised with pretrained weights.
# NOTE(review): newer torchvision releases replace `pretrained=` with a `weights=`
# argument — confirm against the installed version.
efficientNet = models.efficientnet_b0(pretrained=True)

In [None]:
# Display the module tree to locate the classifier head and the feature blocks.
efficientNet

In [None]:
# Print every parameter name together with its requires_grad flag.
for param_name, tensor in efficientNet.named_parameters():
  print(param_name, tensor.requires_grad)

In [None]:
# Replace the final classifier layer with a 5-way head.
in_c = efficientNet.classifier[1].in_features
# The head outputs raw logits: nn.CrossEntropyLoss (used as the training
# criterion) applies log-softmax itself, so a Softmax layer here would squash the
# scores twice and flatten the gradients. Arg-max predictions are unchanged.
# The nn.Sequential wrapper is kept so the Linear layer keeps its state_dict keys.
efficientNet.classifier[1] = nn.Sequential(
    nn.Linear(in_c, 5),
)

In [None]:
 # freeze the layers
# Name prefixes of the parameters that must not receive gradient updates.
# NOTE(review): the head replaced above lives under `classifier`, so the "fc"
# prefix looks like a leftover from a ResNet-style model and presumably matches
# nothing here — confirm which layers are meant to be frozen.
freeze_layers = ["features.6", "features.7", "features.8", "fc"]

for name, param in efficientNet.named_parameters():
  # str.startswith accepts a tuple and is true if any prefix matches
  if name.startswith(tuple(freeze_layers)):
    param.requires_grad = False

In [None]:
# set up criterion, optimizer, scheduler
# `criterion` is read as a module-level name inside train(); it is not passed in.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(efficientNet.parameters(), lr=0.01, momentum=0.9)
# Multiply the learning rate by 0.1 after every 10 scheduler steps (train() steps it once per epoch).
scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

In [None]:
# Fine-tune EfficientNet-B0 for 15 epochs and keep the per-epoch loss history for the plots below.
efficient_train_loss = train(efficientNet, trainloader, optimizer, scheduler, num_epochs=15)

## ResNet-152

In [None]:
# ResNet-152 initialised with pretrained weights.
# NOTE(review): newer torchvision releases replace `pretrained=` with a `weights=`
# argument — confirm against the installed version.
resnet152 = models.resnet152(pretrained= True)

In [None]:
# Display the module tree to locate the final `fc` layer.
resnet152

In [None]:
# Print every parameter name together with its requires_grad flag.
for param_name, tensor in resnet152.named_parameters():
  print(param_name, tensor.requires_grad)

In [None]:
# Replace the final fully connected layer with a 5-way head.
in_c = resnet152.fc.in_features
# The head outputs raw logits: nn.CrossEntropyLoss (used as the training
# criterion) applies log-softmax itself, so a Softmax layer here would squash the
# scores twice and flatten the gradients. Arg-max predictions are unchanged.
# The nn.Sequential wrapper is kept so the Linear layer keeps its state_dict keys.
resnet152.fc = nn.Sequential(
    nn.Linear(in_c, 5),
)

In [None]:
# set up criterion, optimizer, scheduler
# Rebinds the module-level criterion / optimizer / scheduler for ResNet-152; the
# EfficientNet objects created under the same names are no longer reachable through them.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(resnet152.parameters(), lr=0.01, momentum=0.9)
# Multiply the learning rate by 0.1 after every 10 scheduler steps (train() steps it once per epoch).
scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

In [None]:
# Fine-tune ResNet-152 for 15 epochs and keep the per-epoch loss history for the plots below.
# NOTE(review): train() is called without a checkpoint path here and for
# EfficientNet, so both runs presumably write the same checkpoint file — confirm.
resnet_train_loss = train(resnet152, trainloader, optimizer, scheduler, num_epochs=15)

# Resume Training (if needed)

In [None]:
# # load the checkpoint file
# checkpoint = torch.load('./models/saved_params.pt')
# model_conv.load_state_dict(checkpoint['model_state_dict'])
# optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
# scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
# previous_epoch = checkpoint['epoch']
# previous_train_loss = checkpoint['train_loss']
# previous_val_loss = checkpoint['val_loss']

# # resume training
# print(f'Resuming previous epoch. Last run epoch: {previous_epoch+1}, train loss: {previous_train_loss:.4f}, validation loss: {previous_val_loss:.4f}')
# train (model_conv, trainloader, optimizer, scheduler, num_epochs=30, start_epoch=previous_epoch+1)

# Load Model (if needed)

In [None]:
# model_conv = torch.load("./models/saved_model.pt")

# Evaluation

In [None]:
# dataloader
testset = ShipDataset('./train/test_file.csv', transform=test_transform)
testloader = DataLoader(testset, batch_size=BS,num_workers=2)

In [None]:
# Test accuracy of EfficientNet-B0; the returned list holds the cumulative accuracy after each batch.
efficient_accuracy = evaluate(efficientNet, testloader)

In [None]:
# Test accuracy of ResNet-152; the returned list holds the cumulative accuracy after each batch.
resnet152_accuracy = evaluate(resnet152, testloader)

In [None]:
# save your best model <3
# torch.save(efficientNet, "./models/saved_model_efficientNet.pt")

In [None]:
# Train Loss Graph
# Mean training loss per epoch for both models, as recorded by train().
plt.plot(efficient_train_loss["train"], label='Efficient Net')
plt.plot(resnet_train_loss["train"], label='Resnet152')
plt.title('Training loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Validation Loss Graph
# Mean validation loss per epoch for both models, as recorded by train().
plt.plot(efficient_train_loss["val"], label='Efficient Net')
plt.plot(resnet_train_loss["val"], label='Resnet152')
plt.title('Validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Test Accuracy Graph
# evaluate() appends one value per test batch: the share of the whole test set
# classified correctly so far. The curves therefore rise with the batch index and
# only their last point is the final test accuracy.
plt.plot(efficient_accuracy, label='Efficient Net')
plt.plot(resnet152_accuracy, label='Resnet152')
plt.title('Cumulative test accuracy')
plt.xlabel('Test batch')
plt.ylabel('Correct so far (% of test set)')
plt.legend()
plt.show()