In [90]:
import csv
import os
import time
from zipfile import ZipFile

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import Dataset, DataLoader

In [2]:

class ImageDataset(Dataset):
    """Dataset of image files in one folder, labelled through a CSV file.

    Args:
        csv_file: Path to a CSV with a ``filename`` and a ``vessel_type`` column.
        image_path: Folder that contains the image files.
        transform: Optional callable applied to each loaded PIL image.

    Only files that are present in ``image_path`` AND listed in the CSV are
    kept, and each label is looked up by file name, so ``file_list[i]`` and
    ``labels[i]`` always describe the same image.
    """

    def __init__(self, csv_file, image_path, transform=None):
        df = pd.read_csv(csv_file)
        # One label per file name; if a name is repeated the first row wins.
        labels_by_name = (
            df.drop_duplicates(subset="filename")
            .set_index("filename")["vessel_type"]
        )
        # os.listdir order is arbitrary and unrelated to the CSV row order:
        # sort for a reproducible order and drop files that have no label.
        self.filename_list = [
            name for name in sorted(os.listdir(image_path))
            if name in labels_by_name.index
        ]
        self.file_list = [os.path.join(image_path, name) for name in self.filename_list]
        # Look every label up by name instead of relying on positional order.
        self.labels = labels_by_name.loc[self.filename_list].to_numpy()
        self.transform = transform

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair stored at position ``index``."""
        # Copy the pixel data out and close the file right away so that the
        # dataset does not accumulate open file handles.
        with Image.open(self.file_list[index]) as handle:
            img = handle.copy()
        if self.transform:
            img = self.transform(img)
        label = self.labels[index]
        return img, label

    def __len__(self):
        """Number of labelled images available in the folder."""
        return len(self.labels)


In [3]:
# Label CSV and the folders holding the train / test images.
csv_file = r"D:\Databases\Patch_Uint8\dataset.csv"
train_path = r"D:\Databases\Patch_Uint8\train"
test_path = r"D:\Databases\Patch_Uint8\test"

# Single source of truth for the batch size of both loaders.
BATCH_SIZE = 64

# Convert every PIL image to a tensor before batching.
transform = transforms.Compose([
    transforms.ToTensor(),
])

train_vessel_dataset = ImageDataset(csv_file, train_path, transform)
test_vessel_dataset = ImageDataset(csv_file, test_path, transform)

# Shuffle only the training data. Evaluation metrics do not depend on the
# sample order, and a fixed order keeps evaluation runs comparable.
train_dataloader = DataLoader(train_vessel_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(test_vessel_dataset, batch_size=BATCH_SIZE, shuffle=False)


In [85]:
import torchvision.models as models

# Use the GPU if one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ImageNet-pretrained VGG16.
# NOTE(review): newer torchvision releases prefer the `weights=` argument over
# `pretrained=True` - confirm against the installed version.
model = models.vgg16(pretrained=True)

# Swap the first convolution for one that accepts a single input channel.
# This layer is freshly initialised and therefore has to be trained.
model.features[0] = nn.Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

# Replace the last classifier layer with a 10-class head. Assigning to
# `out_features` only changes an attribute: the weight matrix keeps its
# original shape and the network would still emit the original logits.
num_features = model.classifier[6].in_features
model.classifier[6] = nn.Linear(num_features, 10)

model.to(device)

VGG(
  (features): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [77]:
# Freeze the convolutional backbone ...
for param in model.features.parameters():
    param.requires_grad = False

# ... except its first convolution, which is left trainable.
for param in model.features[0].parameters():
    param.requires_grad = True

# Optimise every parameter that is still trainable. Passing only
# model.classifier.parameters() would leave features[0] out of the optimizer,
# so it would never be updated even though requires_grad is True.
trainable_params = [param for param in model.parameters() if param.requires_grad]
optimizer = torch.optim.SGD(trainable_params, lr=0.001, momentum=0.9)

# Loss function (mean cross-entropy over the batch).
criterion = nn.CrossEntropyLoss()

In [86]:
# validation function
def validate(model, test_dataloader):
    """Evaluate ``model`` on every batch of ``test_dataloader``.

    Uses the notebook-level ``device`` and ``criterion``.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        test_dataloader: DataLoader yielding ``(inputs, targets)`` batches.

    Returns:
        Tuple ``(val_loss, val_accuracy)``: mean loss per sample and accuracy
        in percent over ``test_dataloader.dataset``.
    """
    model.eval()
    val_running_loss = 0.0
    val_running_correct = 0
    # No gradients are needed for evaluation; skipping them saves memory and time.
    with torch.no_grad():
        for batch in test_dataloader:
            inputs, target = batch[0].to(device), batch[1].to(device)
            output = model(inputs)
            loss = criterion(output, target)

            # `criterion` returns the batch mean, so weight it by the batch
            # size before dividing by the number of samples below.
            val_running_loss += loss.item() * target.size(0)
            preds = output.argmax(dim=1)
            val_running_correct += (preds == target).sum().item()

    num_samples = len(test_dataloader.dataset)
    val_loss = val_running_loss / num_samples
    val_accuracy = 100. * val_running_correct / num_samples

    return val_loss, val_accuracy

# training function
def fit(model, train_dataloader):
    """Train ``model`` for one pass over ``train_dataloader``.

    Uses the notebook-level ``device``, ``criterion`` and ``optimizer``.

    Args:
        model: Network to train; it is switched to train mode.
        train_dataloader: DataLoader yielding ``(inputs, targets)`` batches.

    Returns:
        Tuple ``(train_loss, train_accuracy)``: mean loss per sample and
        accuracy in percent over ``train_dataloader.dataset``.
    """
    model.train()
    train_running_loss = 0.0
    train_running_correct = 0
    for batch in train_dataloader:
        inputs, target = batch[0].to(device), batch[1].to(device)
        optimizer.zero_grad()
        output = model(inputs)
        loss = criterion(output, target)
        # `criterion` returns the batch mean, so weight it by the batch size
        # before dividing by the number of samples below.
        train_running_loss += loss.item() * target.size(0)
        preds = output.argmax(dim=1)
        train_running_correct += (preds == target).sum().item()
        loss.backward()
        optimizer.step()

    num_samples = len(train_dataloader.dataset)
    train_loss = train_running_loss / num_samples
    train_accuracy = 100. * train_running_correct / num_samples
    print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.2f}')

    return train_loss, train_accuracy

In [87]:
# Per-epoch history of the training and validation metrics.
train_loss, train_accuracy = [], []
val_loss, val_accuracy = [], []

start = time.time()
for epoch in range(10):
    train_epoch_loss, train_epoch_accuracy = fit(model, train_dataloader)
    # Validate on the held-out loader; evaluating on train_dataloader would
    # only re-measure the training data.
    val_epoch_loss, val_epoch_accuracy = validate(model, test_dataloader)
    train_loss.append(train_epoch_loss)
    train_accuracy.append(train_epoch_accuracy)
    val_loss.append(val_epoch_loss)
    val_accuracy.append(val_epoch_accuracy)
end = time.time()
print((end-start)/60, 'minutes')

KeyboardInterrupt: 

In [69]:
# Small convolutional stack: three 5x5 conv -> ReLU -> BatchNorm stages with a
# single 2x2 max-pool after the second stage. Layers are listed in the order
# in which they are applied.
layer_specs = [
    ('conv1', nn.Conv2d(in_channels=1, out_channels=12, kernel_size=5, padding=2)),
    ('relu1', nn.ReLU()),
    ('batch1', nn.BatchNorm2d(12)),
    ('conv2', nn.Conv2d(in_channels=12, out_channels=12, kernel_size=5, padding=2)),
    ('relu2', nn.ReLU()),
    ('batch2', nn.BatchNorm2d(12)),
    ('pool', nn.MaxPool2d(kernel_size=2)),
    ('conv3', nn.Conv2d(in_channels=12, out_channels=24, kernel_size=5, padding=2)),
    ('relu3', nn.ReLU()),
    ('batch3', nn.BatchNorm2d(24)),
]

model = nn.Sequential()
for layer_name, layer in layer_specs:
    model.add_module(layer_name, layer)


"""
model.add_module('relu1', nn.ReLU())
model.add_module('pool1', nn.MaxPool2d(kernel_size=2))

model.add_module('conv2',nn.Conv2d(in_channels=32, out_channels=32,kernel_size=5, padding=2))
model.add_module('relu2', nn.ReLU())
model.add_module('pool2', nn.MaxPool2d(kernel_size=2))


model.add_module('conv3',nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2))
model.add_module('relu3', nn.ReLU())
model.add_module('pool3', nn.MaxPool2d(kernel_size=2))


model.add_module('flatten', nn.Flatten())

model.add_module('fc1', nn.Linear(30976, 1024))
model.add_module('relu3', nn.ReLU())
model.add_module('dropout', nn.Dropout(p=0.5))
model.add_module('fc2', nn.Linear(1024, 10))

"""

"\nmodel.add_module('relu1', nn.ReLU())\nmodel.add_module('pool1', nn.MaxPool2d(kernel_size=2))\n\nmodel.add_module('conv2',nn.Conv2d(in_channels=32, out_channels=32,kernel_size=5, padding=2))\nmodel.add_module('relu2', nn.ReLU())\nmodel.add_module('pool2', nn.MaxPool2d(kernel_size=2))\n\n\nmodel.add_module('conv3',nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2))\nmodel.add_module('relu3', nn.ReLU())\nmodel.add_module('pool3', nn.MaxPool2d(kernel_size=2))\n\n\nmodel.add_module('flatten', nn.Flatten())\n\nmodel.add_module('fc1', nn.Linear(30976, 1024))\nmodel.add_module('relu3', nn.ReLU())\nmodel.add_module('dropout', nn.Dropout(p=0.5))\nmodel.add_module('fc2', nn.Linear(1024, 10))\n\n"

In [71]:
# Push one dummy single-channel 180x180 image through the model and show the
# shape of what comes out.
x = torch.ones(1, 1, 180, 180)
model(x).shape

torch.Size([1, 24, 90, 90])

In [58]:
# Loss, device and optimizer used by train() below.
loss_fn = nn.CrossEntropyLoss()

# Keep the model on the CPU for this experiment.
device = torch.device("cpu")
model = model.to(device)

optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)

def train(model, num_epochs, train_dl, valid_dl):
    """Train ``model`` for ``num_epochs`` epochs, validating after each one.

    Uses the notebook-level ``device``, ``loss_fn`` and ``optimizer``.

    Args:
        model: Network to optimise.
        num_epochs: Number of passes over ``train_dl``.
        train_dl: DataLoader yielding ``(inputs, targets)`` training batches.
        valid_dl: DataLoader yielding ``(inputs, targets)`` validation batches.

    Returns:
        Tuple ``(loss_hist_train, loss_hist_valid, accuracy_hist_train,
        accuracy_hist_valid)`` with one entry per epoch; losses are means per
        sample and accuracies are fractions of correctly classified samples.
    """
    loss_hist_train, accuracy_hist_train = [], []
    loss_hist_valid, accuracy_hist_valid = [], []

    for epoch in range(num_epochs):
        # ---- training pass ----
        model.train()
        running_loss, running_correct = 0, 0
        for inputs, targets in train_dl:
            inputs, targets = inputs.to(device), targets.to(device)
            logits = model(inputs)
            batch_loss = loss_fn(logits, targets)
            batch_loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            # Weight the batch loss by the batch size so that the epoch value
            # is a per-sample mean after the division below.
            running_loss += batch_loss.item() * targets.size(0)
            hits = (logits.argmax(dim=1) == targets).float()
            running_correct += hits.sum().cpu()

        train_size = len(train_dl.dataset)
        loss_hist_train.append(running_loss / train_size)
        accuracy_hist_train.append(running_correct / train_size)

        # ---- validation pass (no gradients) ----
        model.eval()
        running_loss, running_correct = 0, 0
        with torch.no_grad():
            for inputs, targets in valid_dl:
                inputs, targets = inputs.to(device), targets.to(device)
                logits = model(inputs)
                batch_loss = loss_fn(logits, targets)
                running_loss += batch_loss.item() * targets.size(0)
                hits = (logits.argmax(dim=1) == targets).float()
                running_correct += hits.sum().cpu()

        valid_size = len(valid_dl.dataset)
        loss_hist_valid.append(running_loss / valid_size)
        accuracy_hist_valid.append(running_correct / valid_size)

        print(f'Epoch {epoch+1} accuracy: {accuracy_hist_train[epoch]:.4f} val_accuracy: {accuracy_hist_valid[epoch]:.4f}')

    return loss_hist_train, loss_hist_valid, accuracy_hist_train, accuracy_hist_valid

# Fix the torch RNG seed, then run the training loop and keep its history.
num_epochs = 20
torch.manual_seed(1)
hist = train(
    model,
    num_epochs,
    train_dataloader,
    test_dataloader,
)


Epoch 1 accuracy: 0.5638 val_accuracy: 0.5797
Epoch 2 accuracy: 0.5794 val_accuracy: 0.5797
Epoch 3 accuracy: 0.5796 val_accuracy: 0.5797
Epoch 4 accuracy: 0.5791 val_accuracy: 0.5797
Epoch 5 accuracy: 0.5797 val_accuracy: 0.5797
Epoch 6 accuracy: 0.5794 val_accuracy: 0.5797
Epoch 7 accuracy: 0.5797 val_accuracy: 0.5797
Epoch 8 accuracy: 0.5798 val_accuracy: 0.5797
Epoch 9 accuracy: 0.5797 val_accuracy: 0.5797
Epoch 10 accuracy: 0.5797 val_accuracy: 0.5797


KeyboardInterrupt: 

In [59]:
# Plot the loss and accuracy histories stored in `hist`, one panel each.
x_arr = np.arange(len(hist[0])) + 1
fig = plt.figure(figsize=(12, 4))

# (subplot position, train series, validation series,
#  train label, validation label, y-axis label)
panels = [
    (1, hist[0], hist[1], 'Train loss', 'Validation loss', 'Loss'),
    (2, hist[2], hist[3], 'Train acc.', 'Validation acc.', 'Accuracy'),
]

for position, train_curve, valid_curve, train_label, valid_label, y_label in panels:
    ax = fig.add_subplot(1, 2, position)
    ax.plot(x_arr, train_curve, '-o', label=train_label)
    ax.plot(x_arr, valid_curve, '--<', label=valid_label)
    ax.set_xlabel('Epoch', size=15)
    ax.set_ylabel(y_label, size=15)
    ax.legend(fontsize=15)

#plt.savefig('figures/14_13.png')
plt.show()

NameError: name 'hist' is not defined

torch.Size([1, 10])

3852


In [None]:
#Extract photos
# Unpack only the "Patch_Uint8" members of every zip archive in `directory`.
directory = r"D:\Python\Databases\OpenSARShip2"

# Archive names without the ".zip" suffix. splitext only strips the last
# extension, so a name such as "a.b.zip" maps to "a.b" instead of "a".
files = [
    os.path.splitext(file)[0]
    for file in os.listdir(directory)
    if file.endswith('.zip')
]

for file in files:
    archive_path = os.path.join(directory, file + ".zip")
    # Bind the archive to `archive`, not `zip`: in a notebook that name would
    # stay rebound and hide the builtin zip() from every later cell.
    with ZipFile(archive_path, 'r') as archive:
        # Keep only the members whose top-level folder is "Patch_Uint8".
        list_files = [
            member for member in archive.namelist()
            if member.split("/")[0] == "Patch_Uint8"
        ]

        for zip_file in list_files:
            archive.extract(zip_file, path=directory)

#Tiff to png
# Convert every .tif file in `directory` to a .png next to it, then delete
# the original .tif.

directory = r"D:\Python\Databases\OpenSARShip2\Patch_Uint8"

for file in os.listdir(directory):
    name, extension = os.path.splitext(file)
    # Skip everything that is not a .tif (e.g. PNGs from an earlier run), so
    # re-running the cell does not produce "x.png.png" and delete "x.png".
    if extension.lower() != ".tif":
        continue
    png = os.path.join(directory, name + ".png")
    tif = os.path.join(directory, file)
    with Image.open(tif) as img:
        img.save(png)
    # Delete only after the file has been closed: removing a file that is
    # still open fails on Windows.
    os.remove(tif)

#Save to .npz format
# Load every patch image, map its vessel type to a class index and write a
# "filename,vessel_type" CSV. Images whose type is not one of the kept
# classes are deleted from disk.
directory = r"D:\Python\Databases\OpenSARShip2\Patch_Uint8"

# Classes kept for the dataset; the position in the list is the class index.
types = ["Cargo",
"Dredging or underwater ops",
"Fishing",
"High speed craft",
"Law Enforcement",
"Other Type",
"Passenger",
"Tanker",
"Towing",
"Tug"]

image = []
vessel_class = []
names = []
rows = []

for file in sorted(os.listdir(directory)):
    parts = file.split("_", 2)
    if len(parts) < 3:
        # Not a "<prefix>_<type>_<rest>" patch name (for instance a
        # dataset.csv written by an earlier run): leave it untouched.
        continue
    vessel_type = parts[1]
    path = os.path.join(directory, file)

    if vessel_type not in types:
        # Unwanted class: drop the image. It has not been opened yet, so the
        # delete cannot fail because of an open handle.
        os.remove(path)
        continue

    with Image.open(path) as im:
        single_array = np.array(im)

    number_class = types.index(vessel_type)
    image.append(single_array)
    vessel_class.append(number_class)
    # Record the name together with its label so both stay aligned, instead
    # of listing the folder a second time after files were deleted.
    names.append(file)
    rows.append((file, number_class))

header = ["filename", "vessel_type"]

# Kept separate from `directory`, which must keep pointing at the image folder.
csv_file = r"D:\Python\Databases\OpenSARShip2\Patch_Uint8\dataset.csv"

# newline="" is what the csv module expects; without it blank lines appear
# between rows on Windows.
with open(csv_file, "w", encoding="UTF8", newline="") as f:
    writer = csv.writer(f)

    writer.writerow(header)
    writer.writerows(rows)


#np.savez("ship_dataset.npz", image=image, vessel_class=vessel_class, dtype=object) # save all in one file

#Create balanced train and test datasets
from sklearn.model_selection import StratifiedShuffleSplit

# `df` is not defined anywhere at notebook level, so load the label table here.
# NOTE(review): assumes `csv_file` points at the filename/vessel_type CSV
# describing the images in `init_directory` - confirm.
df = pd.read_csv(csv_file)

X = df["filename"]
y = df["vessel_type"]

# One stratified 80/20 split with a fixed seed for reproducibility.
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=9)
train_index, test_index = next(sss.split(X, y))

init_directory = r"D:\Python\Databases\Patch_Uint8"
final_directory = r"D:\Python\Databases\Patch_Uint8\test"

# os.rename fails when the destination folder is missing.
os.makedirs(final_directory, exist_ok=True)

# Move every image selected for the test split into the test folder.
for index in test_index:
    # The split returns positions, so index by position rather than by label.
    name = X.iloc[index]

    init_filename = os.path.join(init_directory, name)
    final_filename = os.path.join(final_directory, name)
    os.rename(init_filename, final_filename)

    #Resize images
def resize_image(filename, new_size=180):
    """Centre-crop and/or zero-pad an image file to ``new_size`` x ``new_size``.

    The file is overwritten in place. Each axis is handled independently: an
    axis longer than ``new_size`` is cropped around its centre, a shorter one
    is centred on a blank canvas. Images that already have the target size
    are left untouched.

    Args:
        filename: Path of the image to resize; also the output path.
        new_size: Side length in pixels of the square result.
    """
    with Image.open(filename) as img:
        img.load()
        width, height = img.size

        if (width, height) == (new_size, new_size):
            return

        # Central region of the source, at most new_size wide and high.
        left = max((width - new_size) // 2, 0)
        top = max((height - new_size) // 2, 0)
        right = min(left + new_size, width)
        bottom = min(top + new_size, height)
        region = img.crop((left, top, right, bottom))

        # Blank canvas of the target size with the region pasted in its centre.
        new_img = Image.new(img.mode, (new_size, new_size))
        offset = ((new_size - region.width) // 2, (new_size - region.height) // 2)
        new_img.paste(region, offset)

    # Write only after the source file has been closed.
    new_img.save(filename)

from PIL import Image

# Resize, in place, every file found in the test folder.
init_directory = r"D:\Python\Databases\Patch_Uint8\test"
files = os.listdir(init_directory)

for filename in (os.path.join(init_directory, entry) for entry in files):
    resize_image(filename)

#Read npz database
# NOTE(review): allow_pickle=True lets np.load unpickle objects stored in the
# file, which can execute arbitrary code - only load archives you trust.
data = np.load(r"D:\Python\Databases\ship_dataset\ship_dataset.npz", allow_pickle=True) 
files = data.files 

# Read each array once: indexing the loaded archive reads the member again on
# every access, which the counting loop below would otherwise do ten times.
images = data['image']
labels = data['vessel_class']

print('Shape - "Image" item: ' + str(images.shape))
print('Shape - "Label" item: ' + str(labels.shape))

# Show one sample together with its class index.
index = 3454
plt.imshow(images[index], cmap='Greys')
print('Ship class: ' + str(labels[index]))

# Number of samples per class index 0..9.
for i in range(10):
    veces = np.count_nonzero(labels == i)
    print(f"La clase {i} ha aparecido {veces} veces")

#Clean data
# Raw string: the backslashes are path separators, not escape sequences.
directory = r"D:\Python\Databases\ship_dataset_reduced"

# Annotation files only. Build a new list instead of calling remove() on the
# list being iterated, which silently skips the element after each removal.
files = [file for file in os.listdir(directory) if file.endswith('.txt')]

# Report the annotation files that contain more than one line.
multi_line_files = []
for file in files:
    filename = os.path.join(directory, file)
    with open(filename, 'r') as fp:
        if len(fp.readlines())>1:
            print(file)
            multi_line_files.append(file)

# NOTE(review): the previous loop deleted EVERY annotation/picture pair, which
# would empty the folder. This assumes the intent was to drop only the samples
# reported above - confirm.
for file in multi_line_files:
    name = os.path.splitext(file)[0]
    txt = os.path.join(directory, name+".txt")
    picture = os.path.join(directory, name+".jpg")
    os.remove(txt)
    # Some annotations may have no matching picture; do not fail on those.
    if os.path.exists(picture):
        os.remove(picture)

# Raw string: the backslashes are path separators, not escape sequences.
directory = r"D:\Python\Databases\ship_dataset_reduced"

# Base names (without extension) of every .jpg picture. The folder is listed
# once only: collecting the names a second time would duplicate every sample
# in the lists filled below.
files = [
    os.path.splitext(file)[0]
    for file in os.listdir(directory)
    if file.endswith('.jpg')
]

train_folder = os.path.join(directory, "train")
test_folder = os.path.join(directory, "test")

# exist_ok=True keeps the cell re-runnable once the folders exist.
os.makedirs(train_folder, exist_ok=True)
os.makedirs(test_folder, exist_ok=True)

# Filled from the annotation files, one entry per picture.
vessel_classes = []
normalized_ship_widths = []
normalized_ship_lenghts = [] 

# Read the first line of each sample's .txt annotation and collect three of
# its space-separated fields (kept as strings).
# NOTE(review): presumably a "class x y width height" style annotation line,
# judging by the list names - confirm the field order.
for file in files:
    # `file` is a base name without extension; the annotation sits next to the picture.
    txt = os.path.join(directory, file)+".txt"
    with open(txt, 'r') as fp:
        # Only the first line is read; any further lines are ignored.
        data = fp.readline().strip().split(" ")
        vessel_classes.append(data[0])
        normalized_ship_widths.append(data[3])
        normalized_ship_lenghts.append(data[4])