In [1]:
from functions import *

# in order to get reproducable results
torch.manual_seed(1)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(1)
random.seed(1)

import cmapy
from pytorchtools import EarlyStopping
os.environ["CUDA_VISIBLE_DEVICES"] = "3"  # Only GPU 3 is visible to this code
time1 = time.time()

data_folder = "/data/anyu/thebeData"
data_path = "{}/processedThebe".format(data_folder)

best_model_fpath = 'unet_96_48_seed_test.model'
best_iou_threshold=0.5
epoches = 100
patience = 20

modelNo = "unet"
if modelNo == "unet":
    from model_zoo.UNET import Unet
    model = Unet()
    print("use model Unet")
elif modelNo == "deeplab":
    from model_zoo.DEEPLAB.deeplab import DeepLab
    model = DeepLab(backbone='mobilenet', num_classes=1, output_stride=16)
    print("use model DeepLab")
elif modelNo == "hed":
    from model_zoo.HED import HED
    model = HED()
    print("use model HED")
elif modelNo == "rcf":
    from model_zoo.RCF import RCF
    model = RCF()
    print("use model RCF")
else:
    print("please select a valid model")
model.cuda();
summary(model, (1, 96, 96))

use model Unet
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 96, 96]             320
       BatchNorm2d-2           [-1, 32, 96, 96]              64
              ReLU-3           [-1, 32, 96, 96]               0
         Dropout2d-4           [-1, 32, 96, 96]               0
            Conv2d-5           [-1, 32, 96, 96]           9,248
       BatchNorm2d-6           [-1, 32, 96, 96]              64
              ReLU-7           [-1, 32, 96, 96]               0
         Dropout2d-8           [-1, 32, 96, 96]               0
       double_conv-9           [-1, 32, 96, 96]               0
        MaxPool2d-10           [-1, 32, 48, 48]               0
           Conv2d-11           [-1, 64, 48, 48]          18,496
      BatchNorm2d-12           [-1, 64, 48, 48]             128
             ReLU-13           [-1, 64, 48, 48]               0
        Dropout2d-14    

In [2]:
import torch.utils.data
from os.path import splitext
from os import listdir
from glob import glob
class faultsDataset(torch.utils.data.Dataset):
#     def __init__(self, imgs_dir, masks_dir, scale=1, mask_suffix=''):

    def __init__(self, imgs_dir, masks_dir):
#         self.train = train
        self.images_dir = imgs_dir
        self.masks_dir = masks_dir
        self.ids = [splitext(file)[0] for file in listdir(imgs_dir) if not file.startswith('.')]

    def __len__(self):
        return len(self.ids)
    
    def __getitem__(self, i):
        idx = self.ids[i]
        mask = np.load("{}/{}.npy".format(self.masks_dir,idx))
        img = np.load("{}/{}.npy".format(self.images_dir,idx))
#         mask_file = glob(self.masks_dir + idx + '.npy')
#         img_file = glob(self.images_dir + idx + '.npy')

#         assert len(mask_file) == 1, \
#             f'Either no mask or multiple masks found for the ID {idx}: {mask_file}'
#         assert len(img_file) == 1, \
#             f'Either no image or multiple images found for the ID {idx}: {img_file}'
#         mask = np.load(mask_file[0])
#         img = np.load(img_file[0])

        assert img.size == mask.size, \
            f'Image and mask {idx} should be the same size, but are {img.size} and {mask.size}'

        img = np.expand_dims(img, axis=0)
        mask = np.expand_dims(mask, axis=0)

        return (img, mask)


# In[32]:


faults_dataset_train = faultsDataset(imgs_dir = "{}/train/seismic".format(data_path), masks_dir= "{}/train/annotation".format(data_path))
faults_dataset_val = faultsDataset(imgs_dir = "{}/val/seismic".format(data_path), masks_dir= "{}/val/annotation".format(data_path))

batch_size = 64 

train_loader = torch.utils.data.DataLoader(dataset=faults_dataset_train, 
                                           batch_size=batch_size, 
                                           shuffle=True)

val_loader = torch.utils.data.DataLoader(dataset=faults_dataset_val, 
                                           batch_size=batch_size, 
                                           shuffle=False)

In [3]:
optimizer = torch.optim.SGD(model.parameters(), lr=1e-6, momentum=0.9, weight_decay=0.0002)
if modelNo == "hed" or modelNo == "rcf":
    print("optimizer = torch.optim.SGD(model.parameters(), lr=1e-6, momentum=0.9, weight_decay=0.0002)")
if modelNo == "unet" or modelNo == "deeplab":
    print("optimizer = torch.optim.Adam(model.parameters(), lr=0.01)")
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min',factor=0.1, patience=5, verbose=True)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)


In [None]:
bceloss = nn.BCELoss()
mean_train_losses = []
mean_val_losses = []
mean_train_accuracies = []
mean_val_accuracies = []
t_start = time.time()
early_stopping = EarlyStopping(patience=patience, verbose=True, delta = 0)
for epoch in range(epoches):                  
    train_losses = []
    val_losses = []
    train_accuracies = []
    val_accuracies = []
    labelled_val_accuracies = []

    model.train()
    for images, masks in train_loader: 
        torch.cuda.empty_cache()
        images = Variable(images.cuda())
        masks = Variable(masks.cuda())
        outputs = model(images)
        
        loss = torch.zeros(1).cuda()
        y_preds = outputs
        if modelNo == "unet" or modelNo == "deeplab":
            loss = bceloss(outputs, masks) 
        elif modelNo == "hed":
            for o in range(5):
                loss = loss + cross_entropy_loss_HED(outputs[o], masks)
            loss = loss + bceloss(outputs[-1],masks)
            y_preds = outputs[-1]
        elif modelNo == "rcf":
            for o in outputs:
                loss = loss + cross_entropy_loss_RCF(o, masks)
            y_preds = outputs[-1]
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        train_losses.append(loss.data)
        predicted_mask = y_preds > best_iou_threshold
        train_acc = iou_pytorch(predicted_mask.squeeze(1).byte(), masks.squeeze(1).byte())
        train_accuracies.append(train_acc.mean())        

    model.eval()
    for images, masks in val_loader:
        torch.cuda.empty_cache()
        images = Variable(images.cuda())
        masks = Variable(masks.cuda())
        outputs = model(images)
        
        loss = torch.zeros(1).cuda()
        y_preds = outputs
        if modelNo == "unet" or modelNo == "deeplab":
            loss = bceloss(outputs, masks) 
        elif modelNo == "hed":
            for o in range(5):
                loss = loss + cross_entropy_loss_HED(outputs[o], masks)
            loss = loss + bceloss(outputs[-1],masks)
            y_preds = outputs[-1]
        elif modelNo == "rcf":
            for o in outputs:
                loss = loss + cross_entropy_loss_RCF(o, masks)
            y_preds = outputs[-1]
        val_losses.append(loss.data)
        predicted_mask = y_preds > best_iou_threshold
        val_acc = iou_pytorch(predicted_mask.byte(), masks.squeeze(1).byte())
        val_accuracies.append(val_acc.mean())

        
    mean_train_losses.append(torch.mean(torch.stack(train_losses)))
    mean_val_losses.append(torch.mean(torch.stack(val_losses)))
    mean_train_accuracies.append(torch.mean(torch.stack(train_accuracies)))
    mean_val_accuracies.append(torch.mean(torch.stack(val_accuracies)))
    
    scheduler.step(torch.mean(torch.stack(val_losses)))    
    early_stopping(torch.mean(torch.stack(val_losses)), model, best_model_fpath)
    


    if early_stopping.early_stop:
        print("Early stopping")
        break
        

    torch.cuda.empty_cache()
    
    for param_group in optimizer.param_groups:
        learningRate = param_group['lr']
    
    
    # Print Epoch results
    t_end = time.time()

    print('Epoch: {}. Train Loss: {}. Val Loss: {}. Train IoU: {}. Val IoU: {}. Time: {}. LR: {}'
          .format(epoch+1, torch.mean(torch.stack(train_losses)), torch.mean(torch.stack(val_losses)), torch.mean(torch.stack(train_accuracies)), torch.mean(torch.stack(val_accuracies)), t_end-t_start, learningRate))
    
    t_start = time.time()

Validation loss decreased (inf --> 0.235574).  Saving model ...
Epoch: 1. Train Loss: 0.24820204079151154. Val Loss: 0.23557445406913757. Train IoU: 0.20784807205200195. Val IoU: 0.24321377277374268. Time: 892.1702287197113. LR: 0.01


In [None]:
mean_train_losses = np.asarray(torch.stack(mean_train_losses).cpu())
mean_val_losses = np.asarray(torch.stack(mean_val_losses).cpu())
mean_train_accuracies = np.asarray(torch.stack(mean_train_accuracies).cpu())
mean_val_accuracies = np.asarray(torch.stack(mean_val_accuracies).cpu())

fig = plt.figure(figsize=(10,5))
plt.subplot(1, 2, 1)
train_loss_series = pd.Series(mean_train_losses)
val_loss_series = pd.Series(mean_val_losses)
train_loss_series.plot(label="train_loss")
val_loss_series.plot(label="validation_loss")
plt.legend()
plt.subplot(1, 2, 2)
train_acc_series = pd.Series(mean_train_accuracies)
val_acc_series = pd.Series(mean_val_accuracies)
train_acc_series.plot(label="train_acc")
val_acc_series.plot(label="validation_acc")
plt.legend()
plt.savefig('{}_loss_acc.png'.format(best_model_fpath))

totaltime = time.time()-time1
print("total cost {} hours".format(totaltime/3600))