In [2]:
from __future__ import print_function, division
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, sampler

from torchvision import transforms, utils, models
from torch.optim import lr_scheduler

import numpy as np
from skimage import io, transform
import pandas as pd
import matplotlib.pyplot as plt

import time
import copy

from torch.nn.functional import interpolate

# import encoding

In [3]:
# Set up hyperparameters and train settings
# True when the KAGGLE_WORKING_DIR environment variable is set.
IN_KERNEL = os.environ.get('KAGGLE_WORKING_DIR') is not None
MIN_SAMPLES_PER_CLASS = 50
BATCH_SIZE = 512
LEARNING_RATE = 1e-3
LR_STEP = 3
LR_FACTOR = 0.5
# NUM_WORKERS = multiprocessing.cpu_count()
MAX_STEPS_PER_EPOCH = 15000
NUM_EPOCHS = 2 ** 32
LOG_FREQ = 500
NUM_TOP_PREDICTS = 20
TIME_LIMIT = 9 * 60 * 60  # presumably seconds (i.e. 9 hours) -- confirm with the consumer

# Use the first CUDA device when one is available and fall back to the CPU
# otherwise, so that tensors/models moved with `.to(device)` still work on a
# machine without a GPU instead of failing at the first transfer.
_CUDA_AVAILABLE = torch.cuda.is_available()
dtype = torch.cuda.FloatTensor if _CUDA_AVAILABLE else torch.FloatTensor
device = torch.device("cuda:0" if _CUDA_AVAILABLE else "cpu")

In [3]:
# Loading Train Dataset

class LandmarksDataset(Dataset):
    """Landmarks dataset backed by a CSV annotation file and an image folder.

    Column 0 of the CSV is used as the image file stem and column 2 as the
    label of that image.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.landmarks_frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the annotation file."""
        return len(self.landmarks_frame)

    def _read_image(self, path):
        """Read the image stored at `path`; kept separate so it can be overridden."""
        return io.imread(path)

    def __getitem__(self, idx):
        """Return the sample at row `idx`.

        Returns:
            ``{'image': <image>, 'class': <label>}`` (passed through
            ``self.transform`` when one was given), or ``None`` when the image
            could not be read.
        """
        # TODO: path construction is INCORRECT, NEEDS TO FIT TRAIN FOLDER NAMING
        img_name = os.path.join(self.root_dir,
                                self.landmarks_frame.iloc[idx, 0])
        img_name += '.jpg'
        try:
            image = self._read_image(img_name)
        except Exception:
            # Best effort: an unreadable/missing image yields None rather than
            # aborting. `Exception` (instead of a bare `except`) lets
            # KeyboardInterrupt / SystemExit propagate so a long pass over the
            # data can still be interrupted.
            return None
        landmarks = self.landmarks_frame.iloc[idx, 2]
        sample = {'image': image, 'class': landmarks}

        if self.transform:
            sample = self.transform(sample)

        return sample
    

class ToTensor(object):
    """Transform turning the numpy contents of a sample dict into Tensors."""

    def __call__(self, sample):
        """Map ``{'image', 'class'}`` to ``{'image', 'landmarks'}`` tensors.

        The image axes are reordered from numpy's H x W x C layout to
        torch's C x H x W layout; the label is wrapped in a tensor and
        returned under the 'landmarks' key.
        """
        hwc_image = sample['image']
        label = sample['class']

        # numpy image: H x W x C  ->  torch image: C x H x W
        chw_image = hwc_image.transpose(2, 0, 1)

        image_tensor = torch.from_numpy(chw_image)
        label_tensor = torch.from_numpy(np.array(label))
        return {'image': image_tensor, 'landmarks': label_tensor}
    

# Training dataset: annotations from train.csv, images from train/, every
# sample converted to tensors by ToTensor.
landmarks_dataset = LandmarksDataset(
    csv_file='train.csv',
    root_dir='train/',
    transform=ToTensor(),
)

# Confirm shapes and labels are of correct form by printing the first four
# samples (fewer if the dataset is shorter); unreadable samples are skipped.
for i in range(min(4, len(landmarks_dataset))):
    print(i)
    sample = landmarks_dataset[i]
    if sample:
        print(sample['image'].size(), sample['landmarks'])
        
class ChunkSampler(sampler.Sampler):
    """Sampler that yields one contiguous run of indices, in order.

    Arguments:
        num_samples: how many consecutive indices to produce
        start: first index of the run (defaults to 0)
    """
    def __init__(self, num_samples, start=0):
        self.start = start
        self.num_samples = num_samples

    def __len__(self):
        """Number of indices produced by one pass."""
        return self.num_samples

    def __iter__(self):
        """Iterate over start, start + 1, ..., start + num_samples - 1."""
        first = self.start
        stop = first + self.num_samples
        return iter(range(first, stop))
        
def _collate_skip_missing(batch):
    """Collate a batch after dropping samples that are None.

    The dataset's __getitem__ returns None for an image it could not read,
    and the default collate function cannot batch None values. The remaining
    samples are batched with the default collate function.

    Returns:
        The collated batch, or None when every sample in the batch was None
        (consumers should skip such a batch).
    """
    present = [sample for sample in batch if sample is not None]
    if not present:
        return None
    return torch.utils.data.dataloader.default_collate(present)


# Shuffled loader over the training dataset; batches may be smaller than
# batch_size when some of their images could not be read.
dataloaders = DataLoader(landmarks_dataset, batch_size=4,
                        shuffle=True, num_workers=4,
                        collate_fn=_collate_skip_missing)

0
torch.Size([3, 224, 224]) tensor(142820)
1
torch.Size([3, 224, 224]) tensor(104169)
2
torch.Size([3, 224, 224]) tensor(37914)
3
torch.Size([3, 224, 224]) tensor(102140)


In [None]:
# Import a pre-trained ResNet-50 backbone and set it up for feature extraction
# (only a freshly created classification head is trained).

# Alternatives that were tried and left disabled:
#FCN_model = models.segmentation.fcn_resnet50(pretrained=True,
#                                             progress=True,
#                                             num_classes=200000)
# FCN_model = encoding.models.get_model('FCN_ResNet50_ADE', pretrained=True)

# Width of the classification head. Taken from the `num_classes=200000` the
# original constructor call asked for; the 2-way head that used to be created
# cannot represent label ids such as 142820 printed by the dataset check.
NUM_CLASSES = 200000

# The classification ResNet-50 lives in `models`, not `models.segmentation`.
# It is loaded with its stock head so the pretrained weights match, and the
# head is replaced afterwards.
resnet_50 = models.resnet50(pretrained=True, progress=True)

# The rest of the notebook refers to the network as `FCN_model`.
FCN_model = resnet_50

# print(list(FCN_model.children()))

# Freeze the pretrained backbone.
for param in FCN_model.parameters():
    param.requires_grad = False

# New head: parameters of a freshly constructed module require gradients by
# default, so this is the only trainable layer. Its input width is read from
# the layer it replaces instead of being hard-coded.
FCN_model.fc = nn.Linear(FCN_model.fc.in_features, NUM_CLASSES)

#FCN_model = FCN_model.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that only parameters of final layer are being optimized as
# opposed to before.
optimizer_FCN = optim.SGD(FCN_model.fc.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_FCN, step_size=7, gamma=0.1)

# print(newmodel)


In [None]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25,
                loaders=None, target_device=None):
    """Train `model`, validating after every epoch, and return the best model.

    Args:
        model: network to train; it is modified in place.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped once per training batch.
        scheduler: learning-rate scheduler stepped once per epoch, after the
            training phase (i.e. after the optimizer has stepped).
        num_epochs: number of train + validation passes.
        loaders: mapping with 'train' and 'val' entries, each an iterable of
            ``(inputs, labels)`` batches. Defaults to the module-level
            ``dataloaders``.
        target_device: device the batches are moved to. Defaults to the
            module-level ``device``.

    Returns:
        `model`, loaded with the weights of the epoch that reached the
        highest validation accuracy.
    """
    if loaders is None:
        loaders = dataloaders
    if target_device is None:
        target_device = device

    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0
            # Samples actually seen in this phase; used as the averaging
            # denominator so no separately maintained size table is needed.
            num_seen = 0

            # Iterate over data.
            for inputs, labels in loaders[phase]:
                inputs = inputs.to(target_device)
                labels = labels.to(target_device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                batch_size = inputs.size(0)
                running_loss += loss.item() * batch_size
                running_corrects += torch.sum(preds == labels).item()
                num_seen += batch_size

            if phase == 'train':
                # Step the schedule only after the optimizer has stepped;
                # stepping it first skips the initial learning rate.
                scheduler.step()

            denom = max(num_seen, 1)  # avoid dividing by zero on an empty loader
            epoch_loss = running_loss / denom
            epoch_acc = float(running_corrects) / denom

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    # Restore the best validation weights and hand the model back so that
    # `model = train_model(...)` works.
    model.load_state_dict(best_model_wts)
    return model

In [5]:
# FCN_model = train_model(FCN_model, criterion, optimizer_FCN, exp_lr_scheduler,
#                        num_epochs=20)

class Flatten(nn.Module):
    """Collapse each image of a 4-D batch into a single feature vector."""
    def forward(self, x):
        # Unpacking four sizes keeps the requirement that x is N x C x H x W.
        batch_size, _channels, _height, _width = x.size()
        # One row per image holding its C * H * W values.
        return x.view(batch_size, -1)
    

#https://github.com/nashory/DeLF-pytorch/blob/master/train/layers.py
    
class WeightedSum2d(nn.Module):
    """Weighted sum of a feature map over its spatial positions.

    `forward` takes a pair ``(x, weights)`` whose last two (spatial)
    dimensions must agree, multiplies them element-wise and sums over the
    spatial positions, returning a ``b x c x 1 x 1`` tensor.
    """
    def __init__(self):
        super(WeightedSum2d, self).__init__()
    def forward(self, x):
        x, weights = x
        # `.size()` must be called here: formatting the bound method itself
        # would print its repr instead of the offending shapes.
        assert x.size(2) == weights.size(2) and x.size(3) == weights.size(3),\
                'err: h, w of tensors x({}) and weights({}) must be the same.'\
                .format(x.size(), weights.size())
        y = x * weights                                       # element-wise multiplication
        y = y.view(-1, x.size(1), x.size(2) * x.size(3))      # b x c x hw
        return torch.sum(y, dim=2).view(-1, x.size(1), 1, 1)  # b x c x 1 x 1
    def __repr__(self):
        return self.__class__.__name__


class SpatialAttention2d(nn.Module):
    '''
    SpatialAttention2d
    2-layer 1x1 conv network (in_c -> 512 -> 1) with a configurable hidden
    activation and a softplus on the output score.
    <!!!> attention score normalization will be added for experiment.

    Args:
        in_c: number of channels of the input feature map.
        act_fn: hidden activation, 'relu' or one of 'leakyrelu' / 'leaky' /
            'leaky_relu' (case-insensitive).

    Raises:
        ValueError: if `act_fn` is not one of the supported names.
    '''
    def __init__(self, in_c, act_fn='relu'):
        super(SpatialAttention2d, self).__init__()
        self.conv1 = nn.Conv2d(in_c, 512, 1, 1)                 # 1x1 conv
        if act_fn.lower() in ['relu']:
            self.act1 = nn.ReLU()
        elif act_fn.lower() in ['leakyrelu', 'leaky', 'leaky_relu']:
            self.act1 = nn.LeakyReLU()
        else:
            # Fail at construction time; otherwise `act1` would be missing
            # and the error would only surface on the first forward pass.
            raise ValueError(
                "unsupported act_fn {!r}; expected 'relu' or 'leaky_relu'"
                .format(act_fn))
        self.conv2 = nn.Conv2d(512, 1, 1, 1)                    # 1x1 conv
        self.softplus = nn.Softplus(beta=1, threshold=20)       # use default setting.

    def forward(self, x):
        '''
        x : spatial feature map. (b x c x h x w)
        returns the softplus attention score, one channel per position
        (b x 1 x h x w)
        '''
        x = self.conv1(x)
        x = self.act1(x)
        x = self.conv2(x)
        x = self.softplus(x)
        return x

    def __repr__(self):
        return self.__class__.__name__

        
class Attention(nn.Module):
    """Attention pooling: score every position, then take a weighted sum.

    Args:
        in_c: number of channels of the input feature map.
        act_fn: hidden activation name forwarded to SpatialAttention2d.
    """
    def __init__(self, in_c, act_fn='relu'):
        super(Attention, self).__init__()
        # Both layers are defined in this notebook, not in torch.nn.
        self.SpatialAttention = SpatialAttention2d(in_c, act_fn)
        self.WeightedSum = WeightedSum2d()
    def forward(self, x):
        """Return the attention-weighted sum of `x` as a b x c x 1 x 1 tensor."""
        weights = self.SpatialAttention(x)
        # L2-normalise the features along the channel dimension before pooling.
        a = F.normalize(x, p=2, dim=1)
        return self.WeightedSum((a, weights))
        

In [None]:
# Small attention-based classifier: conv -> attention pooling -> flatten ->
# two linear layers.
# NOTE(review): this cell cannot run as written -- see the notes on the
# individual layers below. The missing layer sizes are design decisions that
# are not specified anywhere in this notebook.
model = nn.Sequential(
    nn.Conv2d(in_channels=3, out_channels=1, kernel_size=5, stride=1, padding=2),
    nn.ReLU(),
    # NOTE(review): `Attention` is defined in this notebook, not in torch.nn,
    # and its constructor requires `in_c`; presumably `Attention(1)` was
    # intended to match the conv's single output channel -- confirm.
    nn.Attention(),
    Flatten(),
    nn.ReLU(),
    # NOTE(review): nn.Linear requires in_features and out_features.
    nn.Linear(),
    nn.ReLU(),
    # NOTE(review): same as above -- the output width (number of classes)
    # still has to be chosen.
    nn.Linear(),
)

In [None]:
# Prediction
# Smoke-test the model on one dummy image. The first layer is a Conv2d with
# in_channels=3, so the input must be a batch shaped N x 3 x H x W; 224 x 224
# matches the image size printed by the dataset check. (A flat tensor of 224
# values, as used before, is rejected by the convolution.)
a = torch.linspace(0, 1, steps=3 * 224 * 224).view(1, 3, 224, 224)
with torch.no_grad():  # inference only, no autograd bookkeeping needed
    prediction = model(a)
prediction