In [21]:
from os.path import isfile

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from PIL import Image as pil_image
from PIL.ImageDraw import Draw

In [19]:
# Load the annotation file: one record per line, formatted as
#   <filename>,x0,y0,x1,y1,...
# and turn it into a list of (filename, [(x0, y0), (x1, y1), ...]) tuples.
with open('../../data/humpback-whale-identification/cropping.txt', 'rt') as f:
    # Iterate line by line and skip blank ones.  The previous
    # `f.read().split('\n')[:-1]` always dropped the last element, which
    # silently lost the final record whenever the file had no trailing
    # newline, and turned blank lines into bogus ('', []) records.
    rows = [line.rstrip('\n').split(',') for line in f if line.strip()]

# The first field is the filename; the remaining fields are consumed pairwise
# as integer (x, y) points.
data = [
    (p, [(int(coord[i]), int(coord[i + 1])) for i in range(0, len(coord), 2)])
    for p, *coord in rows
]

# Show the first parsed record as a quick sanity check.
data[0]

('88532e70.jpg',
 [(195, 293),
  (269, 115),
  (868, 158),
  (888, 170),
  (641, 496),
  (512, 546),
  (321, 524)])

In [26]:
def expand_path(p):
    """Resolve an image filename to its location in the dataset.

    The train directory is checked first, then the test directory.  When the
    file exists in neither, ``p`` is returned unchanged.
    """
    search_dirs = (
        '../../data/humpback-whale-identification/train/',
        '../../data/humpback-whale-identification/test/',
    )
    for folder in search_dirs:
        candidate = folder + p
        if isfile(candidate):
            return candidate

    # Not found in either directory: hand the name back untouched.
    return p

def read_raw_image(p):
    """Open image ``p`` with PIL after resolving its path via ``expand_path``."""
    resolved = expand_path(p)
    return pil_image.open(resolved)

def draw_dot(draw, x, y):
    """Paint a filled red ellipse whose bounding box is the 10x10 square centred on (x, y).

    ``draw`` is any object exposing an ``ellipse(box, fill=..., outline=...)``
    method; the notebook passes a ``PIL.ImageDraw.Draw`` instance.
    """
    top_left = (x - 5, y - 5)
    bottom_right = (x + 5, y + 5)
    draw.ellipse((top_left, bottom_right), fill='red', outline='red')
    
def draw_dots(draw, coordinates):
    """Mark every (x, y) pair in ``coordinates`` on ``draw`` using ``draw_dot``."""
    for px, py in coordinates:
        draw_dot(draw, px, py)
        
def bounding_rectangle(list):
    """Return the axis-aligned bounding box ``(x0, y0, x1, y1)`` of a point sequence.

    ``list`` is an indexable, sliceable sequence of (x, y) pairs.  ``(x0, y0)``
    holds the smallest x and y seen, ``(x1, y1)`` the largest.  An empty
    sequence raises ``IndexError`` from the initial ``list[0]`` lookup.
    """
    first_x, first_y = list[0]
    xs, ys = [first_x], [first_y]
    for px, py in list[1:]:
        xs.append(px)
        ys.append(py)
    return min(xs), min(ys), max(xs), max(ys)

# Visual sanity check of one annotation: take the record at index 17, draw
# its points and their bounding rectangle on the image, then display it.
filename, coordinates = data[17]
# (x0, y0, x1, y1) extent of the annotated points.
box = bounding_rectangle(coordinates)
# NOTE(review): the recorded output of this cell is a FileNotFoundError for
# the bare filename, which is what expand_path returns when the image is in
# neither the train nor the test directory -- confirm the data location.
img = read_raw_image(filename)
draw = Draw(img)
draw_dots(draw, coordinates)
draw.rectangle(box, outline='red')
# Bare expression so the notebook renders the annotated image.
img

FileNotFoundError: [Errno 2] No such file or directory: '2b3cb2e7.jpg'

In [3]:
def conv9x9(inchannel, outchannel, stride=1):
    """Build a biased 9x9 ``nn.Conv2d`` with one pixel of zero padding.

    With padding fixed at 1, a stride-1 layer shrinks each spatial dimension
    by 6 pixels.
    """
    layer = nn.Conv2d(
        in_channels=inchannel,
        out_channels=outchannel,
        kernel_size=9,
        stride=stride,
        padding=1,
        bias=True,
    )
    return layer

def conv3x3(inchannel, outchannel, stride=1):
    """Build a biased 3x3 ``nn.Conv2d`` with one pixel of zero padding.

    At stride 1 the spatial size of the input is preserved.
    """
    layer = nn.Conv2d(
        in_channels=inchannel,
        out_channels=outchannel,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=True,
    )
    return layer

def conv2x2(inchannel, outchannel, stride=1):
    """Build a biased 2x2 ``nn.Conv2d`` with one pixel of zero padding.

    With padding fixed at 1, a stride-1 layer grows each spatial dimension
    by one pixel.
    """
    layer = nn.Conv2d(
        in_channels=inchannel,
        out_channels=outchannel,
        kernel_size=2,
        stride=stride,
        padding=1,
        bias=True,
    )
    return layer

In [4]:
class BasicBlock(nn.Module):
    """Down-sampling block: conv2x2 -> ReLU -> conv3x3 -> ReLU -> conv3x3 -> ReLU -> BatchNorm.

    Args:
        inchannel: number of channels of the incoming feature map.
        outchannel: number of channels produced by every convolution here.
        stride: stride of the two 3x3 convolutions.  The leading 2x2
            convolution always uses stride 2, so with the default the block
            maps a spatial size ``n`` to ``n // 2 + 1``.
    """

    def __init__(self, inchannel, outchannel, stride=1):
        super().__init__()
        self.conv1 = conv2x2(inchannel, outchannel, stride=2)
        self.conv2 = conv3x3(outchannel, outchannel, stride)
        self.conv3 = conv3x3(outchannel, outchannel, stride)
        self.relu = nn.ReLU(inplace=True)
        self.bn = nn.BatchNorm2d(outchannel)

    def forward(self, x):
        """Run ``x`` through the three conv/ReLU stages and the final batch norm.

        Each stage consumes the previous stage's output.  The earlier version
        fed ``x`` to every layer and discarded the results, so the
        convolutions never contributed and the in-place ReLU overwrote the
        caller's input tensor.
        """
        out = self.relu(self.conv1(x))
        out = self.relu(self.conv2(out))
        out = self.relu(self.conv3(out))
        return self.bn(out)

In [10]:
class BBModel(nn.Module):
    """CNN that maps a batch of 3-channel images to 4 values per image.

    Pipeline: conv9x9 -> ReLU -> conv3x3 -> ReLU -> BatchNorm -> ``blocks``
    stacked ``block`` modules -> two parallel max-pool/linear branches (one
    pooling along the width, one along the height) -> concatenation -> final
    linear layer with 4 outputs.

    Args:
        block: module class called as ``block(in_channels, out_channels)``.
        blocks: how many ``block`` instances to stack.

    Note:
        ``fc_h`` and ``fc_v`` take 256 features, so the feature map leaving
        the stacked blocks must be 64 x 4 x 4.  With ``BasicBlock`` and
        ``blocks=5`` a 3 x 96 x 96 input satisfies this.
    """

    def __init__(self, block, blocks):
        super().__init__()

        self.inchannel = 64

        self.conv1 = conv9x9(3, 64)
        self.conv2 = conv3x3(64, 64)
        self.relu = nn.ReLU(inplace=True)
        self.bn = nn.BatchNorm2d(64)

        self.layer = self._make_layer(block, 64, blocks)

        # Two views of the same feature map: one pooled along the width,
        # one pooled along the height.
        self.pool_h = nn.MaxPool2d((1, 4))
        self.pool_v = nn.MaxPool2d((4, 1))

        self.fc_h = nn.Linear(256, 16)
        self.fc_v = nn.Linear(256, 16)

        self.fc = nn.Linear(32, 4)

    def _make_layer(self, block, outchannel, blocks, stride=1):
        """Stack ``blocks`` instances of ``block`` into an ``nn.Sequential``.

        ``stride`` is accepted for interface compatibility but is not
        forwarded to ``block``.
        """
        layers = []
        for _ in range(blocks):
            layers.append(block(self.inchannel, outchannel))
            # Every block after the first receives ``outchannel`` channels.
            self.inchannel = outchannel

        return nn.Sequential(*layers)

    def forward(self, x):
        """Return a ``(batch, 4)`` tensor for a ``(batch, 3, H, W)`` input."""
        # Chain the stem: each layer consumes the previous layer's output
        # (the earlier version applied every layer to ``x`` and threw the
        # results away).
        out = self.relu(self.conv1(x))
        out = self.relu(self.conv2(out))
        out = self.bn(out)

        out = self.layer(out)

        # Flatten from dim 1 so the batch dimension is kept.
        h = torch.flatten(self.pool_h(out), 1)
        h = self.relu(self.fc_h(h))

        v = torch.flatten(self.pool_v(out), 1)
        v = self.relu(self.fc_v(v))

        # ``torch.cat`` is the concatenation function; ``torch.concate`` does not exist.
        out = torch.cat((h, v), -1)
        return self.fc(out)

In [11]:
# Build the network with BasicBlock as the repeated unit, stacked 5 times.
model = BBModel(BasicBlock, 5)
# Bare expression so the notebook prints the module structure.
model

BBModel(
  (conv1): Conv2d(3, 64, kernel_size=(9, 9), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu): ReLU(inplace)
  (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(2, 2), stride=(2, 2), padding=(1, 1))
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (relu): ReLU(inplace)
      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(2, 2), stride=(2, 2), padding=(1, 1))
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (relu): ReLU(inplace)
      (bn): BatchNorm

In [14]:
# Mean-squared-error loss; presumably the 4 model outputs are regressed
# against bounding-box values -- TODO confirm against the training targets.
criterion = nn.MSELoss()
# Adam over all model parameters.
# NOTE(review): lr=0.032 is far above Adam's default of 1e-3 -- confirm it is intentional.
optimizer = optim.Adam(model.parameters(), lr=0.032)

In [None]:
# Number of passes of the training loop below.
epochs = 5
for e in range(epochs):
    # Reset at the start of every epoch.
    # NOTE(review): the visible loop body stops here -- no batches are read
    # and no forward/backward/optimizer step is performed yet.
    running_loss = 0
    
    