In [14]:
import pretrainedmodels
import os

from helper import *

In [15]:
# Project root. Defaults to the original training-VM location; set the
# KAGGLE_GRAPHEME_ROOT environment variable to run the notebook elsewhere
# without editing this cell.
path = Path(os.environ.get('KAGGLE_GRAPHEME_ROOT', '/home/jupyter/Kaggle/kaggle_grapheme'))
# CSV holding the training labels.
TRAIN_LABELS = path/'data/train.csv'
# Directory handed to ImageList.from_df as the image root.
TRAIN_IMG_FILES = path/'data/'

In [16]:
# Load the training label table. Later cells read column 0 as the image file
# stem ('.png' is appended) and the grapheme_root / vowel_diacritic /
# consonant_diacritic columns as the three targets.
df_train = pd.read_csv(TRAIN_LABELS)

In [17]:
#Mish - "Mish: A Self Regularized Non-Monotonic Neural Activation Function"
#https://arxiv.org/abs/1908.08681v1
#implemented for PyTorch / FastAI by lessw2020 
#github: https://github.com/lessw2020/mish
class Mish(nn.Module):
    """Mish activation: ``x * tanh(softplus(x))``, applied element-wise.

    The module holds no parameters or buffers, so the default
    ``nn.Module`` constructor is all it needs.
    """

    def forward(self, x):
        # Deliberately a single expression: the original author measured about
        # 1 second per epoch saved (V100 GPU) versus going through a temporary.
        return x.mul(F.softplus(x).tanh())
    
def to_mish(model):
    """Recursively swap every ``nn.ReLU`` submodule of ``model`` for ``Mish``.

    The replacement is done in place on ``model``; nothing is returned.
    Children that are not ReLUs are descended into, so nested ReLUs are
    replaced as well.
    """
    for attr, submodule in model.named_children():
        if not isinstance(submodule, nn.ReLU):
            # Not a ReLU itself: keep looking further down this branch.
            to_mish(submodule)
            continue
        # Rebind the existing child name to a fresh Mish instance.
        setattr(model, attr, Mish())

In [18]:
class Model_Head(nn.Module):
    '''
    Classification head applied to the backbone feature map:
    Mish -> conv -> batchnorm -> concat-pool -> flatten, followed by two
    bn_drop_lin stages (ni*2 -> 512 -> nc).

    ni : input filter size
    nc : output class size
    ps : dropout rate of the first linear stage (the output stage uses ps*2)
    '''
    def __init__(self,ni,nc,ps=0.25):
        super().__init__()
        # Built in the same order as they run, so parameter initialisation
        # happens in forward order.
        conv_part = [Mish(), conv2d(ni,ni), batchnorm_2d(ni), AdaptiveConcatPool2d(), Flatten()]
        # The first linear stage is sized for ni*2 inputs after pooling/flattening.
        hidden_part = bn_drop_lin(ni*2, 512, p=ps, actn=Mish())
        output_part = bn_drop_lin(512, nc, p=ps*2)
        self.head = nn.Sequential(*conv_part, *hidden_part, *output_part)

    def forward(self,xb):
        '''Run the whole head on the feature map ``xb``.'''
        return self.head(xb)

class SE_Resnet_1ch(nn.Module):
    '''
    Backbone built from ``arch``, adapted to 1-channel input, with three
    classification heads sharing the same body features.

    arch       : model constructor, called as arch(num_classes=1000, pretrained=pretrained);
                 all of its children except the last two become the body, and the
                 first child must expose a ``conv1`` convolution
    nc         : class counts for the grapheme, vowel and consonant heads, in that order
    pretrained : forwarded unchanged to ``arch``
    '''
    def __init__(self,arch,nc=(168,11,7),pretrained='imagenet'):
        super().__init__()
        self.body = nn.Sequential(*list(arch(num_classes=1000,pretrained=pretrained).children())[:-2])

        # change input filter size to 1:
        # collapse the stem kernels over their input-channel axis so the new
        # single-channel conv starts from the original (summed) weights.
        stem_weight = self.body[0].conv1.weight
        nf,_,ks,_ = stem_weight.shape
        # detach(): the sum is only an initial value and should not be recorded by autograd.
        gray_weight = stem_weight.detach().sum(dim=1,keepdim=True)
        self.body[0].conv1 = conv2d(1,nf,ks=ks,stride=2)
        self.body[0].conv1.weight = nn.Parameter(gray_weight)

        # multi-head output
        # 168,11,7 from num of unique labels
        n_feat = num_features_model(self.body)
        self.head_grapheme = Model_Head(n_feat,nc[0])
        self.head_vowel = Model_Head(n_feat,nc[1])
        self.head_consonant = Model_Head(n_feat,nc[2])

    def forward(self,x):
        '''Return a tuple (grapheme, vowel, consonant) of head outputs for input ``x``.'''
        x = self.body(x)
        return (self.head_grapheme(x),self.head_vowel(x),self.head_consonant(x))


In [19]:
# Build the single-channel, three-head network on an SE-ResNeXt50 (32x4d)
# backbone; the class default pretrained='imagenet' applies.
model = SE_Resnet_1ch(pretrainedmodels.se_resnext50_32x4d)

In [20]:
# Replace every nn.ReLU submodule of the model with Mish, in place.
to_mish(model)

In [12]:
# Augmentation settings: no flipping, max_rotate=40, warping disabled, plus cutout applied with p=0.5.
tfms = get_transforms(do_flip=False,max_rotate=40.,max_warp=0.,xtra_tfms=[cutout(p=0.5)]) # updated augmentation
# Single-channel stats passed to .normalize() — presumably (mean, std) of the training images; TODO confirm how they were computed.
stats = ([0.0692], [0.2051])
# Batch size handed to .databunch().
bs = 128

In [13]:
# Build the DataBunch: greyscale ('L') PNGs named by column 0 of df_train,
# random train/validation split with a fixed seed, three label columns,
# transforms at 128x128, then normalisation with `stats`.
# NOTE(review): no label_cls is given for the three integer label columns —
# confirm the inferred label class yields class-index targets, since the loss
# indexes target[:,0], target[:,1] and target[:,2].
data = (ImageList
        .from_df(df_train,path=TRAIN_IMG_FILES,cols=0,convert_mode='L',suffix='.png')
        .split_by_rand_pct(seed=42)
        .label_from_df(cols=['grapheme_root','vowel_diacritic','consonant_diacritic'])
        .transform(tfms,size=(128,128),padding_mode='zeros')
        # os.cpu_count() returns None when the count cannot be determined;
        # fall back to 1 so the multiplication cannot raise TypeError.
        .databunch(bs=bs,num_workers=(os.cpu_count() or 1)*4)
        .normalize(stats)
       )

In [14]:
class Loss_multi_head(nn.Module):
    '''
    Weighted sum of three cross-entropy losses, one per model head.

    weights : three per-head multipliers, in head order. The sequence is
              copied, so instances never share a weight list with each other
              or with the caller.
    '''
    def __init__(self,weights=(1,1,1)):
        super().__init__()
        # Copy into a fresh list: the previous ``weights=[1,1,1]`` default was a
        # single list object stored on (and mutable through) every instance
        # created without arguments.
        self.weights = list(weights)

    def forward(self,preds,target,reduction='mean'):
        '''
        preds     : exactly three prediction tensors, one per head
        target    : indexed as target[:, i]; column i holds the class indices for head i
        reduction : passed through to F.cross_entropy
        '''
        # Cast to fp32 before the loss; the Learner is run with to_fp16().
        outp_1,outp_2,outp_3 = (p.float() for p in preds)
        target = target.long()
        per_head = [
            F.cross_entropy(outp,target[:,i],reduction=reduction)
            for i,outp in enumerate((outp_1,outp_2,outp_3))
        ]
        return (
            self.weights[0] * per_head[0]
            + self.weights[1] * per_head[1]
            + self.weights[2] * per_head[2]
        )

In [15]:
# Per-head metric factories: Metric_idx (from helper) with its first positional
# argument fixed to 0, 1 or 2 — presumably the index of the head/label column
# being scored (grapheme, vowel, consonant); confirm against helper.Metric_idx.
Metric_grapheme = partial(Metric_idx,0)
Metric_vowel = partial(Metric_idx,1)
Metric_consonant = partial(Metric_idx,2)

In [17]:
# Weighted multi-head loss: 0.7 for the first head (grapheme), 0.1 for the second (vowel), 0.2 for the third (consonant).
loss_func = Loss_multi_head([0.7,0.1,0.2])

In [18]:
# Assemble the Learner from the DataBunch, the three-head model and the weighted
# loss, tracking one metric per head plus Metric_tot (from helper), and switch
# it to fp16 via to_fp16(). Model files go under models/se_resnet.
learn = Learner(data,model,loss_func=loss_func,
                metrics=[Metric_grapheme(),Metric_vowel(),Metric_consonant(),Metric_tot()],
                model_dir=path/'models/se_resnet').to_fp16()

In [None]:
# Cut the model into layer groups at `body` and at `head_grapheme`, then
# freeze_to(-1) — presumably leaving only the last group (the heads) trainable;
# TODO confirm the resulting groups via learn.layer_groups.
learn.split(lambda m: (m.body,m.head_grapheme));
learn.freeze_to(-1);
# to_fp16() was already applied when `learn` was created; it is called again here after the split/freeze.
learn.to_fp16();

In [5]:
# Load the unmodified ImageNet-pretrained SE-ResNeXt50 for inspection.
# NOTE(review): this reuses the global name `model`, which other cells bind to
# the SE_Resnet_1ch instance; the value seen depends on cell execution order.
model = pretrainedmodels.se_resnext50_32x4d(num_classes=1000,pretrained='imagenet')

Downloading: "http://data.lip6.fr/cadene/pretrainedmodels/se_resnext50_32x4d-a260b3a4.pth" to /home/jupyter/.cache/torch/checkpoints/se_resnext50_32x4d-a260b3a4.pth
100%|██████████| 105M/105M [05:10<00:00, 356kB/s]  


In [13]:
# Inspect the first convolution of the stem — the layer SE_Resnet_1ch replaces with a 1-channel version.
list(model.children())[:-2][0].conv1

Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

In [21]:
# Display the module tree to check the conversion (1-channel conv1, Mish where ReLU used to be).
model

SE_Resnet_1ch(
  (body): Sequential(
    (0): Sequential(
      (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu1): Mish()
      (pool): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    )
    (1): Sequential(
      (0): SEResNeXtBottleneck(
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): Mish()
        

In [25]:
# Print a ResNet-50 built without pretrained weights — presumably as a reference for comparing its layout with the SE-ResNeXt body.
models.resnet50(pretrained=False)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 