In [1]:
!pip install segmentation_models_pytorch 
%pip install --no-index --find-links /kaggle/input/install-nvidia-tensorrt/nvidia/ tensorrt
%pip install --no-index --find-links /kaggle/input/install-nvidia-tensorrt/torch2trt/ torch2trt
%pip install --no-index --find-links /kaggle/input/install-nvidia-tensorrt/torch2trt/ nvidia-pyindex
%pip install --no-index --find-links /kaggle/input/install-nvidia-tensorrt/torch2trt/ onnx-graphsurgeon
%pip install --no-index --find-links /kaggle/input/install-nvidia-tensorrt/torch2trt/ onnxruntime

Collecting segmentation_models_pytorch
  Downloading segmentation_models_pytorch-0.3.3-py3-none-any.whl.metadata (30 kB)
Collecting pretrainedmodels==0.7.4 (from segmentation_models_pytorch)
  Downloading pretrainedmodels-0.7.4.tar.gz (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58.8 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting efficientnet-pytorch==0.7.1 (from segmentation_models_pytorch)
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting timm==0.9.2 (from segmentation_models_pytorch)
  Downloading timm-0.9.2-py3-none-any.whl.metadata (68 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.5/68.5 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
Collecting munch (from pretrainedmodels==0.7.4->segmentation_models_pytorch)
  Downloading munch-4.0.0-py2.py3-none-any.whl.metadata (5.9 kB)
Downloading segm

In [2]:
import tensorrt
from torch2trt import torch2trt
from torch import nn
import torch as tc
from dotenv import load_dotenv
import segmentation_models_pytorch as smp
from torch.cuda.amp import autocast

In [9]:
class CFG:
    """Static settings shared by the model definitions and the conversion cells."""

    # ============== model CFG =============
    model_name = 'Unet'

    # First ensemble member: encoder name and number of input channels.
    backbone1 = 'se_resnext50_32x4d'
    in_chans1 = 1  # alternatives noted by the author: 5, 65

    # Second ensemble member: encoder name and number of input channels.
    backbone2 = 'inceptionresnetv2'
    in_chans2 = 1  # alternatives noted by the author: 5, 65

    # Checkpoint files loaded into model1 / model2.
    model1_path = '/kaggle/input/sn-hoa-8e-5-27-rot0-5/se_resnext50_32x4d_26_loss0.10_score0.90_val_loss0.12_val_score0.88_midd_1024.pt'
    model2_path = '/kaggle/input/sennet-hoa-models/unet_inceptionresnetv2_41_loss0.05_score0.89_val_loss0.24_val_score0.82_midd_1024_final.pt'

    # Blend factor: model1 gets `ensemble_weight`, model2 gets `1 - ensemble_weight`.
    ensemble_weight = 0.7

    # Test-time augmentation: number of 90-degree rotations and of flipped views.
    num_rot90 = 4
    num_flip = 4

    # Number of output classes handed to the segmentation models.
    target_size = 1

    # The ensemble resizes its input to `input_size` (and the result back to
    # `image_size`) only when the two values differ.
    image_size = 1024  # alternative noted by the author: 512
    input_size = 1024  # alternative noted by the author: 512

    # Maximum number of augmented views sent through a model per forward call.
    batch = 32

In [10]:
def rle_encode(mask):
    """Run-length encode a mask into a space-separated ``start length`` string.

    The mask is flattened with ``mask.flatten()`` and padded with a zero on both
    ends; every position where the value changes marks the start or the end of a
    run. Start positions are 1-based. The function is meant for binary masks:
    any change of value, not only 0 <-> non-zero, opens or closes a run.

    Args:
        mask: array-like object exposing ``flatten()`` (e.g. a numpy array).

    Returns:
        str: ``"start1 length1 start2 length2 ..."``; ``'1 0'`` when the mask
        contains no runs, so the result is never an empty string.
    """
    # Imported locally because the notebook's import cell does not provide `np`;
    # without it this function raises NameError on a fresh kernel.
    import numpy as np

    pixel = mask.flatten()
    # Zero padding guarantees runs touching either border are closed properly.
    pixel = np.concatenate([[0], pixel, [0]])
    # Indices (1-based) at which the value differs from its predecessor.
    run = np.where(pixel[1:] != pixel[:-1])[0] + 1
    # Turn every (start, end) pair into (start, length).
    run[1::2] -= run[::2]
    rle = ' '.join(str(r) for r in run)
    if rle == '':
        rle = '1 0'
    return rle

def min_max_normalization(x:tc.Tensor)->tc.Tensor:
    """Rescale each sample of a batch linearly so its values span [0, 1].

    Inputs with more than two dimensions are flattened to ``(batch, -1)`` so the
    minimum and maximum are taken over all non-batch elements of a sample; the
    result is reshaped back to the input shape.

    Args:
        x: tensor whose first dimension is the batch, ``(batch, f1, ...)``.

    Returns:
        Tensor of the same shape as ``x``. When the per-sample minima average to
        0 and the maxima average to 1 the values are returned unchanged.
    """
    original_shape = x.shape
    flat = x.reshape(original_shape[0], -1) if x.ndim > 2 else x

    lo = flat.min(dim=-1, keepdim=True).values
    hi = flat.max(dim=-1, keepdim=True).values

    # Shortcut for data that already looks normalised.
    already_normalized = lo.mean() == 0 and hi.mean() == 1
    if already_normalized:
        return flat.reshape(original_shape)

    # The small constant keeps the division finite for constant samples.
    scaled = (flat - lo) / (hi - lo + 1e-9)
    return scaled.reshape(original_shape)

def norm_with_clip(x:tc.Tensor,smooth=1e-5):
    """Standardise each sample and softly compress outliers.

    Every sample (index along dimension 0) is shifted by its mean and divided by
    its standard deviation plus ``smooth``, both computed over all remaining
    dimensions. Values above 5 or below -3 are not cut off hard: the part
    exceeding the threshold is scaled by 1e-3, so ordering is preserved.

    Args:
        x: input tensor; dimension 0 indexes the samples.
        smooth: constant added to the standard deviation before dividing.

    Returns:
        A new tensor of the same shape as ``x``; the argument is not modified.
    """
    reduce_dims = list(range(1, x.ndim))
    mu = x.mean(dim=reduce_dims, keepdim=True)
    sigma = x.std(dim=reduce_dims, keepdim=True)
    z = (x - mu) / (sigma + smooth)
    # Upper tail first, then lower tail; the two regions cannot overlap.
    z = tc.where(z > 5, (z - 5) * 1e-3 + 5, z)
    z = tc.where(z < -3, (z + 3) * 1e-3 - 3, z)
    return z

In [11]:
class UnetModel(nn.Module):
    """Thin wrapper around ``smp.Unet`` that returns only output channel 0.

    Args:
        backbone: encoder name passed as ``encoder_name``.
        in_chans: number of input channels.
        target_size: number of output classes.
        weight: value passed as ``encoder_weights`` (``None`` is what
            ``build_model`` passes by default).
    """

    def __init__(self, backbone, in_chans, target_size, weight):
        super().__init__()
        unet_kwargs = dict(
            encoder_name=backbone,
            encoder_weights=weight,
            in_channels=in_chans,
            classes=target_size,
            activation=None,
        )
        # Stored under `model` so checkpoint keys keep the `model.` prefix.
        self.model = smp.Unet(**unet_kwargs)

    def forward(self, x):
        """Run the network and drop the channel axis by selecting channel 0."""
        return self.model(x)[:, 0]
class UnetPlusPlusModel(nn.Module):
    """Thin wrapper around ``smp.UnetPlusPlus`` that returns only output channel 0.

    Args:
        backbone: encoder name passed as ``encoder_name``.
        in_chans: number of input channels.
        target_size: number of output classes.
        weight: value passed as ``encoder_weights``.
    """

    def __init__(self, backbone, in_chans, target_size, weight):
        super().__init__()
        unetpp_kwargs = dict(
            encoder_name=backbone,
            encoder_weights=weight,
            in_channels=in_chans,
            classes=target_size,
            activation=None,
        )
        # Stored under `model` so checkpoint keys keep the `model.` prefix.
        self.model = smp.UnetPlusPlus(**unetpp_kwargs)

    def forward(self, x):
        """Run the network and drop the channel axis by selecting channel 0."""
        return self.model(x)[:, 0]
class FPNModel(nn.Module):
    """Thin wrapper around ``smp.FPN`` that returns only output channel 0.

    Args:
        backbone: encoder name passed as ``encoder_name``.
        in_chans: number of input channels.
        target_size: number of output classes.
        weight: value passed as ``encoder_weights``.
    """

    def __init__(self, backbone, in_chans, target_size, weight):
        super().__init__()
        fpn_kwargs = dict(
            encoder_name=backbone,
            encoder_weights=weight,
            in_channels=in_chans,
            classes=target_size,
            activation=None,
        )
        # Stored under `model` so checkpoint keys keep the `model.` prefix.
        self.model = smp.FPN(**fpn_kwargs)

    def forward(self, x):
        """Run the network and drop the channel axis by selecting channel 0."""
        return self.model(x)[:, 0]

class EnsembleModel(nn.Module):
    """Weighted ensemble of two U-Nets with rotation/flip test-time augmentation.

    ``forward`` normalises the input, builds ``num_rot90`` rotated views plus
    ``num_flip`` horizontally flipped copies of the first rotated views, runs
    both models on all views in chunks of ``batch``, maps every prediction back
    to the original orientation, averages the views per model and blends the two
    models with ``ensemble_weight``.

    Args:
        CFG: configuration object providing ``backbone1/2``, ``in_chans1/2``,
            ``target_size``, ``batch``, ``ensemble_weight``, ``num_rot90``,
            ``num_flip``, ``input_size`` and ``image_size``. ``num_flip`` must
            not exceed ``num_rot90`` because flipped views are derived from the
            rotated ones.
        weight1: ``encoder_weights`` value for the first model.
        weight2: ``encoder_weights`` value for the second model.
    """

    def __init__(self, CFG, weight1=None, weight2=None):
        super().__init__()
        self.CFG = CFG
        self.model1 = UnetModel(backbone=CFG.backbone1,
                                in_chans=CFG.in_chans1,
                                target_size=CFG.target_size,
                                weight=weight1)
        self.model2 = UnetModel(backbone=CFG.backbone2,
                                in_chans=CFG.in_chans2,
                                target_size=CFG.target_size,
                                weight=weight2)
        self.batch=CFG.batch
        self.ensemble_weight = CFG.ensemble_weight

    def _augment(self, x):
        """Stack all test-time-augmentation views of ``x`` along the batch axis."""
        cfg = self.CFG
        views = [tc.rot90(x, k=k, dims=(-2, -1)) for k in range(cfg.num_rot90)]
        # Flip the already rotated views; built as a list first so only the
        # rotated views are ever indexed.
        flipped = [tc.flip(views[k], dims=(-1,)) for k in range(cfg.num_flip)]
        return tc.cat(views + flipped, dim=0)

    def _predict_in_chunks(self, model, views):
        """Run ``model`` on ``views`` in chunks of at most ``self.batch`` samples.

        ``Tensor.split`` never yields an empty trailing chunk, unlike slicing
        with ``range(N // batch + 1)`` when N is a multiple of the chunk size.
        """
        return tc.cat([model(chunk) for chunk in views.split(self.batch, dim=0)], dim=0)

    def _merge_views(self, logits, shape):
        """Undo the augmentation of every view and average the probabilities.

        ``shape`` is the (batch, c, h, w) shape of the tensor that was augmented.
        """
        cfg = self.CFG
        probs = logits.sigmoid().reshape(cfg.num_rot90 + cfg.num_flip, shape[0], *shape[2:])
        restored = [tc.rot90(probs[k], k=-k, dims=(-2, -1)) for k in range(cfg.num_rot90)]
        # Flipped views are stored right after the rotated ones, hence the
        # `num_rot90` offset; they are un-flipped first, then rotated back.
        restored.extend([
            tc.rot90(tc.flip(probs[cfg.num_rot90 + k], dims=(-1,)), k=-k, dims=(-2, -1))
            for k in range(cfg.num_flip)
        ])
        return tc.stack(restored, dim=0).mean(0)

    def forward(self,x:tc.Tensor):
        """Return blended probabilities of shape (batch, h, w) for ``x``.

        Args:
            x: input of shape (batch, c, h, w). The spatial size has to be
                square after the optional resize, since rotated views are
                concatenated into one tensor.
        """
        cfg = self.CFG
        x = x.to(tc.float32)
        # Normalise every (sample, channel) plane on its own.
        x = norm_with_clip(x.reshape(-1, *x.shape[2:])).reshape(x.shape)

        needs_resize = cfg.input_size != cfg.image_size
        if needs_resize:
            x = nn.functional.interpolate(x, size=(cfg.input_size, cfg.input_size), mode='bilinear', align_corners=True)

        shape = x.shape
        views = self._augment(x)
        with autocast():
            with tc.no_grad():
                logits1 = self._predict_in_chunks(self.model1, views)
                logits2 = self._predict_in_chunks(self.model2, views)

        probs1 = self._merge_views(logits1, shape)
        probs2 = self._merge_views(logits2, shape)

        blended = probs1 * self.ensemble_weight + probs2 * (1 - self.ensemble_weight)
        if needs_resize:
            # interpolate expects a 4-D tensor: the batch axis temporarily acts
            # as the channel axis.
            blended = nn.functional.interpolate(blended[None], size=(cfg.image_size, cfg.image_size), mode='bilinear', align_corners=True)[0]
        return blended


def build_model(weight1=None, weight2=None):
    """Create the two-model ensemble from ``CFG`` and move it to the GPU.

    Args:
        weight1: ``encoder_weights`` value forwarded to the first model.
        weight2: ``encoder_weights`` value forwarded to the second model.

    Returns:
        EnsembleModel placed on the current CUDA device.
    """
    # NOTE(review): nothing visible in this notebook reads values loaded here.
    load_dotenv()

    # Echo the key settings so the cell output records what was built.
    for key in ('model_name', 'backbone1', 'backbone2'):
        print(key, getattr(CFG, key))

    ensemble = EnsembleModel(CFG, weight1, weight2)
    return ensemble.cuda()

In [12]:
# Build the ensemble on the GPU and switch it to inference mode right away, so
# normalisation/dropout layers behave deterministically in every later cell
# (export as well as any reference inference in PyTorch).
model=build_model().eval()
# Checkpoints are deserialised onto the CPU; load_state_dict copies the values
# into the existing (CUDA) parameters.
# NOTE(review): tc.load unpickles the file - only load trusted checkpoints.
model.model1.load_state_dict(tc.load(CFG.model1_path,"cpu"))
model.model2.load_state_dict(tc.load(CFG.model2_path,"cpu"))


model_name Unet
backbone1 se_resnext50_32x4d
backbone2 inceptionresnetv2


<All keys matched successfully>

In [13]:
# Example input handed to torch2trt: one single-channel 1024x1024 image on the GPU.
x = tc.zeros(1, 1, 1024, 1024, device='cuda')
# Only the second ensemble member is converted in this run; the equivalent call
# for the first member is kept for reference:
# model1_trt = torch2trt(model.model1, [x], use_onnx=True, fp16_mode=True, max_batch_size=16, max_workspace_size=int(1.2e+10))
model2_trt = torch2trt(
    model.model2,
    [x],
    use_onnx=True,
    fp16_mode=True,
    max_batch_size=16,
    max_workspace_size=int(1.2e+10),
)

  if h % output_stride != 0 or w % output_stride != 0:


In [14]:
# Persist the converted module's state dict so it can be reloaded without
# repeating the conversion. The following cells rely on `torch` and `model_pth`
# defined here.
import torch
# NOTE(review): the file name does not say which ensemble member it holds;
# in this run it is model2 (the model1 line is commented out) - confirm naming.
model_pth = 'model27.pth'
# torch.save(model1_trt.state_dict(), model_pth)
torch.save(model2_trt.state_dict(), model_pth)

In [15]:
# Round-trip check: rebuild the converted model from the file written above.
from torch2trt import TRTModule

# Empty TRTModule that receives its content from the saved state dict.
model_trt = TRTModule()

# Uses `torch` and `model_pth` from the save cell.
model_trt.load_state_dict(torch.load(model_pth))

<All keys matched successfully>

In [None]:
import torch

# Smoke test: run the reloaded module once on an all-zero input with the same
# shape as the example used for conversion.
x = torch.zeros(1, 1, 1024, 1024, device="cuda")
model_trt(x)

In [None]:
# Housekeeping: collect unreachable Python objects first, then ask PyTorch to
# release cached CUDA memory it no longer uses.
import gc
gc.collect()
torch.cuda.empty_cache()