### Kaggle 要求以 notebook 形式提交且运行时不能联网，因此需要用到的非预装包要先自行上传到 Kaggle dataset，再用 sys.path.append() 添加其路径

In [None]:
import sys
# Extend the import search path with the package directories uploaded as a Kaggle dataset
# (the notebook runs offline). Presumably these provide the `labml_nn` / `labml_helpers`
# packages imported further down -- verify against the dataset layout.
sys.path.append('../input/labnn/labml-nn')
sys.path.append('../input/labnn/helpers')


In [3]:
import os
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import collections
from PIL import Image
from torchvision import transforms
from torchvision import transforms as T
from torch.autograd import Variable
import torch.nn.functional as F

from labml_helpers.module import Module
from labml_nn.utils import clone_module_list

In [4]:
# Size of the full images in pixels; arrays are indexed as [HEIGHT, WIDTH].
WIDTH = 704
HEIGHT = 520

# NOTE(review): not referenced in the visible part of this notebook (both loaders use batch_size=1).
BATCH_SIZE = 32

# Side length, in pixels, of the square patches the datasets cut out of every image.
# This is unrelated to the `patch_size` used for the ConvMixer patch embeddings.
PATCH_SIZE = 64

# CSV listing the test image ids (one `id` per row is read by CellDataset_submit).
TEST_CSV = "../input/sartorius-cell-instance-segmentation/sample_submission.csv"
# Directory that holds the test images as `<id>.png`.
TEST_PATH = "../input/sartorius-cell-instance-segmentation/test" # guanhui
# NOTE(review): RESULT_DIR and LOG_PATH are not referenced in the visible part of this notebook.
RESULT_DIR = "./" 
LOG_PATH = './exp.log'

# Checkpoint (state dict) loaded into the ConvMixer model below.
PRETRAIN_MODEL = '../input/cell-pretrain-model/convmixer_iou061.pth'

## DATASET

In [26]:

class CellDataset_submit(Dataset):
    """
    ## Patch dataset for inference
    Serves square patches cut out of the images listed in a data frame.

    Every image is split into `row_sum * col_sum` patches of `patch_size` pixels,
    numbered row by row. The last patch row is anchored to the bottom edge of the
    image (`height - patch_size`), so it overlaps the row above instead of running
    past the image. Item `idx` is patch `idx % patch_num` of image `idx // patch_num`.
    """

    def __init__(self, image_dir, df, transforms=None, patch_size=16):
        """
        * `image_dir` is the directory that holds the images as `<id>.png`
        * `df` is a data frame with an `id` column; every row becomes one image
        * `transforms` is stored but not applied by this dataset
        * `patch_size` is the side length of the square patches in pixels
        """
        self.transforms = transforms
        self.image_dir = image_dir
        self.df = df
        self.patch_size = patch_size
        self.height = HEIGHT
        self.width = WIDTH

        # All full rows plus one extra row that is anchored to the bottom edge.
        self.row_sum = self.height // self.patch_size + 1
        # Only full columns are served.
        self.col_sum = self.width // self.patch_size
        self.patch_num = self.row_sum * self.col_sum

        # Map the running image position to its id and file path.
        # A plain dict (instead of a defaultdict) so that a bad index cannot
        # silently insert an empty entry and change `len(self)`.
        self.image_info = {
            position: {
                'image_id': row['id'],
                'image_path': os.path.join(self.image_dir, row['id'] + '.png'),
            }
            for position, (_, row) in enumerate(self.df.iterrows())
        }

    def _patch_bounds(self, patch_idx):
        """Return `(start_row, end_row, start_col, end_col)` of patch `patch_idx` within its image."""
        cur_row, cur_col = divmod(patch_idx, self.col_sum)
        if cur_row == self.row_sum - 1:
            # Last row: align with the bottom edge so the patch stays inside the image.
            start_row = self.height - self.patch_size
        else:
            start_row = cur_row * self.patch_size
        start_col = cur_col * self.patch_size
        return start_row, start_row + self.patch_size, start_col, start_col + self.patch_size

    def __getitem__(self, idx):
        """
        Get one patch and the id of the image it was cut from.

        Returns `(image_clip, img_name)` where `image_clip` is a float tensor produced
        by `transforms.ToTensor()` from the cut-out patch.
        Raises `IndexError` when `idx` is outside `[0, len(self))`.
        """
        if not 0 <= idx < len(self):
            raise IndexError(f"index {idx} is out of range for {len(self)} patches")

        img_idx, patch_idx = divmod(idx, self.patch_num)
        info = self.image_info[img_idx]

        # Read the pixels inside the context manager so the file handle is released.
        with Image.open(info['image_path']) as img:
            img = np.array(img)

        start_row, end_row, start_col, end_col = self._patch_bounds(patch_idx)
        image_clip = img[start_row:end_row, start_col:end_col]

        image_clip = transforms.ToTensor()(image_clip)
        image_clip = torch.as_tensor(image_clip, dtype=torch.float)

        return image_clip, info['image_id']

    def __len__(self):
        """Total number of patches over all images."""
        return len(self.image_info) * self.patch_num


In [6]:
class ConvMixerLayer(Module):
    """
    <a id="ConvMixerLayer"></a>
    ## ConvMixer layer
    One mixing block; the model stacks several of these.
    It mixes spatially with a depth-wise convolution (wrapped in a residual connection)
    and then across channels with a point-wise convolution.
    """

    def __init__(self, d_model: int, kernel_size: int):
        """
        * `d_model` is the number of channels in patch embeddings, $h$
        * `kernel_size` is the size of the kernel of spatial convolution, $k$
        """
        super().__init__()
        # Padding that keeps the spatial size unchanged for odd kernel sizes
        same_padding = (kernel_size - 1) // 2

        # Spatial mixing: with `groups == channels` every channel gets its own filter,
        # i.e. a depth-wise convolution
        self.depth_wise_conv = nn.Conv2d(
            d_model,
            d_model,
            kernel_size=kernel_size,
            groups=d_model,
            padding=same_padding,
        )
        self.act1 = nn.GELU()
        self.norm1 = nn.BatchNorm2d(d_model)

        # Channel mixing: a $1 \times 1$ convolution is a linear map applied to each
        # patch embedding
        self.point_wise_conv = nn.Conv2d(d_model, d_model, kernel_size=1)
        self.act2 = nn.GELU()
        self.norm2 = nn.BatchNorm2d(d_model)

    def forward(self, x: torch.Tensor):
        # Depth-wise convolution -> activation -> normalization
        spatial = self.norm1(self.act1(self.depth_wise_conv(x)))
        # Residual connection around the depth-wise stage only
        spatial += x
        # Point-wise convolution -> activation -> normalization (no residual here)
        return self.norm2(self.act2(self.point_wise_conv(spatial)))


class PatchEmbeddings(Module):
    """
    <a id="PatchEmbeddings"></a>
    ## Get patch embeddings
    Cuts the image into non-overlapping $p \times p$ patches and maps every patch to an embedding.
    """

    def __init__(self, d_model: int, patch_size: int, in_channels: int):
        """
        * `d_model` is the number of channels in patch embeddings $h$
        * `patch_size` is the size of the patch, $p$
        * `in_channels` is the number of channels in the input image (3 for rgb)
        """
        super().__init__()

        # A convolution whose kernel size equals its stride sees every patch exactly once,
        # which is the same as a linear transformation applied to each patch.
        self.conv = nn.Conv2d(
            in_channels,
            d_model,
            kernel_size=patch_size,
            stride=patch_size,
        )
        # Activation followed by batch normalization
        self.act = nn.GELU()
        self.norm = nn.BatchNorm2d(d_model)

    def forward(self, x: torch.Tensor):
        """
        * `x` is the input image of shape `[batch_size, channels, height, width]`
        """
        # Convolution -> activation -> normalization
        return self.norm(self.act(self.conv(x)))


class ClassificationHead(Module):
    """
    <a id="ClassificationHead"></a>
    ## Classification Head
    Despite the name this head predicts per-pixel logits: it upsamples the feature map
    with a transposed convolution and reduces it to 2 channels with a $1 \times 1$ convolution.
    No softmax is applied here; the outputs are raw logits.
    """

    def __init__(self, d_model: int):
        """
        * `d_model` is the number of channels in patch embeddings, $h$
        """
        super().__init__()
        self.act = nn.GELU()
        # Learnable upsampling (preferred here over fixed bilinear interpolation, which
        # cannot be trained). With kernel 10, stride 8 and padding 1 the spatial size
        # grows by a factor of 8: (n - 1) * 8 - 2 + 10 = 8 * n.
        self.convtrans = nn.ConvTranspose2d(
            d_model,
            d_model,
            kernel_size=10,
            stride=8,
            padding=1,
        )
        self.batchnorm = nn.BatchNorm2d(d_model)
        # 1x1 convolution that maps whatever channel count comes in to the 2 output classes.
        # Softmax is left out because the cross-entropy loss used for training applies it itself.
        self.adjust = nn.Conv2d(d_model, 2, kernel_size=1, stride=1, padding=0)

    def forward(self, x: torch.Tensor):
        # Activation -> transposed convolution (upsampling) -> normalization
        upsampled = self.batchnorm(self.convtrans(self.act(x)))
        # Project to the 2 class logits per pixel
        return self.adjust(upsampled)


class ConvMixer(Module):
    """
    ## ConvMixer
    This combines the patch embeddings block, a number of ConvMixer layers and a classification head.
    """
    def __init__(self, conv_mixer_layer: ConvMixerLayer, n_layers: int,
                 patch_emb: PatchEmbeddings,
                 classification: ClassificationHead):
        """
        * `conv_mixer_layer` is a copy of a single [ConvMixer layer](#ConvMixerLayer).
         We make copies of it to make ConvMixer with `n_layers`.
        * `n_layers` is the number of ConvMixer layers (or depth), $d$.
        * `patch_emb` is the [patch embeddings layer](#PatchEmbeddings).
        * `classification` is the [classification head](#ClassificationHead).
        """
        super().__init__()
        # Patch embeddings
        self.patch_emb = patch_emb
        # Classification head
        self.classification = classification
        # Make copies of the [ConvMixer layer](#ConvMixerLayer)
        self.conv_mixer_layers = clone_module_list(conv_mixer_layer, n_layers)

    def forward(self, x: torch.Tensor):
        """
        * `x` is the input image of shape `[batch_size, channels, height, width]`
        """
        # Get patch embeddings. This gives a tensor of shape `[batch_size, d_model, height / patch_size, width / patch_size]`.
        x = self.patch_emb(x)

        # Pass through [ConvMixer layers](#ConvMixerLayer)
        for layer in self.conv_mixer_layers:
            x = layer(x)

        # Classification head, to get logits
        x = self.classification(x)

        #
        return x

In [27]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [28]:
# Build the patch dataset over the test images listed in TEST_CSV.
df_all = pd.read_csv(TEST_CSV)
test_dataset = CellDataset_submit(TEST_PATH, df_all, patch_size=PATCH_SIZE)
# batch_size must be 1 and shuffle must be False: the inference loop below derives the
# image and the patch position from the running batch index.
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=2) 


In [29]:
# Size of a patch, $p$
patch_size: int = 8
# Number of channels in patch embeddings, $h$
d_model: int = 256
# Number of [ConvMixer layers](#ConvMixerLayer) or depth, $d$
n_layers: int = 20
# Kernel size of the depth-wise convolution, $k$
kernel_size: int = 7

In [30]:
# Assemble the network; the patch embedding takes single-channel input images.
model = ConvMixer(ConvMixerLayer(d_model, kernel_size), n_layers,
                  PatchEmbeddings(d_model, patch_size, 1),
                  ClassificationHead(d_model))

# `map_location` remaps the stored tensors onto the selected device, so a checkpoint
# that was saved on a GPU also loads when the notebook falls back to the CPU.
model.load_state_dict(torch.load(PRETRAIN_MODEL, map_location=device))
model = model.to(device)

In [62]:
## all predicted masks
# One canvas per test image. The canvas height is the number of patch rows times the
# patch size (576 for 520-pixel-high images and 64-pixel patches), so every patch row
# has its own slot; the surplus rows are folded back by merge_final8pixels afterwards.
predicted_masks_list = np.zeros((len(df_all), (HEIGHT // PATCH_SIZE + 1) * PATCH_SIZE, WIDTH))
# Placeholder names; each entry is overwritten with the image id during inference.
names = ['a'] * len(df_all)

In [63]:
def merge_final8pixels(ls, height=520, patch_size=64):
    """
    Fold the padded patch canvas back to the real image height.

    The last patch row is cut from the bottom `patch_size` rows of the image but is
    stored in its own slot at the end of the canvas. Only the final
    `height % patch_size` rows of that slot lie below the previous patch row, so they
    are copied to rows `[height - height % patch_size, height)` and the canvas is
    cropped to `height` rows.

    * `ls` is an array of shape `[n_images, padded_height, width]`; it is modified in place
    * `height` is the real image height in pixels (default 520)
    * `patch_size` is the side length of the patches (default 64)

    Returns a view of `ls` with shape `[n_images, height, width]`.
    """
    leftover = height % patch_size
    # Nothing to copy when the height is a whole number of patches.
    if leftover:
        ls[:, height - leftover:height, :] = ls[:, -leftover:, :]
    return ls[:, :height, :]

In [64]:
# Patch grid layout; it has to match the grid CellDataset_submit uses to cut the images.
row_sums = HEIGHT // PATCH_SIZE +1
col_sums = WIDTH // PATCH_SIZE
patch_nums = row_sums * col_sums
model.eval()
with torch.no_grad():
    # The loader yields one patch per batch, in order, so the batch index identifies
    # both the image and the position of the patch inside it.
    for batch_idx, (X_batch, y_name) in enumerate(test_loader):
        # `Variable` is deprecated (plain tensors do the same), so move the tensor directly.
        X_batch = X_batch.to(device)
        y_out = model(X_batch)
        # Keep only the probability of class 1 and drop the channel dimension.
        y_out = F.softmax(y_out, dim=1)[:, 1:].squeeze(1)

        img_id, patch_id = divmod(batch_idx, patch_nums)
        cur_r, cur_c = divmod(patch_id, col_sums)

        # Position on the padded canvas: every patch row, including the last one, gets
        # its own slot; merge_final8pixels folds the last slot back afterwards.
        start_r = cur_r * PATCH_SIZE
        end_r = start_r + PATCH_SIZE
        start_c = cur_c * PATCH_SIZE
        end_c = start_c + PATCH_SIZE

        predicted_masks_list[img_id, start_r:end_r, start_c:end_c] = y_out.cpu()
        # NOTE(review): `y_name` is the collated batch of ids (one element), not a bare string.
        names[img_id] = y_name

predicted_masks_list = merge_final8pixels(predicted_masks_list)

In [65]:
import matplotlib.pyplot as plt
# Display the stitched probability map of the first test image.
plt.imshow(predicted_masks_list[0])
# A bare `plt.show` only references the function; it has to be called to render the figure.
plt.show()

# Prediction on the test set is finished; waiting for the next step.

### test on validation data 

In [66]:
class IoUScore(nn.Module):
    """
    ## Soft IoU score
    Intersection over union between the predicted probability of class 1 and the target mask,
    computed over all elements of the batch at once.
    """

    def __init__(self, weight=None, size_average=True):
        """
        * `weight` and `size_average` are accepted for signature compatibility but are not used
        """
        super().__init__()

    def forward(self, inputs, targets, smooth=1):
        """
        * `inputs` are the logits with the classes along dimension 1
        * `targets` is the mask; it must hold as many elements as one class channel of `inputs`
        * `smooth` is added to numerator and denominator to avoid a division by zero

        Returns the score as a zero-dimensional numpy array.
        """
        # Probability of class 1
        inputs = F.softmax(inputs, dim=1)[:, 1:]

        # Flatten label and prediction tensors
        inputs = inputs.reshape(-1)
        targets = targets.reshape(-1)

        # The intersection is the (soft) true positive count;
        # the union is everything covered by labels or predictions
        intersection = (inputs * targets).sum()
        total = (inputs + targets).sum()
        union = total - intersection

        IoU = (intersection + smooth) / (union + smooth)

        # `detach` so that the conversion also works when the inputs track gradients
        return IoU.detach().cpu().numpy()

In [67]:
def rle_decode(mask_rle, shape):
    """
    Decode a run-length encoded mask.

    mask_rle: string of alternating "start length" numbers; starts are 1-based
              positions in the flattened (row-major) mask
    shape: (height, width) of the array to return
    Returns a uint8 numpy array, 1 - mask, 0 - background
    """
    tokens = mask_rle.split()
    # Even positions hold the starts (shifted to 0-based), odd positions the run lengths
    run_starts = np.asarray(tokens[0::2], dtype=int) - 1
    run_lengths = np.asarray(tokens[1::2], dtype=int)
    run_ends = run_starts + run_lengths

    flat_mask = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for begin, stop in zip(run_starts, run_ends):
        flat_mask[begin:stop] = 1
    return flat_mask.reshape(shape)

In [68]:
class CellDataset(Dataset):
    """
    ## Patch dataset with ground truth masks
    Serves square patches of the annotated images together with the matching patch of
    the binary mask.

    The rows of the data frame are grouped by `id` (one image can span many annotation rows)
    and the images are split 80/20 into train and validation with a fixed random state.
    Every image is split into `row_sum * col_sum` patches of `patch_size` pixels, numbered
    row by row. The last patch row is anchored to the bottom edge of the image
    (`height - patch_size`), so it overlaps the row above instead of running past the image.
    """

    def __init__(self, image_dir, df, split='train', transforms=None, resize=False, patch_size=16):
        """
        * `image_dir` is the directory that holds the images as `<id>.png`
        * `df` is a data frame with the columns `id` and `annotation` (run-length encoded mask)
        * `split` selects the subset, `'train'` or `'val'`; anything else raises `ValueError`
        * `transforms` is stored but not applied by this dataset
        * `resize` is currently ignored because resizing is switched off below
        * `patch_size` is the side length of the square patches in pixels
        """
        # Fail early with a clear message instead of an unbound variable further down.
        if split not in ('train', 'val'):
            raise ValueError(f"split must be 'train' or 'val', got {split!r}")

        self.transforms = transforms
        self.image_dir = image_dir
        self.df = df
        self.patch_size = patch_size
        self.split = split

        # Resizing is hard-wired off, so the `resize` argument has no effect.
        self.should_resize = False
        if self.should_resize:
            self.height = int(HEIGHT * resize)
            self.width = int(WIDTH * resize)
        else:
            self.height = HEIGHT
            self.width = WIDTH

        # All full rows plus one extra row that is anchored to the bottom edge.
        self.row_sum = self.height // self.patch_size + 1
        # Only full columns are served.
        self.col_sum = self.width // self.patch_size
        self.patch_num = self.row_sum * self.col_sum

        # Collect all annotations that share an id into one list, giving one row per image.
        temp_df_11 = self.df.groupby('id')['annotation'].agg(lambda x: list(x)).reset_index()

        train_df, val_df = train_test_split(temp_df_11, test_size=0.2, random_state=42)
        temp_df = train_df if self.split == 'train' else val_df
        print(len(temp_df))

        # Map the running image position to id, path and annotations.
        # A plain dict (instead of a defaultdict) so that a bad index cannot
        # silently insert an empty entry and change `len(self)`.
        self.image_info = {
            position: {
                'image_id': row['id'],
                'image_path': os.path.join(self.image_dir, row['id'] + '.png'),
                'annotations': row["annotation"],
            }
            for position, (_, row) in enumerate(temp_df.iterrows())
        }

    def _patch_bounds(self, patch_idx):
        """Return `(start_row, end_row, start_col, end_col)` of patch `patch_idx` within its image."""
        cur_row, cur_col = divmod(patch_idx, self.col_sum)
        if cur_row == self.row_sum - 1:
            # Last row: align with the bottom edge so the patch stays inside the image.
            start_row = self.height - self.patch_size
        else:
            start_row = cur_row * self.patch_size
        start_col = cur_col * self.patch_size
        return start_row, start_row + self.patch_size, start_col, start_col + self.patch_size

    def _build_mask(self, annotations):
        """Merge all instance masks of one image into a single-channel array of 0/1 values."""
        masks = np.zeros((self.height, self.width))
        for annotation in annotations:
            a_mask = rle_decode(annotation, (HEIGHT, WIDTH))
            if self.should_resize:
                a_mask = Image.fromarray(a_mask).resize((self.width, self.height), resample=Image.BILINEAR)
            masks += np.array(a_mask) > 0
        # Instances can overlap, so counts above 1 are clamped:
        # 1 where at least one instance covers the pixel, 0 elsewhere.
        return np.where(masks > 0, 1, 0)

    def __getitem__(self, idx):
        """
        Get one image patch and its target.

        Returns `(image_clip, mask_clip, img_name, masks)`: the image patch as a float tensor,
        the mask patch as a long tensor, the image id and the mask of the whole image.
        Raises `IndexError` when `idx` is outside `[0, len(self))`.
        """
        if not 0 <= idx < len(self):
            raise IndexError(f"index {idx} is out of range for {len(self)} patches")

        img_idx, patch_idx = divmod(idx, self.patch_num)
        info = self.image_info[img_idx]
        img_name = info['image_id']

        # Read the pixels inside the context manager so the file handle is released.
        with Image.open(info['image_path']) as img:
            if self.should_resize:
                img = img.resize((self.width, self.height), resample=Image.BILINEAR)
            img = np.array(img)

        # NOTE: the mask of the whole image is rebuilt for every patch that is requested.
        masks = self._build_mask(info['annotations'])

        # Cut the same window out of the mask and the image
        start_row, end_row, start_col, end_col = self._patch_bounds(patch_idx)
        mask_clip = masks[start_row:end_row, start_col:end_col]
        image_clip = img[start_row:end_row, start_col:end_col]

        to_tensor = transforms.ToTensor()
        image_clip = to_tensor(image_clip)
        mask_clip = to_tensor(mask_clip)

        # Drop the channel dimension of the mask
        mask_clip = torch.as_tensor(mask_clip, dtype=torch.long).squeeze(0)
        image_clip = torch.as_tensor(image_clip, dtype=torch.float)

        return image_clip, mask_clip, img_name, masks

    def __len__(self):
        """Total number of patches over all images of the split."""
        return len(self.image_info) * self.patch_num


In [69]:
# CellDataset.__init__ calls train_test_split, so this import has to run before the
# dataset is instantiated.
from sklearn.model_selection import train_test_split
TRAIN_PATH = '../input/sartorius-cell-instance-segmentation/train'
TRAIN_CSV='../input/sartorius-cell-instance-segmentation/train.csv'
# NOTE: this rebinds `df_all`, which held the test CSV until here.
df_all = pd.read_csv(TRAIN_CSV)
val_dataset = CellDataset(TRAIN_PATH, df_all, patch_size=PATCH_SIZE, split='val')
# batch_size=1 and shuffle=False: the validation loop below derives the image and the
# patch position from the running batch index.
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False, num_workers=2)

In [70]:
# One slot per validation image. `df_all` now holds the training CSV, whose rows
# CellDataset groups by id (an image can span many rows), so sizing the buffers by
# len(df_all) allocates far more slots than there are images.
n_val_images = len(val_dataset.image_info)
# Padded canvas: one slot per patch row (see merge_final8pixels).
predicted_masks_list_val = np.zeros((n_val_images, (HEIGHT // PATCH_SIZE + 1) * PATCH_SIZE, WIDTH))
true_masks_val = np.zeros((n_val_images, HEIGHT, WIDTH))
# Placeholder names; each entry is overwritten with the image id during evaluation.
names_val = ['a'] * n_val_images

In [71]:
# Patch grid layout; it has to match the grid CellDataset uses to cut the images.
row_sums = HEIGHT // PATCH_SIZE +1
col_sums = WIDTH // PATCH_SIZE
patch_nums = row_sums * col_sums

model.eval()
iou_list = []
metric = IoUScore()
with torch.no_grad():
    # The loader yields one patch per batch, in order, so the batch index identifies
    # both the image and the position of the patch inside it.
    for batch_idx, (X_batch, y_batch, y_name, true_mask) in enumerate(val_loader):
        # Use the device selected earlier instead of a hard-coded 'cuda', so the cell
        # also runs on a CPU-only machine. `Variable` is deprecated and not needed.
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)
        y_out = model(X_batch)
        # Per-patch IoU is currently disabled:
        # iou_score = metric(y_out, y_batch)
        # iou_list.append(iou_score)
        # Keep only the probability of class 1 and drop the channel dimension.
        y_out = F.softmax(y_out, dim=1)[:, 1:].squeeze(1)

        img_id, patch_id = divmod(batch_idx, patch_nums)
        cur_r, cur_c = divmod(patch_id, col_sums)

        # Position on the padded canvas: every patch row, including the last one, gets
        # its own slot; merge_final8pixels folds the last slot back afterwards.
        start_r = cur_r * PATCH_SIZE
        end_r = start_r + PATCH_SIZE
        start_c = cur_c * PATCH_SIZE
        end_c = start_c + PATCH_SIZE

        predicted_masks_list_val[img_id, start_r:end_r, start_c:end_c] = y_out.cpu()
        # NOTE(review): `y_name` is the collated batch of ids (one element), not a bare string.
        names_val[img_id] = y_name
        true_masks_val[img_id] = true_mask

        # Only a subset of the validation images is evaluated.
        if img_id > 25:
            break

    # avg_iou = np.mean(iou_list)
    # print("current epoch:  current mean iou: {:.4f}".format(avg_iou))

    # NOTE(review): this puts the model back into training mode; run `model.eval()` again
    # before any further inference.
    model.train()
predicted_masks_list_val = merge_final8pixels(predicted_masks_list_val)

In [81]:
import matplotlib.pyplot as plt
# Display the stitched probability map of validation image 5.
plt.imshow(predicted_masks_list_val[5])
# A bare `plt.show` only references the function; it has to be called to render the figure.
plt.show()

In [82]:
# Display the ground truth mask of validation image 5 for comparison with the prediction.
plt.imshow(true_masks_val[5])
# A bare `plt.show` only references the function; it has to be called to render the figure.
plt.show()