In [None]:
import numpy as np
from mxnet import nd
from mxnet import image
import sys

sys.path.append('..')
import utils
from time import time
from mxnet import gluon
from mxnet.gluon import nn
from mxnet.gluon.model_zoo import vision as models
from mxnet import init

# Segmentation class names: index 0 is the background, indices 1..19 are the
# landmark classes 'p1' .. 'p19'.
classes = ['background'] + ['p%d' % number for number in range(1, 20)]

# One RGB triple per entry of `classes`, used to paint a class-index map.
colormap = [
    [0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0],
    [0, 0, 128], [128, 0, 128], [0, 64, 128], [128, 128, 128],
    [64, 0, 0], [192, 0, 0], [64, 128, 0], [192, 128, 0],
    [64, 0, 128], [192, 0, 128], [64, 128, 128], [192, 128, 128],
    [0, 64, 0], [128, 64, 0], [0, 192, 0], [128, 192, 0],
]

# One [x, y] offset per landmark (19 entries). read_images subtracts the entry
# for the active landmark from the annotated coordinates.
# NOTE(review): presumably the top-left corner of each landmark's crop within
# the full-size image -- confirm against the cropping script.
min_coordinate = [
    [504, 697], [1063, 606], [948, 888], [286, 902], [1079, 1191],
    [1053, 1507], [1048, 1661], [993, 1729], [1025, 1706], [415, 1391],
    [1121, 1355], [1138, 1388], [1245, 1294], [1212, 1477], [1170, 1152],
    [1114, 1756], [635, 1090], [1081, 1100], [350, 1006],
]
# Former fixed half-width of the label box. `expand_size` is now assigned by
# the sweep loop below instead.
# expand_size = 55
# Zero-based index of the landmark being trained. It selects the entry of
# `min_coordinate`, the token read from each annotation file, and (plus one)
# the numbered image sub-directory.
landmark_index = 2
# Dataset locations, relative to the working directory.
data_root = '../data'
image_root = data_root + '/CephalometricLandmark/CroppedImage'
txt_root = data_root + '/CephalometricLandmark/AnnotationsByMD'
# Per-channel mean and standard deviation applied by normalize_image after the
# pixel values have been divided by 255.
# NOTE(review): presumably the ImageNet statistics expected by the pretrained
# backbone -- confirm.
rgb_mean = nd.array([0.485, 0.456, 0.406])
rgb_std = nd.array([0.229, 0.224, 0.225])

# Repeat the complete sweep 10 times; the repetition index itself is unused.
for _ in range(10):
    # Sweep the label-box half-width over 80, 85, ..., 155. Everything in the
    # loop body (data loading, model construction, training, evaluation) is
    # redone for each value.
    for expand_size in range(80, 160, 5):
        # Blank line that separates the result groups of successive
        # expand_size values in the printed output.
        print()


        def read_images(dataset_num=0):
            """Load one split of cropped images and build their box label masks.

            ``dataset_num`` selects the image numbers to load: 0 -> 1..150,
            1 -> 151..300, any other value -> 301..400.

            For every image number the landmark position is read from the
            ``400_senior`` annotation file, shifted by
            ``min_coordinate[landmark_index]``, and a box reaching
            ``expand_size`` to each side of that position is set to 1 in an
            otherwise-zero mask with the image's height and width.

            Reads the enclosing-scope names ``landmark_index``, ``expand_size``,
            ``min_coordinate``, ``image_root`` and ``txt_root``.

            Returns a ``(data, label)`` pair of equal-length lists: the images
            as returned by ``image.imread`` and the matching masks.
            """
            # Image numbers of the requested split (end is exclusive).
            if dataset_num == 0:
                first_id, stop_id = 1, 151
            elif dataset_num == 1:
                first_id, stop_id = 151, 301
            else:
                first_id, stop_id = 301, 401

            data, label = [], []
            for image_id in range(first_id, stop_id):
                image_filename = image_root + "/%02d/%03d.bmp" % (landmark_index + 1, image_id)
                txt_filename1 = txt_root + '/400_senior' + "/%03d.txt" % image_id

                with open(txt_filename1, 'r') as f:
                    tokens = f.read().split()

                # The token for the active landmark is "x,y"; shift it by the
                # per-landmark offset.
                coords = tokens[landmark_index].split(',')
                x = int(coords[0]) - min_coordinate[landmark_index][0]
                y = int(coords[1]) - min_coordinate[landmark_index][1]

                # Box bounds, clamped to 0..639. The upper bounds are used as
                # exclusive slice ends below.
                # NOTE(review): 640/639 are hard-coded although the mask shape
                # is taken from the image -- confirm all crops are 640x640.
                minx = max(x - expand_size, 0)
                maxx = min(x + expand_size, 639)
                miny = max(y - expand_size, 0)
                maxy = min(y + expand_size, 639)

                picture = image.imread(image_filename)
                mask = nd.zeros((picture.shape[0], picture.shape[1]))
                mask[miny:maxy, minx:maxx] = 1

                data.append(picture)
                label.append(mask)
            return data, label


        def normalize_image(data):
            """Scale ``data`` to float32 in [0, 1] steps of 1/255, then standardise it.

            The result is ``(data / 255 - rgb_mean) / rgb_std`` using the
            enclosing-scope ``rgb_mean`` and ``rgb_std``.
            """
            scaled = data.astype('float32') / 255
            centered = scaled - rgb_mean
            return centered / rgb_std


        class VOCSegDataset(gluon.data.Dataset):
            """In-memory dataset of normalised images and their label masks.

            All samples of the split chosen by ``dataset_num`` are loaded with
            ``read_images`` and normalised with ``normalize_image`` when the
            dataset is constructed. ``crop_size`` is stored on the instance but
            not otherwise used by this class.
            """

            def __init__(self, dataset_num, crop_size):
                self.crop_size = crop_size
                images, masks = read_images(dataset_num=dataset_num)
                # Normalise every image, replacing the list contents in place.
                images[:] = map(normalize_image, images)
                self.data = images
                self.label = masks

            def __getitem__(self, idx):
                """Return ``(image, mask)`` with the image axes reordered by (2, 0, 1)."""
                sample = self.data[idx]
                return sample.transpose((2, 0, 1)), self.label[idx]

            def __len__(self):
                """Return the number of samples held."""
                return len(self.data)


        # Spatial size passed to the datasets and used for the dummy batch that
        # is fed through the network later on.
        input_shape = (640, 640)
        batch_size = 16

        # Split 0 is used for training, split 1 for the held-out loader.
        voc_train = VOCSegDataset(dataset_num=0, crop_size=input_shape)
        voc_test1 = VOCSegDataset(dataset_num=1, crop_size=input_shape)

        # Both loaders drop the final incomplete batch; only the training
        # loader shuffles.
        train_data = gluon.data.DataLoader(
            voc_train,
            batch_size,
            shuffle=True,
            last_batch='discard',
        )
        test_data = gluon.data.DataLoader(
            voc_test1,
            batch_size,
            last_batch='discard',
        )

        # NOTE(review): `conv` is never referenced again in the visible code and
        # `conv_trans` is rebound to net[-1] before its next use, so these two
        # layers look like leftovers. They are kept because creating layers may
        # affect Gluon's automatic layer naming -- confirm before removing.
        conv = nn.Conv2D(10, kernel_size=4, padding=1, strides=2)
        conv_trans = nn.Conv2DTranspose(3, kernel_size=4, padding=1, strides=2)

        conv.initialize()
        conv_trans.initialize()

        # Pretrained ResNet-18 v2 used as the feature extractor.
        pretrained_net = models.resnet18_v2(pretrained=True)

        # Copy every feature layer except the last two into a new sequential net.
        # NOTE(review): presumably the dropped layers are the global pooling and
        # flatten stages -- confirm against the model zoo definition.
        net = nn.HybridSequential()
        for layer in pretrained_net.features[:-2]:
            net.add(layer)

        # NOTE(review): `num_classes` (20) is not used by the head below, which
        # is hard-coded to 2 output channels.
        num_classes = len(classes)

        # Segmentation head: a 1x1 convolution to 2 channels, followed by a
        # transposed convolution whose kernel/padding/stride (64/16/32) enlarge
        # the spatial size by a factor of 32.
        with net.name_scope():
            net.add(
                nn.Conv2D(2, kernel_size=1),
                nn.Conv2DTranspose(2, kernel_size=64, padding=16, strides=32)
            )


        def bilinear_kernel(in_channels, out_channels, kernel_size):
            """Build a bilinear-interpolation weight for a transposed convolution.

            Returns an ``nd.array`` of shape
            ``(in_channels, out_channels, kernel_size, kernel_size)`` and dtype
            float32. The 2-D bilinear filter is written at the index pairs
            ``(i, i)`` of the first two axes; every other entry stays zero.

            NOTE(review): the paired indexing with ``range(in_channels)`` and
            ``range(out_channels)`` presumably requires both counts to be
            equal -- confirm before calling with differing values.
            """
            factor = (kernel_size + 1) // 2
            # Centre of the kernel: a whole index for odd sizes, a half index
            # for even sizes.
            center = factor - 1 if kernel_size % 2 == 1 else factor - 0.5

            rows, cols = np.ogrid[:kernel_size, :kernel_size]
            row_profile = 1 - abs(rows - center) / factor
            col_profile = 1 - abs(cols - center) / factor
            filt = row_profile * col_profile

            weight = np.zeros(
                (in_channels, out_channels, kernel_size, kernel_size),
                dtype='float32',
            )
            weight[range(in_channels), range(out_channels), :, :] = filt
            return nd.array(weight)


        # Initialise only the two newly added head layers: zeros for the
        # transposed convolution (its weight is overwritten below) and Xavier
        # for the 1x1 convolution.
        conv_trans = net[-1]
        conv_trans.initialize(init=init.Zero())
        net[-2].initialize(init=init.Xavier())

        # Push one all-zero batch through the network before the weight shape is
        # read.
        # NOTE(review): presumably needed so deferred parameter shapes are
        # resolved -- confirm.
        x = nd.zeros((batch_size, 3, *input_shape))
        net(x)

        # Replace the transposed-convolution weight with a bilinear kernel. The
        # first three entries of the weight shape are passed as
        # (in_channels, out_channels, kernel_size).
        shape = conv_trans.weight.data().shape
        conv_trans.weight.set_data(bilinear_kernel(*shape[0:3]))

        # Five training rounds of 10 epochs each on the same network. The loss,
        # device list and trainer are recreated at the start of every round,
        # and the rest of the loop body evaluates the network after each one.
        for round_num in range(5):
            # axis=1 is the axis of the network output the softmax is taken over.
            loss = gluon.loss.SoftmaxCrossEntropyLoss(axis=1)

            ctx = utils.try_all_gpus()
            net.collect_params().reset_ctx(ctx)

            # Plain SGD with learning rate 0.1 and weight decay 1e-3.
            trainer = gluon.Trainer(net.collect_params(),
                                    'sgd', {'learning_rate': .1, 'wd': 1e-3})

            utils.train(train_data, test_data, net, loss,
                        trainer, ctx, num_epochs=10)


            def predict(im):
                """Return the per-pixel argmax class map that ``net`` predicts for ``im``.

                ``im`` is normalised with ``normalize_image``, its axes are
                reordered by (2, 0, 1), a leading batch axis is added, and the
                batch is run on ``ctx[0]``. The argmax over axis 1 of the output
                is returned reshaped to two dimensions.
                """
                batch = normalize_image(im).transpose((2, 0, 1)).expand_dims(axis=0)
                scores = net(batch.as_in_context(ctx[0]))
                class_map = nd.argmax(scores, axis=1)
                # Drop the leading batch axis.
                height, width = class_map.shape[1:3]
                return class_map.reshape((height, width))


            def label2image(pred):
                """Convert a class-index map into a colour image using ``colormap``.

                Each entry of ``pred`` (cast to int32) selects one row of the
                uint8 palette built from ``colormap``.
                """
                palette = nd.array(colormap).astype('uint8')
                class_ids = pred.astype('int32').asnumpy()
                return nd.array(palette[class_ids, :])


            def evaluate_acc(result, label):
                """Check whether the predicted region is centred close to the label.

                Both arguments must provide ``asnumpy()`` returning a 2-D array;
                entries greater than 0 count as foreground.

                Two estimates of the predicted position are compared with the
                centre of the label's bounding box:

                * the centre of the prediction's bounding box, and
                * the mean coordinate of all predicted foreground pixels.

                Returns a pair ``(hit_center, hit_mean)``. Each flag is true when
                the squared distance is below 400, i.e. the estimate lies within
                20 pixels of the label centre. ``(False, False)`` is returned
                when the prediction has no foreground pixel at all.

                NOTE(review): a label without any foreground pixel is not
                handled; ``np.max`` on the empty index array is expected to
                raise ``ValueError`` in that case.
                """
                # Copy each array to the host and scan it once; every statistic
                # below is derived from these index arrays.
                result_index = np.where(result.asnumpy() > 0)
                result_rows, result_cols = result_index[0], result_index[1]
                if result_cols.size == 0:
                    return False, False

                # Bounding-box centre of the prediction (truncated to int).
                result_centerx = int((np.max(result_cols) + np.min(result_cols)) / 2)
                result_centery = int((np.max(result_rows) + np.min(result_rows)) / 2)

                # Mean position of the predicted foreground pixels.
                result_avgx = np.average(result_cols)
                result_avgy = np.average(result_rows)

                label_index = np.where(label.asnumpy() > 0)
                label_rows, label_cols = label_index[0], label_index[1]

                # Bounding-box centre of the label (truncated to int).
                label_centerx = int((np.max(label_cols) + np.min(label_cols)) / 2)
                label_centery = int((np.max(label_rows) + np.min(label_rows)) / 2)

                # Squared distances of both estimates to the label centre.
                d1 = (result_centerx - label_centerx) ** 2 + (result_centery - label_centery) ** 2
                d2 = (result_avgx - label_centerx) ** 2 + (result_avgy - label_centery) ** 2

                return (d1 < 400, d2 < 400)


            # Evaluate on the third split (image numbers 301..400), which is
            # used neither by the training loader nor by the held-out loader.
            test_images, test_labels = read_images(2)
            n = len(test_images)

            # acc1 counts hits of the bounding-box-centre estimate, acc2 hits of
            # the mean-position estimate (see evaluate_acc).
            acc1 = 0
            acc2 = 0
            for test_image, test_label in zip(test_images, test_labels):
                f1, f2 = evaluate_acc(predict(test_image), test_label)
                if f1:
                    acc1 += 1
                if f2:
                    acc2 += 1

            # "round" reports the cumulative number of epochs trained so far for
            # the current expand_size (10 per training round).
            print("expand_size : ", expand_size, " round : ", 10 + 10 * round_num, " acc1 : ", acc1 / n, " acc2 : ",
                  acc2 / n)



expand_size :  80  round :  10  acc1 :  0.01  acc2 :  0.01
expand_size :  80  round :  20  acc1 :  0.01  acc2 :  0.03
expand_size :  80  round :  30  acc1 :  0.02  acc2 :  0.06
expand_size :  80  round :  40  acc1 :  0.01  acc2 :  0.01
expand_size :  80  round :  50  acc1 :  0.01  acc2 :  0.02

expand_size :  85  round :  10  acc1 :  0.01  acc2 :  0.02
expand_size :  85  round :  20  acc1 :  0.01  acc2 :  0.03
expand_size :  85  round :  30  acc1 :  0.01  acc2 :  0.01
expand_size :  85  round :  40  acc1 :  0.01  acc2 :  0.01
expand_size :  85  round :  50  acc1 :  0.02  acc2 :  0.02

expand_size :  90  round :  10  acc1 :  0.04  acc2 :  0.03
expand_size :  90  round :  20  acc1 :  0.02  acc2 :  0.02
expand_size :  90  round :  30  acc1 :  0.04  acc2 :  0.04
expand_size :  90  round :  40  acc1 :  0.04  acc2 :  0.04
expand_size :  90  round :  50  acc1 :  0.03  acc2 :  0.03

expand_size :  95  round :  10  acc1 :  0.01  acc2 :  0.02
expand_size :  95  round :  20  acc1 :  0.02  acc2 :