In [15]:
import numpy as np
import math
import os
import glob
import json
import cv2

这段代码实现了一个将一个图像中的多个感兴趣区域（RoI）剪裁出来的功能。代码首先将原始图像进行了边界填充，然后依次对每个RoI进行如下处理：

1. 对RoI对应的掩膜进行与原始图像相同的边界填充。
2. 利用图像矩计算填充后掩膜的质心，作为RoI的中心点坐标。
3. 以RoI的中心点为基准，对填充后的图像和掩膜分别进行透视变换，得到变换后的RoI图像和RoI掩膜。
4. 将变换后的RoI图像与变换后的掩膜（除以255后）相乘，得到RoI的分割结果。
5. 将变换后的RoI掩膜、RoI图像和分割结果保存到磁盘上，并把RoI中心点在原始（未填充）图像中的坐标写入 img.json。
这个功能的实现涉及到了一些图像处理的基础知识，包括边界填充、图像中心点的计算、透视变换等。其中，透视变换的计算涉及到了相机模型和相机内参的计算。在代码中，computeR函数用于计算相机的旋转矩阵，computeK函数用于计算相机的内参矩阵，而transformation函数则利用旋转矩阵和内参矩阵对图像进行透视变换。

fov_x_base 是基础图像的水平视场角（Field of View，FOV），表示在相机成像平面上可以看到的水平角度范围。在这段代码中，它被用来计算基础图像的垂直视场角 fov_y_base。

patch_size 是感兴趣区域（Region of Interest，RoI）的大小，也就是从基础图像中裁剪出来的每个叶片的大小。在这段代码中，它被用来计算 RoI 的新的垂直视场角和水平视场角。

In [62]:

fov_x_base = 40  # horizontal field of view of the base image, in degrees (converted with math.radians where used)
patch_size = 256  # side length in pixels of the square cropped RoI (depends on leaf size)

058
071
057
025
050
069
023
009
066
020
048
068
044
002
027
001
065
060
036
067
015
064
017
040
005
049
024
072
070
047
034
008
019
062
011
054
031
041
030
007
016
042
014
055
035
018
033
022
056
043
063
053
073
032
045
003
051
028
026
010
052
006
013
038
039
061
004
012
037
046
029
021
059


In [66]:
import os
origin_path = "../../data/DenseLeaves/bush/"  # directory holding the base (bush) images
# Keep only PNG entries: every name used to be rebuilt as "<stem>.png", so any
# other directory entry was loaded as None and crashed on img.shape.
# Sorting makes the processing order reproducible.
imgs = sorted(f for f in os.listdir(origin_path) if f.lower().endswith(".png"))
for file in imgs:
    im_name = os.path.splitext(file)[0]
    save_dir = "./test/crop/{}".format(im_name)

    # Read through origin_path instead of repeating the directory literal.
    img = cv2.imread(os.path.join(origin_path, file))
    if img is None:
        # cv2.imread reports an unreadable file by returning None, not by raising.
        print("skip {}: cannot read base image".format(file))
        continue

    # Instance masks predicted for this base image, in a stable order.
    mask_list = sorted(glob.glob("./test/inst/{}/*.png".format(im_name)))

    # Output layout: <save_dir>/mask, <save_dir>/seg, <save_dir>/img
    for sub_dir in ('mask', 'seg', 'img'):
        os.makedirs(os.path.join(save_dir, sub_dir), exist_ok=True)

    print(img.shape)

(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024, 3)
(768, 1024

In [45]:
def computeR(target_pixel, img_center, fov_x, W):
    """Rotation that points the camera's optical axis at ``target_pixel``.

    Args:
        target_pixel: RoI center on the base image, ``(u, v)``.
        img_center: image center pixel of the base image, ``(cx, cy)``.
        fov_x: horizontal field of view of the base image, in degrees.
        W: width of the base image in pixels.

    Returns:
        A 3x3 orthonormal ``np.ndarray`` that maps base-camera coordinates to
        RoI-camera coordinates (rotation only, no translation).
    """
    # Viewing ray through the target pixel; its z component is the focal
    # length of the base camera expressed in pixels.
    focal = W / (2 * math.tan(math.radians(fov_x / 2)))
    view_z = np.array([target_pixel[0] - img_center[0],
                       target_pixel[1] - img_center[1],
                       focal], dtype=float)
    view_z = view_z / np.linalg.norm(view_z)

    # cross(y_axis, view_z) has length sin(angle) < 1 whenever the target is
    # off the horizontal centre line. Without renormalising, the stacked axes
    # are not orthonormal and the result would contain a scale factor on top
    # of the rotation. The norm cannot be zero because view_z always has a
    # positive z component.
    view_x = np.cross(np.array([0.0, 1.0, 0.0]), view_z)
    view_x = view_x / np.linalg.norm(view_x)
    view_y = np.cross(view_z, view_x)

    # The new axes as columns form the RoI->base rotation; its inverse is the
    # transpose, i.e. the same axes stacked as rows.
    R = np.stack([view_x, view_y, view_z])

    return R


def computeK(fov, img_size, img_center):
    """Build a pinhole intrinsic matrix from field of view and image size.

    Args:
        fov: ``(fov_y, fov_x)`` in degrees.
        img_size: ``(H, W)`` in pixels.
        img_center: center pixel ``(cx, cy)``.

    Returns:
        3x3 ``np.ndarray`` ``[[fx, 0, cx], [0, fy, cy], [0, 0, 1]]``.
    """
    height, width = img_size[0], img_size[1]
    half_tan_y = math.tan(math.radians(fov[0]/2))
    half_tan_x = math.tan(math.radians(fov[1]/2))

    # focal length in pixels: image extent divided by twice the tangent of the half-angle
    focal_y = height / (2 * half_tan_y)
    focal_x = width / (2 * half_tan_x)

    row_x = [focal_x, 0, img_center[0]]
    row_y = [0, focal_y, img_center[1]]
    row_w = [0, 0, 1]

    return np.array([row_x, row_y, row_w])


def transformation(img, target_pixel, patch_size, fov_x=None):
    """Warp ``img`` into a square patch viewed by a camera rotated toward ``target_pixel``.

    Args:
        img: base image array; only ``img.shape[:2]`` (H, W) is inspected here.
        target_pixel: RoI center on the base image, ``(u, v)``.
        patch_size: side length in pixels of the square output patch.
        fov_x: horizontal field of view of the base image in degrees. When
            omitted, the module-level ``fov_x_base`` is used, which keeps
            existing three-argument calls working unchanged.

    Returns:
        The ``patch_size`` x ``patch_size`` image produced by
        ``cv2.warpPerspective`` with the homography ``K2 @ R @ inv(K1)``.
    """
    if fov_x is None:
        fov_x = fov_x_base  # looked up at call time, as before

    base_h, base_w = img.shape[:2]
    tan_half_x = math.tan(math.radians(fov_x / 2))

    # Vertical FOV of the base image, and the FOV of the patch camera, are both
    # derived from the same tangent so that all focal lengths (in pixels) match.
    fov_y_base = 2 * math.degrees(math.atan(tan_half_x * base_h / base_w))
    new_fov_x = 2 * math.degrees(math.atan(tan_half_x * patch_size / base_w))
    new_fov_y = new_fov_x  # square patch -> same FOV in both directions

    base_center = (base_w // 2, base_h // 2)
    patch_center = (patch_size // 2, patch_size // 2)

    # compute rotation matrix and intrinsic matrices
    R = computeR(target_pixel=target_pixel, img_center=base_center, fov_x=fov_x, W=base_w)
    K1 = computeK(fov=(fov_y_base, fov_x), img_size=(base_h, base_w), img_center=base_center)
    K2 = computeK(fov=(new_fov_y, new_fov_x), img_size=(patch_size, patch_size), img_center=patch_center)

    # Homography: base pixels -> base camera rays -> rotated camera -> patch pixels.
    H = K2 @ R @ np.linalg.inv(K1)
    H = H / H[2, 2]  # normalise so the bottom-right entry is 1

    dst = cv2.warpPerspective(img, H, (patch_size, patch_size))

    return dst


In [65]:
import os
origin_path = "../../data/DenseLeaves/bush/"  # directory holding the base (bush) images
half_patch = patch_size // 2  # zero border added on every side before warping

# Keep only PNG entries: every name used to be rebuilt as "<stem>.png", so any
# other directory entry was loaded as None and crashed in np.pad.
# Sorting makes the processing order reproducible.
imgs = sorted(f for f in os.listdir(origin_path) if f.lower().endswith(".png"))
for file in imgs:
    im_name = os.path.splitext(file)[0]
    save_dir = "./test/crop_sam/{}".format(im_name)

    # Read through origin_path instead of repeating the directory literal.
    img = cv2.imread(os.path.join(origin_path, file))
    if img is None:
        # cv2.imread reports an unreadable file by returning None, not by raising.
        print("skip {}: cannot read base image".format(file))
        continue

    # Instance masks come from the dataset's mask directory (not ./test/inst).
    mask_list = sorted(glob.glob("../../data/DenseLeaves/mask/{}/*.png".format(im_name)))

    # Output layout: <save_dir>/mask, <save_dir>/seg, <save_dir>/img
    for sub_dir in ('mask', 'seg', 'img'):
        os.makedirs(os.path.join(save_dir, sub_dir), exist_ok=True)

    # Pad height and width (not channels) with zeros so RoIs near the border
    # can still be warped into a full patch.
    pad_img = np.pad(img, ((half_patch, half_patch), (half_patch, half_patch), (0, 0)))

    count = 1       # 1-based index of the next saved RoI
    data = dict()   # "<index>" -> RoI center in un-padded image coordinates
    for mask_path in mask_list:
        mask = cv2.imread(mask_path)
        if mask is None:
            print("skip {}: cannot read mask".format(mask_path))
            continue
        pad_mask = np.pad(mask, ((half_patch, half_patch), (half_patch, half_patch), (0, 0)))

        # compute centroid of mask (target pixel)
        mu = cv2.moments(pad_mask[:, :, 0], False)
        if mu["m00"] == 0:
            # An all-zero mask has no centroid; dividing would raise ZeroDivisionError.
            print("skip {}: empty mask".format(mask_path))
            continue
        x, y = int(mu["m10"] / mu["m00"]), int(mu["m01"] / mu["m00"])

        new_img = transformation(pad_img, (x, y), patch_size)
        new_mask = transformation(pad_mask, (x, y), patch_size)

        # new_mask // 255 is 1 only where the warped mask is exactly 255, so
        # the product keeps the image pixels inside the mask and zeroes the rest.
        new_seg = new_mask // 255 * new_img

        stem = '{:03}'.format(count)
        cv2.imwrite(os.path.join(save_dir, 'mask/{}.png'.format(stem)), new_mask[:, :, 0])
        cv2.imwrite(os.path.join(save_dir, 'img/{}.png'.format(stem)), new_img)
        cv2.imwrite(os.path.join(save_dir, 'seg/{}.png'.format(stem)), new_seg.astype(np.uint8))

        # Subtract the padding so the stored center refers to the original image.
        data[stem] = {'x': x - half_patch, 'y': y - half_patch}
        count += 1

    with open(os.path.join(save_dir, "img.json"), "w") as f:
        json.dump(data, f)
