In [1]:
import torch
import torch.nn as nn

def extract_glimpse(img_batch, size, offsets, centered=True, normalized=True):
    W, H = img_batch.size(-1), img_batch.size(-2)

    if normalized and centered:
        offsets = (offsets+1) * offsets.new_tensor([W/2,H/2])
    elif normalized:
        offsets = offsets * offsets.new_tensor([W,H])
    elif centered:
        raise ValueError(f'Invalid parameter that offsets centered but now normalized')

    h, w = size
    xs = torch.arange(0, w, dtype=img_batch.dtype, device=img_batch.device) - (w-1)/2.0
    ys = torch.arange(0, h, dtype=img_batch.dtype, device=img_batch.device) - (h-1)/2.0

    vy, vx = torch.meshgrid(ys, xs)
    grid = torch.stack([vx, vy], dim=-1)

    offsets_grid = offsets[:,None, None, :] + grid[None, ...]
    offsets_grid = (
            offsets_grid - offsets_grid.new_tensor([W/2,H/2])) / offsets_grid.new_tensor([W/2,H/2])

    return torch.nn.functional.grid_sample(
            img_batch, offsets_grid, mode='bilinear', align_corners=False, padding_mode='zeros')

def extract_multiple_glimpse(img_batch, size, offsets, centered=True, normalized=True):
    patches = []

    for i in range(offsets.size(-2)):
        patch = extract_glimpse(
                img_batch, size, offsets[:, i, :], centered, normalized)
        patches.append(patch)

    return torch.stack(patches, dim=1)

In [15]:
img_batch = torch.zeros([8,3,64,64], dtype=torch.float32)
glimpse_size = tuple([3,3])
offsets = torch.ones([8,2],dtype=torch.float32)

In [19]:
a=torch.zeros([8,2], dtype=torch.float32)

In [21]:
torch.shape(a)

AttributeError: module 'torch' has no attribute 'shape'

In [18]:
extract_multiple_glimpse(img_batch, glimpse_size, offsets)

IndexError: too many indices for tensor of dimension 2

In [1]:
import xml.etree.ElementTree as ET
tree = ET.parse('test.xml')
root = tree.getroot()

In [20]:
root.tag

'HRSC_Image'

In [3]:
for child in root:
    print(child.tag, child.attrib)

Img_ID {}
Place_ID {}
Source_ID {}
Img_NO {}
Img_FileName {}
Img_FileFmt {}
Img_Date {}
Img_CusType {}
Img_Des {}
Img_Location {}
Img_SizeWidth {}
Img_SizeHeight {}
Img_SizeDepth {}
Img_Resolution {}
Img_Resolution_Layer {}
Img_Scale {}
Img_SclPxlNum {}
segmented {}
Img_Havemask {}
Img_MaskFileName {}
Img_MaskFileFmt {}
Img_MaskType {}
Img_SegFileName {}
Img_SegFileFmt {}
Img_Rotation {}
Annotated {}
HRSC_Objects {}


In [None]:
img_w, img_h, obj_loc(object_x_start, object_y_start, object_x_end, object_y_end)

In [15]:
root.find('Img_SizeWidth').text
root.find('Img_SizeHeight').text

'836'

In [5]:
coordinates

[{'x_min': 522, 'y_min': 138, 'x_max': 591, 'y_max': 393},
 {'x_min': 943, 'y_min': 466, 'x_max': 1013, 'y_max': 720}]

In [7]:
import xml.etree.ElementTree as ET
def bboxParser(fileName):
    root = ET.parse(fileName + '.xml')
    coordinates = []
    for obj in root.iter('HRSC_Object'):
        coordinate = {}
        coordinate['x_min'] = int(obj.find('box_xmin').text)
        coordinate['y_min'] = int(obj.find('box_ymin').text)
        coordinate['x_max'] = int(obj.find('box_xmax').text)
        coordinate['y_max'] = int(obj.find('box_ymax').text)
        coordinates.append(coordinate)
        
    return int(root.find('Img_SizeWidth').text), int(root.find('Img_SizeHeight').text), coordinates

In [8]:
w,h,a = bboxParser('test')

In [6]:
a

[{'x_min': 522, 'y_min': 138, 'x_max': 591, 'y_max': 393},
 {'x_min': 943, 'y_min': 466, 'x_max': 1013, 'y_max': 720}]

In [9]:
w

1181

In [10]:
h

836