In [17]:
import h5py
import cv2 as cv

In [5]:
def get_img_name(f, idx=0):
    names = f['digitStruct/name']
    img_name = ''.join(map(chr, f[names[idx][0]][()].flatten()))
    return(img_name)

In [6]:
def get_img_boxes(f, idx=0):
    """
    get the 'height', 'left', 'top', 'width', 'label' of bounding boxes of an image
    :param f: h5py.File
    :param idx: index of the image
    :return: dictionary
    """
    bboxs = f['digitStruct/bbox']
    box = f[bboxs[idx][0]]
    meta = { key : [] for key in box.keys()}

    for key in box.keys():
        if box[key].shape[0] == 1:
            meta[key].append(int(box[key][0][0]))
        else:
            for i in range(box[key].shape[0]):
                meta[key].append(int(f[box[key][i][0]][()].item()))

    return meta

In [22]:
def create_annot_file(f, path, idx=0):
    # get image name and bounding info
    name = get_img_name(f, idx)
    boxes = get_img_boxes(f, idx)
    
    # get dimensions of image
    try:
        (h_img, w_img) = cv.imread(path + name).shape[:2]
    except:
        print(f"ERROR: Could not open {name} to get dimensions.")
        print("Make sure image is in same directory as digitStruct.mat")
        print(f"Tried:  {path + name}")
        
    # initialize list for annotations
    annots = []
    
    for i in range(len(boxes['label'])):
        # get original bounding values
        (x, y) = (boxes['left'][i], boxes['top'][i])
        (w, h) = (boxes['width'][i], boxes['height'][i])

        # transform x and y
        centerX = x + (w / 2)
        centerY = y + (h / 2)

        # normalize bounding values
        centerX /= w_img
        centerY /= h_img
        w /= w_img
        h /= h_img

        # get label
        label = boxes['label'][i] if boxes['label'][i] != 10 else 0

        # append annotation in Darknet format to annotation list
        annots.append(f'{label} {centerX} {centerY} {w} {h}\n' )
    
    # write annotations to file 
    annot_file = open(path + name.split('.')[0] + '.txt', 'w')
    annot_file.writelines(annots)
    annot_file.close()

In [23]:
def create_annot_files(path):
    if path[-1] != '/':
        path += '/'
    
    try:
        f = h5py.File(f'{path}digitStruct.mat', mode='r')
    except:
        print("ERROR: Could not open file.  Check path to digitStruct.mat")
        
    for i in range(len(f['digitStruct/name'])):
        create_annot_file(f, path, i)

In [24]:
path = './Data/SVHN/Full/train'

create_annot_files(path)

### Testing on single image
---

In [10]:
annots

['1 0.4117647058823529 0.5 0.14705882352941177 0.8571428571428571\n',
 '7 0.5955882352941176 0.5357142857142857 0.25 0.8571428571428571\n']

In [11]:
img = cv.imread(path + name)
h, w = img.shape[:2]

In [12]:
print(f'({h}, {w})')

(28, 68)


In [13]:
np.random.seed(42)
colors = np.random.randint(0, 255, size=(10, 3), dtype='uint8')

In [14]:
boxes = []
classIDs = []

for annot in annots:
    detection = annot.split(' ')
    detection[-1] = detection[-1][:-2]

    classIDs.append(int(detection[0]))
    box = list(map(float, detection[-4:])) * np.array([w, h, w, h])
    (centerX, centerY, width, height) = box.astype("int")
    x = int(centerX - (width / 2))
    y = int(centerY - (height / 2))
    box = [x, y, int(width), int(height)]
    boxes.append(box)

In [15]:
boxes

[[23, 2, 10, 23], [31, 3, 17, 23]]

In [16]:
classIDs

[1, 7]

In [17]:
for i in range(len(classIDs)):
    (x, y) = (boxes[i][0], boxes[i][1])
    (w, h) = (boxes[i][2], boxes[i][3])
    color = [int(c) for c in colors[classIDs[i]]]
    cv.rectangle(img, (x, y), (x + w, y + h), color, 2)
    text = str(classIDs[i])
    cv.putText(img, text, (x, y - 5), cv.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)

In [18]:
# Display the resulting frame
cv.imshow('frame',img)
cv.waitKey(1)
cv.destroyAllWindows()