Preparing dam data for darknet format:

This script has two inputs:
    - not_a_dam_images.zip
    - dam_images.zip
and two outputs (directories)
    - images
    - labels

The input zip files should be in the same directory as this notebook (they are opened by bare file name from the working directory)

Each output image file will correspond to a label file, which is in darknet format (class, center x, center y, width, height)

x, y, w, and h are all normalized (between 0 and 1) relative to the image dimensions

In [1]:
# import required libraries

import os
import shutil
import zipfile as zf
import json
import numpy as np
from matplotlib import image as mpimg

In [4]:
# unzip files

# each archive is extracted into the current working directory; the
# context manager closes the archive even if extraction raises
with zf.ZipFile('not_a_dam_images.zip', 'r') as not_dams:
    not_dams.extractall()

with zf.ZipFile('dam_images.zip', 'r') as dams:
    dams.extractall()

In [5]:
# function to move files of certain type to other folders

def create_new_folder(src, dst, filetype):
    """
    Move every file in `src` whose name ends with `filetype` into `dst`.

    src:      folder to take the files from
    dst:      folder to move the files to; created if it does not exist yet
    filetype: filename suffix to select, e.g. '.png' or '_bb.png'

    Files are moved (renamed), not copied, so they disappear from `src`.
    """

    # create destination folder; exist_ok keeps a re-run of the cell from
    # failing with FileExistsError when the folder is already there
    os.makedirs(dst, exist_ok=True)

    # move files of specified format to destination folder
    for filename in os.listdir(src):
        if filename.endswith(filetype):
            os.rename(os.path.join(src, filename), os.path.join(dst, filename))

In [6]:
# the extracted folders hold four types of files: xml, png, png w/ bboxes (_bb.png), and json
# darknet needs only the images (png) and the bbox labels (json)

# each entry is (source folder, new folder, filename suffix)
# the order matters: all png files are moved out first, then the _bb.png
# files are split off from them, and the json files are collected last
for _src, _dst, _suffix in [
    ('dam_images', 'dam_png', '.png'),
    ('dam_png', 'dam_bb_images', '_bb.png'),
    ('not_a_dam_images', 'not_a_dam_png', '.png'),
    ('not_a_dam_png', 'not_a_dam_bb_images', '_bb.png'),
    ('dam_images', 'dam_bboxes', '.json'),
    ('not_a_dam_images', 'not_a_dam_bboxes', '.json'),
]:
    create_new_folder(_src, _dst, _suffix)

# create the two output folders
for _out in ('images', 'labels'):
    os.mkdir(_out)

In [7]:
def _darknet_label_line(img_w, img_h, coords):
    """Return the darknet label line (class 0) for one pixel bounding box."""
    # coords is read as [x, y, x, y] corner values in pixels
    # NOTE(review): the corner order inside 'pixel_bounding_box' is not
    # visible from this notebook; abs() keeps width/height positive for
    # either ordering -- confirm against the dataset
    center_x = (coords[0] + coords[2]) / (2 * img_w)
    center_y = (coords[1] + coords[3]) / (2 * img_h)
    box_w = abs(coords[2] - coords[0]) / img_w
    box_h = abs(coords[3] - coords[1]) / img_h
    return '0 %.6f %.6f %.6f %.6f' % (center_x, center_y, box_w, box_h)


def create_images_labels(png_folder, bbox_folder, classtype):

    """
    Takes in folder of pngs, folder of bounding box json files, and class type (dam or not_dam)
    For every png, finds the json file sharing its first 10 filename characters,
    writes labels/<json name without .json>.txt and copies the png into images/
    (both output folders must already exist)

    classtype 'dam':  label file holds one line "0 center_x center_y width height",
                      all normalized by the image width / height
    anything else:    label file is left empty (no object in the image)

    Raises FileNotFoundError when a png has no matching json file
    """

    # index the json files once by their 10-character prefix instead of
    # re-listing the folder for every image; a later file overrides an
    # earlier one with the same prefix (last match wins)
    bbox_by_prefix = {}
    for item in os.listdir(bbox_folder):
        bbox_by_prefix[item[:10]] = item

    for png in os.listdir(png_folder):

        # find matching bounding box json file; fail loudly rather than
        # silently reusing the json of the previous image
        bbox = bbox_by_prefix.get(png[:10])
        if bbox is None:
            raise FileNotFoundError(
                'no bounding box json in ' + bbox_folder + ' matches ' + png)

        png_path = os.path.join(png_folder, png)

        if classtype == 'dam':
            # imread returns an array shaped (rows, columns[, channels]),
            # i.e. height comes first and width second
            img = mpimg.imread(png_path)
            img_h, img_w = img.shape[0], img.shape[1]

            # read json bounding box coordinates
            with open(os.path.join(bbox_folder, bbox)) as json_file:
                coords = json.load(json_file)['pixel_bounding_box']

            label_str = _darknet_label_line(img_w, img_h, coords)
        else:
            # an empty label file needs neither the image size nor the bbox
            label_str = ''

        # write new label file
        with open('labels/' + bbox[:-5] + '.txt', 'w') as label_file:
            label_file.write(label_str)

        # copy image to new folder
        shutil.copyfile(png_path, 'images/' + png)

In [8]:
# create the image and label files, one pass per class
for _png_dir, _bbox_dir, _class_name in (
    ('dam_png', 'dam_bboxes', 'dam'),
    ('not_a_dam_png', 'not_a_dam_bboxes', 'not_dam'),
):
    create_images_labels(_png_dir, _bbox_dir, _class_name)

In [7]:
# TODOS
# X delete unwanted directories
# - create test and train folders
# - split test and train data into two folders
# X create .txt files for test and train data file paths
# X~ create .names file
# X~ create .data file
# X~ download cfg file
# X download pre-trained weights

In [25]:
# remove the intermediate folders (and everything in them) now that
# images/ and labels/ have been written
delete_folders = [
    'dam_bb_images',
    'dam_bboxes',
    'dam_images',
    'dam_png',
    'not_a_dam_bb_images',
    'not_a_dam_bboxes',
    'not_a_dam_images',
    'not_a_dam_png',
]
for folder in delete_folders:
    shutil.rmtree(folder)

AttributeError: module 'shutil' has no attribute 'remove'

In [23]:
# create .txt files for test and train data file paths

def create_txt_file(split):
    """
    Write <split>_images.txt in the current working directory, listing the
    absolute path of every entry in images/<split>, one per line.

    split: name of the sub-folder of images/ to list, e.g. 'test' or 'train';
           the folder must already exist

    The '.ipynb_checkpoints' entry is left out, and entries are written in
    sorted order so the file is the same on every run (os.listdir order is
    arbitrary).
    """
    cwd = os.getcwd()
    names = sorted(
        name for name in os.listdir('images/' + split)
        if name != '.ipynb_checkpoints'
    )

    # the with-block closes the list file even if a write fails
    with open(cwd + '/' + split + '_images.txt', 'w') as txt:
        txt.writelines(
            cwd + '/images/' + split + '/' + name + '\n' for name in names
        )

In [24]:
# write the path list for each data split
for _split in ('test', 'train'):
    create_txt_file(_split)