Init create COCO #2

Merged
merged 2 commits into from Dec 22, 2021
1 change: 1 addition & 0 deletions .gitignore
@@ -1,3 +1,4 @@
__pycache__/
.qt_for_python/
.vscode/settings.json
data/
34 changes: 9 additions & 25 deletions deep-learning-datasets-maker/split_rs_data.py
@@ -21,7 +20,6 @@
* *
***************************************************************************/
"""
from numpy import double
from qgis.PyQt.QtCore import QSettings, QTranslator, QCoreApplication
from qgis.PyQt.QtGui import QIcon
from qgis.PyQt.QtWidgets import QAction, QFileDialog, QTabWidget
@@ -44,7 +43,7 @@
import os
import os.path as osp
from .utils import *
from .utils.COCO import *
from .utils.COCO import clip_from_file, slice, from_mask_to_coco

# import argparse

@@ -379,36 +378,21 @@ def mkdir_p(path):
generate_list(args)

if self.dlg.checkBoxCOCO.isChecked():

# COCO Dataset Paths
dataset_COCO = osp.join(dataset_path, "COCO")
mkdir_p(dataset_COCO)
Ras_COCO_path = osp.join(dataset_COCO, "image/")
annotations_COCO_path = osp.join(dataset_COCO, "annotations/")
train_COCO_path = osp.join(dataset_COCO, "train/")
eval_COCO_path = osp.join(dataset_COCO, "eval/")
test_COCO_path = osp.join(dataset_COCO, "test/")

mkdir_p(Ras_COCO_path)
mkdir_p(annotations_COCO_path)
mkdir_p(train_COCO_path)
mkdir_p(eval_COCO_path)
mkdir_p(test_COCO_path)

# img_path = "/".join(ras_path.split("/")[:-1])
# shp_path = "/".join(vec_path.split("/")[:-1])
img_path = ras_path
shp_path = vec_path

# root_path_test = ""
clip_from_file(SplittingSize, dataset_COCO,
img_path, shp_path)
slice(dataset_COCO, train=Training_Set,
eval=Val_Set, test=Testing_Set)
from_mask_to_coco(dataset_COCO, "train",
"image", "annotations")
# from_mask_to_coco(ROOT_DIR, 'eval', "image", "annotations")
# from_mask_to_coco(ROOT_DIR, 'test', "image", "annotations")

# TODO: avoid re-clipping the image; reuse the already clipped tiles
# to generate the COCO format directly
clip_from_file(SplittingSize, dataset_COCO, ras_path, vec_path)
slice(dataset_COCO, train=Training_Set, eval=Val_Set, test=Testing_Set)
from_mask_to_coco(dataset_COCO, 'train', "image", "annotations")
from_mask_to_coco(dataset_COCO, 'eval', "image", "annotations")
from_mask_to_coco(dataset_COCO, 'test', "image", "annotations")

iface.messageBar().pushMessage(
"You will find the dataset in " + dataset_path,
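For context, the checkBoxCOCO branch now reduces to a three-step pipeline: clip the raster/vector pair into fixed-size tiles and masks, shuffle them into train/eval/test folders, and convert each split's masks to COCO JSON. A minimal standalone sketch of how the calls compose, assuming the plugin's utils package is importable as utils; the paths and tile size are illustrative, not real defaults:

    import os.path as osp
    from utils.COCO import clip_from_file, slice, from_mask_to_coco

    # Hypothetical inputs; the plugin reads these from its dialog widgets.
    dataset_COCO = osp.join("/tmp/dataset", "COCO")  # output root (assumed)
    ras_path = "/tmp/input/scene.tif"                # source raster (assumed)
    vec_path = "/tmp/input/labels.shp"               # label shapefile (assumed)

    # 1) Clip the raster and shapefile into tiles and matching binary masks.
    clip_from_file(512, dataset_COCO, ras_path, vec_path)
    # 2) Shuffle the tiles into train/eval/test subfolders by ratio.
    slice(dataset_COCO, train=0.6, eval=0.2, test=0.2)
    # 3) Write one COCO annotation file per split.
    for mark in ("train", "eval", "test"):
        from_mask_to_coco(dataset_COCO, mark, "image", "annotations")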
4 changes: 1 addition & 3 deletions deep-learning-datasets-maker/utils/COCO/__init__.py
@@ -1,3 +1 @@
from .shape_to_coco import clip_from_file, slice, from_mask_to_coco
from .tif_process import *
from .slice_dataset import *
from .shape_to_coco import clip_from_file, slice, from_mask_to_coco
2 changes: 1 addition & 1 deletion deep-learning-datasets-maker/utils/COCO/pycococreatortools/__init__.py
@@ -1 +1 @@
from .pycococreatortools import resize_binary_mask, close_contour, binary_mask_to_rle, binary_mask_to_polygon, create_image_info, create_annotation_info
from .pycococreatortools import *
21 changes: 11 additions & 10 deletions deep-learning-datasets-maker/utils/COCO/shape_to_coco.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python3

import datetime
import json
import os
@@ -22,11 +23,11 @@
clip_size = 512

INFO = {
"description": "Greenhouse Dataset",
"description": "Image Dataset",
"url": "",
"version": "0.1.0",
"year": 2019,
"contributor": "DuncanChen",
"year": 2021,
"contributor": "",
"date_created": datetime.datetime.utcnow().isoformat(' ')
}

@@ -41,7 +42,7 @@
CATEGORIES = [
{
'id': 1,
'name': 'greenhouse',
'name': 'image',
'supercategory': 'building',
},
]
@@ -91,7 +92,7 @@ def from_mask_to_coco(root, MARK, IMAGE, ANNOTATION):
# go through each image
for image_filename in image_files:
image = Image.open(image_filename)
image_info = pycococreatortools.create_image_info(
image_info = create_image_info(
image_id, os.path.basename(image_filename), image.size)
coco_output["images"].append(image_info)

@@ -109,7 +110,7 @@ def from_mask_to_coco(root, MARK, IMAGE, ANNOTATION):
binary_mask = np.asarray(Image.open(annotation_filename)
.convert('1')).astype(np.uint8)

annotation_info = pycococreatortools.create_annotation_info(
annotation_info = create_annotation_info(
segmentation_id, image_id, category_info, binary_mask,
image.size, tolerance=2)

@@ -120,17 +121,17 @@ def from_mask_to_coco(root, MARK, IMAGE, ANNOTATION):

image_id = image_id + 1

with open('{}/instances_greenhouse_{}2019.json'.format(ROOT_DIR, MARK), 'w') as output_json_file:
with open('{}/instances_image_{}2019.json'.format(ROOT_DIR, MARK), 'w') as output_json_file:
json.dump(coco_output, output_json_file)
else:
print(ROOT_DIR + ' does not exist!')

# def main():
def main():
clip_from_file(clip_size, ROOT, img_path, shp_path)
slice(ROOT_DIR, train=0.6, eval=0.2, test=0.2)
from_mask_to_coco(ROOT_DIR, 'train', "image", "annotations")
from_mask_to_coco(ROOT_DIR, 'eval', "image", "annotations")
from_mask_to_coco(ROOT_DIR, 'test', "image", "annotations")

# if __name__ == "__main__":
# main()
if __name__ == "__main__":
main()
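For reviewers unfamiliar with the output: each from_mask_to_coco call writes a standard COCO instances file for its split (here named instances_image_{MARK}2019.json). A rough sketch of the structure it assembles from create_image_info and create_annotation_info; every value below is illustrative, not taken from a real run:

    import json

    coco_output = {
        "info": {"description": "Image Dataset", "version": "0.1.0", "year": 2021},
        "categories": [{"id": 1, "name": "image", "supercategory": "building"}],
        "images": [
            # one entry per tile, built by create_image_info()
            {"id": 1, "file_name": "0.tif", "width": 512, "height": 512},
        ],
        "annotations": [
            # one entry per object mask, built by create_annotation_info();
            # segmentation is a polygon (or RLE), bbox is [x, y, width, height]
            {"id": 1, "image_id": 1, "category_id": 1,
             "segmentation": [[10.0, 10.0, 10.0, 50.0, 50.0, 50.0, 50.0, 10.0]],
             "bbox": [10.0, 10.0, 40.0, 40.0], "area": 1600.0, "iscrowd": 0},
        ],
    }

    with open("instances_image_train2019.json", "w") as f:
        json.dump(coco_output, f)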
150 changes: 75 additions & 75 deletions deep-learning-datasets-maker/utils/COCO/slice_dataset.py
@@ -1,75 +1,75 @@
import os
import numpy as np
import shutil
import re
import fnmatch

ann_path = 'annotations'
img_path = 'image'

def filter_for_annotations(root, files, image_filename):
# file_types = ['*.png']
file_types = ['*.tif']
file_types = r'|'.join([fnmatch.translate(x) for x in file_types])
basename_no_extension = os.path.splitext(os.path.basename(image_filename))[0]
# file_name_prefix = basename_no_extension + '.*'
files = [os.path.join(root, f) for f in files]
files = [f for f in files if re.match(file_types, f)]
# files = [f for f in files if re.match(file_name_prefix, os.path.splitext(os.path.basename(f))[0])]
files = [f for f in files if basename_no_extension == os.path.splitext(os.path.basename(f))[0].split('_', 1)[0]]

return files

def copy_data(input_path, id, num, mark = 'train'):
if num != 0:
list = os.listdir(input_path + '/' + img_path)
ann_list = os.listdir(input_path + '/' + ann_path)
if not os.path.isdir(input_path + '/' + mark + '/' + img_path):
os.makedirs(input_path + '/' + mark + '/' + img_path)
if not os.path.isdir(input_path + '/' + mark + '/' + ann_path):
os.makedirs(input_path + '/' + mark + '/' + ann_path)

for i in range(num):
shutil.copy(input_path + '/' + img_path + '/' + list[id[i]], input_path + '/' + mark + '/' + img_path
+ '/' + list[id[i]])
print('From src: ' + img_path + '/' + list[id[i]] + ' =>dst:' + '/' + mark + '/' + img_path
+ '/' + list[id[i]])
annotation_files = filter_for_annotations(input_path, ann_list, list[id[i]])
for j in range(len(annotation_files)):
shutil.copy(input_path + '/' + ann_path + '/' + os.path.basename(annotation_files[j]),
input_path + '/' + mark + '/' + ann_path + '/' + os.path.basename(annotation_files[j]))

f = open(input_path + '/' + mark + '/' + mark + '.txt', 'w')
f.write(str(id))
f.close()

def slice(input_path, train=0.8, eval=0.2, test=0.0):
"""
Slice the dataset into training, eval, and test subsets.
:param input_path: path to the original dataset.
:param train: the ratio of the training subset.
:param eval: the ratio of the eval subset.
:param test: the ratio of the test subset.
"""
list = os.listdir(input_path + '/' + img_path)
ann_list = os.listdir(input_path + '/' + ann_path)
num_list = len(list)
n_train = int(num_list * train)
if test == 0:
n_eval = num_list - n_train
n_test = 0
else:
n_eval = int(num_list * eval)
n_test = num_list - n_train - n_eval

img_id = np.arange(num_list)
np.random.shuffle(img_id)
train_id, eval_id, test_id = img_id[:n_train], img_id[n_train: n_train+n_eval], img_id[n_train+n_eval:]
copy_data(input_path, train_id, n_train, 'train')
copy_data(input_path, eval_id, n_eval, 'eval')
copy_data(input_path, test_id, n_test, 'test')

if __name__ == '__main__':
input_path = r'./example_data/original_data/dataset'
# slice(input_path, train=0.6, eval=0.2, test=0.2)
slice(input_path)
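One behavior worth noting for review: when test == 0, slice() assigns the whole remainder to eval, and when test > 0 the remainder left after int() truncation goes to test, so no tile is ever dropped. A small worked sketch of the split arithmetic (the 10-tile count and the ratios are assumptions for illustration):

    import numpy as np

    num_list = 10                          # assume 10 clipped tiles
    train, eval_ratio, test = 0.6, 0.2, 0.2
    n_train = int(num_list * train)        # 6
    n_eval = int(num_list * eval_ratio)    # 2
    n_test = num_list - n_train - n_eval   # 2: the remainder, nothing dropped

    img_id = np.arange(num_list)
    np.random.shuffle(img_id)
    train_id = img_id[:n_train]
    eval_id = img_id[n_train:n_train + n_eval]
    test_id = img_id[n_train + n_eval:]
    print(len(train_id), len(eval_id), len(test_id))  # 6 2 2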