Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
464 changed files
with
2,273 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
""" | ||
To train with crowd and far regions are masked. | ||
This script will generate images to train a detector with crowd/far regions masked. | ||
DISCLAIMER: It will take time. | ||
""" | ||
import os, argparse | ||
from glob import glob | ||
from tqdm import tqdm | ||
from skimage import io | ||
from analysis.annotations import read_vatic, find_boundary | ||
|
||
# Root directory of the raw extracted CADP video frames (source images).
CADP_IMAGE_HOME="/media/tuananhn/903a7d3c-0ce5-444b-ad39-384fcda231ed/CADP/extracted_frames/"
# Destination root for frames whose crowd/far regions have been blacked out.
CADP_MASK_HOME="/media/tuananhn/903a7d3c-0ce5-444b-ad39-384fcda231ed/CADP/masked_frames/"
|
||
|
||
def get_crowd_far(vatic_file, output=None):
    """Collect crowd/far region boxes from one VATIC annotation file.

    Args:
        vatic_file: path to a VATIC .txt annotation file; the basename
            (minus extension) is assumed to be a numeric video id.
        output: optional dict to accumulate into; a fresh dict is created
            when None (avoids the mutable-default-argument pitfall).

    Returns:
        Dict mapping a relative image path ("<vid:06d>/<fid>.jpg") to a
        list of [x1, y1, x2, y2] boxes covering crowd/far regions.
    """
    if output is None:
        output = {}
    annotations = read_vatic(vatic_file)
    vid = os.path.basename(vatic_file).split(".")[0]
    boundary = find_boundary(annotations)
    for trackId in annotations:
        tracklet = annotations[trackId]
        if tracklet["label"] not in ("CrowdRegion", "FarRegion"):
            continue
        for fid, frame in tracklet["frames"].items():
            # boundary[1] == -1 means "no upper bound on frame ids" —
            # NOTE(review): inferred from the original check; confirm
            # against find_boundary's contract.
            if boundary[1] != -1 and fid not in range(boundary[0], boundary[1] + 1):
                continue
            if not frame["visible"]:
                continue
            # Boxes are stored as (y1, x1, y2, x2) but emitted as [x1, y1, x2, y2].
            y1, x1, y2, x2 = frame["box"]
            img_path = os.path.join("{:06d}".format(int(vid)), "{}.jpg".format(fid))
            # setdefault replaces the explicit membership test + two branches.
            output.setdefault(img_path, []).append([x1, y1, x2, y2])
    return output
|
||
|
||
def create_crowd_far(anno_dir):
    """Write masked copies of frames with crowd/far regions blacked out.

    For every VATIC .txt file in `anno_dir`, collects crowd/far boxes,
    zeroes those pixel regions in the corresponding frame, and saves the
    result under CADP_MASK_HOME mirroring the CADP_IMAGE_HOME layout.

    Args:
        anno_dir: directory containing VATIC .txt annotation files.
    """
    txtfiles = glob(anno_dir + "/*.txt")
    for txtfile in tqdm(txtfiles):
        output = get_crowd_far(txtfile)
        vid = os.path.basename(txtfile).split(".")[0]
        # exist_ok avoids the check-then-create race of exists()+makedirs().
        os.makedirs(os.path.join(CADP_MASK_HOME, "{:06d}".format(int(vid))), exist_ok=True)
        for img_path, boxes in output.items():
            img = io.imread(os.path.join(CADP_IMAGE_HOME, img_path))
            for x1, y1, x2, y2 in boxes:
                img[y1:y2, x1:x2, :] = 0  # black out the crowd/far region
            io.imsave(os.path.join(CADP_MASK_HOME, img_path), img)
|
||
|
||
def parse_arguments():
    """Parse command-line options for the masking script."""
    ap = argparse.ArgumentParser()
    ap.add_argument("--anno_dir", type=str)
    return ap.parse_args()
|
||
|
||
if __name__ == "__main__":
    # Entry point: mask crowd/far regions for every annotation file in --anno_dir.
    args = parse_arguments()
    create_crowd_far(anno_dir=args.anno_dir)
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
import csv, os | ||
import argparse | ||
from glob import glob | ||
from analysis.annotations import read_vatic, find_boundary | ||
# Root directory of the raw extracted CADP video frames (source images).
CADP_IMAGE_HOME="/media/tuananhn/903a7d3c-0ce5-444b-ad39-384fcda231ed/CADP/extracted_frames/"
# Root of frames with crowd/far regions masked; preferred when use_mask is on.
CADP_MASK_HOME="/media/tuananhn/903a7d3c-0ce5-444b-ad39-384fcda231ed/CADP/masked_frames/"
|
||
|
||
def generate_csv(writer, vatic_file, use_mask=True):
    """Write one CSV row per visible object box found in `vatic_file`.

    Rows are (img_path, x1, y1, x2, y2, label). Region/separator tracks are
    skipped. When `use_mask` is True and a masked copy of the frame exists
    under CADP_MASK_HOME, its path is written instead of the raw frame.

    Args:
        writer: a csv.writer-like object with writerow().
        vatic_file: VATIC .txt annotation file; basename is the video id.
        use_mask: prefer masked frames when available.
    """
    annotations = read_vatic(vatic_file)
    vid = os.path.basename(vatic_file).split(".")[0]
    boundary = find_boundary(annotations)
    # Hoisted out of the loops: the padded video directory is loop-invariant.
    vid_dir = "{:06d}".format(int(vid))
    for trackId in annotations:
        tracklet = annotations[trackId]
        label = tracklet["label"]
        if label in ("Separator", "CrowdRegion", "FarRegion"):
            continue
        for fid in tracklet["frames"]:
            # boundary[1] == -1 means no upper frame-id bound.
            if boundary[1] != -1 and fid not in range(boundary[0], boundary[1] + 1):
                continue
            frame = tracklet["frames"][fid]
            if not frame["visible"]:
                continue
            img_name = "{}.jpg".format(fid)
            img_path = os.path.join(CADP_IMAGE_HOME, vid_dir, img_name)
            # Only stat the mask file for rows that will actually be written.
            masked_path = os.path.join(CADP_MASK_HOME, vid_dir, img_name)
            if use_mask and os.path.exists(masked_path):
                img_path = masked_path
            # Boxes are stored (y1, x1, y2, x2); rows carry x1, y1, x2, y2.
            y1, x1, y2, x2 = frame["box"]
            writer.writerow([img_path, x1, y1, x2, y2, label])
|
||
|
||
def write_csv(anno_dir, csv_output, use_mask=True):
    """Generate one detection CSV from all VATIC .txt files in `anno_dir`.

    Args:
        anno_dir: directory containing VATIC annotation .txt files.
        csv_output: destination CSV path (overwritten).
        use_mask: prefer masked frames when they exist (see generate_csv).
    """
    # newline="" is the csv-module requirement to avoid blank rows on Windows;
    # the with-block already closes the file (the old explicit close was redundant).
    with open(csv_output, "w", newline="") as f:
        writer = csv.writer(f, delimiter=',',
                            quotechar='|', quoting=csv.QUOTE_MINIMAL)
        for txtfile in glob(anno_dir + "/*.txt"):
            generate_csv(writer, txtfile, use_mask)
|
||
|
||
def _str2bool(value):
    """Parse a command-line boolean.

    argparse's type=bool is a trap: bool("False") is True because any
    non-empty string is truthy. This accepts the usual falsy spellings.
    """
    return str(value).lower() not in ("false", "0", "no", "n", "f", "")


def parse_arguments():
    """Parse command-line options for the CSV generation script.

    --use_mask still accepts an explicit value (e.g. "--use_mask False")
    for backward compatibility, but the value is now interpreted correctly
    instead of via bool(), which returned True for every non-empty string.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--anno_dir", type=str)
    parser.add_argument("--csv_output", type=str)
    parser.add_argument("--use_mask", type=_str2bool, default=True)
    return parser.parse_args()
|
||
|
||
if __name__ == "__main__":
    # Entry point: dump all annotations in --anno_dir to a single CSV file.
    args = parse_arguments()
    write_csv(anno_dir=args.anno_dir, csv_output=args.csv_output, use_mask=args.use_mask)
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
import xml.etree.ElementTree as ET | ||
import cv2 | ||
import csv, os | ||
import argparse | ||
from tqdm import tqdm | ||
|
||
""" | ||
Sample output: | ||
<annotation> | ||
<folder>GeneratedData_Train</folder> | ||
<filename>000001.png</filename> | ||
<path>/my/path/GeneratedData_Train/000001.png</path> | ||
<source> | ||
<database>Unknown</database> | ||
</source> | ||
<size> | ||
<width>224</width> | ||
<height>224</height> | ||
<depth>3</depth> | ||
</size> | ||
<segmented>0</segmented> | ||
<object> | ||
<name>21</name> | ||
<pose>Frontal</pose> | ||
<truncated>0</truncated> | ||
<difficult>0</difficult> | ||
<occluded>0</occluded> | ||
<bndbox> | ||
<xmin>82</xmin> | ||
<xmax>172</xmax> | ||
<ymin>88</ymin> | ||
<ymax>146</ymax> | ||
</bndbox> | ||
</object> | ||
</annotation> | ||
""" | ||
|
||
# Mapping from CADP label names to integer class ids ("bg" = background = 0).
CADP_CLASSES = {"bg": 0, "Car": 3, "Bus": 5, "Others": 6, "Person": 1, "Two-wheeler": 2, "Three-wheeler": 4}
|
||
|
||
def read_csv(csv_file_path):
    """Read a detection CSV into {img_path: {"objects": [box dicts]}}.

    Each CSV row is (img_path, x1, y1, x2, y2, label); coordinate values
    are kept as the raw strings read from the file (consumers stringify
    them again when building XML).

    Args:
        csv_file_path: path to the CSV produced by the annotation exporter.

    Returns:
        Dict keyed by image path; each value holds an "objects" list of
        dicts with keys "name", "x1", "y1", "x2", "y2".
    """
    annotations = {}
    # newline="" is the csv-module-recommended way to open CSV files; the
    # with-block closes the file (the old explicit close was redundant).
    with open(csv_file_path, newline="") as f:
        for row in csv.reader(f, delimiter=","):
            obj = {"name": row[-1], "x1": row[1], "y1": row[2], "x2": row[3], "y2": row[4]}
            # setdefault replaces the original if/else membership dance.
            annotations.setdefault(row[0], {"objects": []})["objects"].append(obj)
    return annotations
|
||
|
||
def build_xml(folder, filename, path, objects):
    """Build a PASCAL-VOC style <annotation> element tree for one image.

    Args:
        folder: text for the <folder> tag.
        filename: image file name for <filename>.
        path: full image path; the image is read with cv2 only to obtain
            its height/width/depth for the <size> tag.
        objects: iterable of dicts with keys "name", "x1", "y1", "x2", "y2";
            "name" is mapped to a numeric class id via CADP_CLASSES.

    Returns:
        The root xml.etree.ElementTree.Element of the annotation tree.
    """
    def _sub(parent, tag, text=None):
        # Create a child element and optionally set its text content.
        el = ET.SubElement(parent, tag)
        if text is not None:
            el.text = text
        return el

    top = ET.Element("annotation")
    _sub(top, "folder", folder)
    _sub(top, "filename", filename)
    _sub(top, "path", path)
    source = _sub(top, "source")
    _sub(source, "database", "CADP")
    # The image size is taken from the file itself rather than trusted metadata.
    h, w, c = cv2.imread(path).shape
    size = _sub(top, "size")
    _sub(size, "width", str(w))
    _sub(size, "height", str(h))
    _sub(size, "depth", str(c))
    _sub(top, "segmented", "0")
    for obj in objects:
        obj_el = _sub(top, "object")
        _sub(obj_el, "name", str(CADP_CLASSES[obj["name"]]))
        _sub(obj_el, "pose", "Unknown")
        _sub(obj_el, "truncated", "0")
        _sub(obj_el, "difficult", "0")
        _sub(obj_el, "occluded", "0")
        bndbox = _sub(obj_el, "bndbox")
        _sub(bndbox, "xmin", str(obj["x1"]))
        _sub(bndbox, "xmax", str(obj["x2"]))
        _sub(bndbox, "ymin", str(obj["y1"]))
        _sub(bndbox, "ymax", str(obj["y2"]))
    return top
|
||
|
||
def parse_arguments():
    """Parse command-line options for the CSV-to-VOC conversion."""
    ap = argparse.ArgumentParser()
    ap.add_argument("--csv", type=str, help="Path to csv annotation file.")
    ap.add_argument("--output_xml_dir", type=str, help="Where to put the PASCAL VOC annotations.")
    ap.add_argument("--listfile", type=str, help="List of files.")
    return ap.parse_args()
|
||
|
||
def main(csv_file_path, output_xml_dir, listfile):
    """Convert a detection CSV into per-image PASCAL VOC XML files.

    Writes one XML per image under output_xml_dir/<video-folder>/ and writes
    "<abs image path> <abs xml path>" lines to `listfile`.

    Args:
        csv_file_path: CSV produced by the annotation exporter.
        output_xml_dir: destination root for the XML annotations.
        listfile: path of the image/xml pair list to write.
    """
    os.makedirs(output_xml_dir, exist_ok=True)
    annotations = read_csv(csv_file_path)
    # with-block guarantees the list file is closed even if conversion fails
    # part-way (the original leaked the handle on any exception).
    with open(listfile, "w") as lf:
        for fp in tqdm(annotations):
            objects = annotations[fp]["objects"]
            fn = os.path.basename(fp)
            # Second-to-last path component is the per-video folder name.
            foldername = fp.split("/")[-2]
            top = build_xml(folder="", filename=fn, path=fp, objects=objects)
            out_dir = "{}/{}".format(output_xml_dir, foldername)
            os.makedirs(out_dir, exist_ok=True)
            # Build the xml path once instead of formatting it twice.
            xml_path = "{}/{}.xml".format(out_dir, fn.split(".")[0])
            ET.ElementTree(top).write(xml_path)
            lf.write("{} {}\n".format(os.path.abspath(fp), os.path.abspath(xml_path)))
|
||
|
||
if __name__ == '__main__':
    # Entry point: convert --csv annotations into PASCAL VOC XMLs + list file.
    args = parse_arguments()
    main(args.csv, args.output_xml_dir, args.listfile)
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
from keras import backend as K | ||
|
||
|
||
class Config:
    """Hyper-parameter container for a Keras Faster R-CNN pipeline.

    All values are plain attributes set with defaults in __init__ so they
    can be adjusted after construction; `class_mapping` is filled in later
    by the data parser, and the whole object is saved to `config_save_file`.
    """

    def __init__(self):
        # Emit progress/diagnostic output when True.
        self.verbose = True

        # Base network (feature extractor) to use.
        self.network = 'resnet50'

        # Settings for data augmentation (all disabled by default).
        self.use_horizontal_flips = False
        self.use_vertical_flips = False
        self.rot_90 = False

        # Anchor box scales (pixels, on the resized image).
        self.anchor_box_scales = [128, 256, 512]

        # Anchor box aspect ratios as [width, height] multipliers.
        self.anchor_box_ratios = [[1, 1], [1, 2], [2, 1]]

        # Size to resize the smallest side of the image to.
        self.im_size = 600

        # Image channel-wise mean to subtract before feeding the network.
        # NOTE(review): these look like the standard ImageNet per-channel
        # (BGR) means — confirm against the preprocessing code.
        self.img_channel_mean = [103.939, 116.779, 123.68]
        self.img_scaling_factor = 1.0

        # Number of ROIs processed at once by the classifier head.
        self.num_rois = 4

        # Stride at the RPN (this depends on the network configuration).
        self.rpn_stride = 16

        # Whether to balance classes when sampling training ROIs.
        self.balanced_classes = False

        # Scaling applied to the regression targets' standard deviation.
        self.std_scaling = 4.0
        self.classifier_regr_std = [8.0, 8.0, 4.0, 4.0]

        # IoU overlap thresholds for labelling RPN anchors (neg < min,
        # pos > max).
        self.rpn_min_overlap = 0.3
        self.rpn_max_overlap = 0.7

        # IoU overlap thresholds for sampling classifier ROIs.
        self.classifier_min_overlap = 0.1
        self.classifier_max_overlap = 0.5

        # Placeholder for the class mapping, automatically generated by the
        # parser.
        self.class_mapping = None

        # Location of pretrained weights for the base network.
        # Weight files can be found at:
        # https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_th_dim_ordering_th_kernels_notop.h5
        # https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5

        self.model_path = 'model_trained/model_frcnn.vgg.hdf5'

        # Parameters added on top of the upstream keras-frcnn config.
        self.data_dir = '.data/'
        self.num_epochs = 3000

        self.kitti_simple_label_file = 'kitti_simple_label.txt'

        # TODO: this field is set to simple_label txt, which in very simple format like:
        # TODO: /path/image_2/000000.png,712.40,143.00,810.73,307.92,Pedestrian, see kitti_simple_label.txt for detail
        self.simple_label_file = 'simple_label.txt'

        # Where this Config instance is pickled for reuse at test time.
        self.config_save_file = 'config.pickle'
Oops, something went wrong.