Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
464 changed files
with
2,273 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
""" | ||
To train with crowd and far regions are masked. | ||
This script will generate images to train a detector with crowd/far regions masked. | ||
DISCLAIMER: It will take time. | ||
""" | ||
import os, argparse | ||
from glob import glob | ||
from tqdm import tqdm | ||
from skimage import io | ||
from analysis.annotations import read_vatic, find_boundary | ||
|
||
# Root directory of the raw extracted CADP video frames (source images).
CADP_IMAGE_HOME="/media/tuananhn/903a7d3c-0ce5-444b-ad39-384fcda231ed/CADP/extracted_frames/"
# Destination root for frames whose crowd/far regions have been blacked out.
CADP_MASK_HOME="/media/tuananhn/903a7d3c-0ce5-444b-ad39-384fcda231ed/CADP/masked_frames/"
|
||
|
||
def get_crowd_far(vatic_file, output=None):
    """Collect crowd/far region boxes from one VATIC annotation file.

    Args:
        vatic_file: path to a VATIC .txt annotation file; the basename
            (minus extension) is assumed to be a numeric video id.
        output: optional dict to accumulate into; a fresh dict is created
            when None (avoids the mutable-default-argument pitfall).

    Returns:
        Dict mapping a relative image path ("<vid:06d>/<fid>.jpg") to a
        list of [x1, y1, x2, y2] boxes covering crowd/far regions.
    """
    if output is None:
        output = {}
    annotations = read_vatic(vatic_file)
    vid = os.path.basename(vatic_file).split(".")[0]
    boundary = find_boundary(annotations)
    for trackId in annotations:
        tracklet = annotations[trackId]
        if tracklet["label"] not in ("CrowdRegion", "FarRegion"):
            continue
        for fid, frame in tracklet["frames"].items():
            # boundary[1] == -1 means "no upper bound on frame ids" —
            # NOTE(review): inferred from the original check; confirm
            # against find_boundary's contract.
            if boundary[1] != -1 and fid not in range(boundary[0], boundary[1] + 1):
                continue
            if not frame["visible"]:
                continue
            # Boxes are stored as (y1, x1, y2, x2) but emitted as [x1, y1, x2, y2].
            y1, x1, y2, x2 = frame["box"]
            img_path = os.path.join("{:06d}".format(int(vid)), "{}.jpg".format(fid))
            # setdefault replaces the explicit membership test + two branches.
            output.setdefault(img_path, []).append([x1, y1, x2, y2])
    return output
|
||
|
||
def create_crowd_far(anno_dir):
    """Write masked copies of frames with crowd/far regions blacked out.

    For every VATIC .txt file in `anno_dir`, collects crowd/far boxes,
    zeroes those pixel regions in the corresponding frame, and saves the
    result under CADP_MASK_HOME mirroring the CADP_IMAGE_HOME layout.

    Args:
        anno_dir: directory containing VATIC .txt annotation files.
    """
    txtfiles = glob(anno_dir + "/*.txt")
    for txtfile in tqdm(txtfiles):
        output = get_crowd_far(txtfile)
        vid = os.path.basename(txtfile).split(".")[0]
        # exist_ok avoids the check-then-create race of exists()+makedirs().
        os.makedirs(os.path.join(CADP_MASK_HOME, "{:06d}".format(int(vid))), exist_ok=True)
        for img_path, boxes in output.items():
            img = io.imread(os.path.join(CADP_IMAGE_HOME, img_path))
            for x1, y1, x2, y2 in boxes:
                img[y1:y2, x1:x2, :] = 0  # black out the crowd/far region
            io.imsave(os.path.join(CADP_MASK_HOME, img_path), img)
|
||
|
||
def parse_arguments():
    """Parse command-line options for the masking script."""
    ap = argparse.ArgumentParser()
    ap.add_argument("--anno_dir", type=str)
    return ap.parse_args()
|
||
|
||
if __name__ == "__main__":
    # Entry point: mask crowd/far regions for every annotation file in --anno_dir.
    args = parse_arguments()
    create_crowd_far(anno_dir=args.anno_dir)
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
import csv, os | ||
import argparse | ||
from glob import glob | ||
from analysis.annotations import read_vatic, find_boundary | ||
# Root directory of the raw extracted CADP video frames (source images).
CADP_IMAGE_HOME="/media/tuananhn/903a7d3c-0ce5-444b-ad39-384fcda231ed/CADP/extracted_frames/"
# Root of frames with crowd/far regions masked; preferred when use_mask is on.
CADP_MASK_HOME="/media/tuananhn/903a7d3c-0ce5-444b-ad39-384fcda231ed/CADP/masked_frames/"
|
||
|
||
def generate_csv(writer, vatic_file, use_mask=True):
    """Write one CSV row per visible object box found in `vatic_file`.

    Rows are (img_path, x1, y1, x2, y2, label). Region/separator tracks are
    skipped. When `use_mask` is True and a masked copy of the frame exists
    under CADP_MASK_HOME, its path is written instead of the raw frame.

    Args:
        writer: a csv.writer-like object with writerow().
        vatic_file: VATIC .txt annotation file; basename is the video id.
        use_mask: prefer masked frames when available.
    """
    annotations = read_vatic(vatic_file)
    vid = os.path.basename(vatic_file).split(".")[0]
    boundary = find_boundary(annotations)
    # Hoisted out of the loops: the padded video directory is loop-invariant.
    vid_dir = "{:06d}".format(int(vid))
    for trackId in annotations:
        tracklet = annotations[trackId]
        label = tracklet["label"]
        if label in ("Separator", "CrowdRegion", "FarRegion"):
            continue
        for fid in tracklet["frames"]:
            # boundary[1] == -1 means no upper frame-id bound.
            if boundary[1] != -1 and fid not in range(boundary[0], boundary[1] + 1):
                continue
            frame = tracklet["frames"][fid]
            if not frame["visible"]:
                continue
            img_name = "{}.jpg".format(fid)
            img_path = os.path.join(CADP_IMAGE_HOME, vid_dir, img_name)
            # Only stat the mask file for rows that will actually be written.
            masked_path = os.path.join(CADP_MASK_HOME, vid_dir, img_name)
            if use_mask and os.path.exists(masked_path):
                img_path = masked_path
            # Boxes are stored (y1, x1, y2, x2); rows carry x1, y1, x2, y2.
            y1, x1, y2, x2 = frame["box"]
            writer.writerow([img_path, x1, y1, x2, y2, label])
|
||
|
||
def write_csv(anno_dir, csv_output, use_mask=True):
    """Generate one detection CSV from all VATIC .txt files in `anno_dir`.

    Args:
        anno_dir: directory containing VATIC annotation .txt files.
        csv_output: destination CSV path (overwritten).
        use_mask: prefer masked frames when they exist (see generate_csv).
    """
    # newline="" is the csv-module requirement to avoid blank rows on Windows;
    # the with-block already closes the file (the old explicit close was redundant).
    with open(csv_output, "w", newline="") as f:
        writer = csv.writer(f, delimiter=',',
                            quotechar='|', quoting=csv.QUOTE_MINIMAL)
        for txtfile in glob(anno_dir + "/*.txt"):
            generate_csv(writer, txtfile, use_mask)
|
||
|
||
def _str2bool(value):
    """Parse a command-line boolean.

    argparse's type=bool is a trap: bool("False") is True because any
    non-empty string is truthy. This accepts the usual falsy spellings.
    """
    return str(value).lower() not in ("false", "0", "no", "n", "f", "")


def parse_arguments():
    """Parse command-line options for the CSV generation script.

    --use_mask still accepts an explicit value (e.g. "--use_mask False")
    for backward compatibility, but the value is now interpreted correctly
    instead of via bool(), which returned True for every non-empty string.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--anno_dir", type=str)
    parser.add_argument("--csv_output", type=str)
    parser.add_argument("--use_mask", type=_str2bool, default=True)
    return parser.parse_args()
|
||
|
||
if __name__ == "__main__":
    # Entry point: dump all annotations in --anno_dir to a single CSV file.
    args = parse_arguments()
    write_csv(anno_dir=args.anno_dir, csv_output=args.csv_output, use_mask=args.use_mask)
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
import xml.etree.ElementTree as ET | ||
import cv2 | ||
import csv, os | ||
import argparse | ||
from tqdm import tqdm | ||
|
||
""" | ||
Sample output: | ||
<annotation> | ||
<folder>GeneratedData_Train</folder> | ||
<filename>000001.png</filename> | ||
<path>/my/path/GeneratedData_Train/000001.png</path> | ||
<source> | ||
<database>Unknown</database> | ||
</source> | ||
<size> | ||
<width>224</width> | ||
<height>224</height> | ||
<depth>3</depth> | ||
</size> | ||
<segmented>0</segmented> | ||
<object> | ||
<name>21</name> | ||
<pose>Frontal</pose> | ||
<truncated>0</truncated> | ||
<difficult>0</difficult> | ||
<occluded>0</occluded> | ||
<bndbox> | ||
<xmin>82</xmin> | ||
<xmax>172</xmax> | ||
<ymin>88</ymin> | ||
<ymax>146</ymax> | ||
</bndbox> | ||
</object> | ||
</annotation> | ||
""" | ||
|
||
# Mapping from CADP label names to integer class ids ("bg" = background = 0).
CADP_CLASSES = {"bg": 0, "Car": 3, "Bus": 5, "Others": 6, "Person": 1, "Two-wheeler": 2, "Three-wheeler": 4}
|
||
|
||
def read_csv(csv_file_path):
    """Read a detection CSV into {img_path: {"objects": [box dicts]}}.

    Each CSV row is (img_path, x1, y1, x2, y2, label); coordinate values
    are kept as the raw strings read from the file (consumers stringify
    them again when building XML).

    Args:
        csv_file_path: path to the CSV produced by the annotation exporter.

    Returns:
        Dict keyed by image path; each value holds an "objects" list of
        dicts with keys "name", "x1", "y1", "x2", "y2".
    """
    annotations = {}
    # newline="" is the csv-module-recommended way to open CSV files; the
    # with-block closes the file (the old explicit close was redundant).
    with open(csv_file_path, newline="") as f:
        for row in csv.reader(f, delimiter=","):
            obj = {"name": row[-1], "x1": row[1], "y1": row[2], "x2": row[3], "y2": row[4]}
            # setdefault replaces the original if/else membership dance.
            annotations.setdefault(row[0], {"objects": []})["objects"].append(obj)
    return annotations
|
||
|
||
def build_xml(folder, filename, path, objects):
    """Build a PASCAL-VOC style <annotation> element tree for one image.

    Args:
        folder: text for the <folder> tag.
        filename: image file name for <filename>.
        path: full image path; the image is read with cv2 only to obtain
            its height/width/depth for the <size> tag.
        objects: iterable of dicts with keys "name", "x1", "y1", "x2", "y2";
            "name" is mapped to a numeric class id via CADP_CLASSES.

    Returns:
        The root xml.etree.ElementTree.Element of the annotation tree.
    """
    def _sub(parent, tag, text=None):
        # Create a child element and optionally set its text content.
        el = ET.SubElement(parent, tag)
        if text is not None:
            el.text = text
        return el

    top = ET.Element("annotation")
    _sub(top, "folder", folder)
    _sub(top, "filename", filename)
    _sub(top, "path", path)
    source = _sub(top, "source")
    _sub(source, "database", "CADP")
    # The image size is taken from the file itself rather than trusted metadata.
    h, w, c = cv2.imread(path).shape
    size = _sub(top, "size")
    _sub(size, "width", str(w))
    _sub(size, "height", str(h))
    _sub(size, "depth", str(c))
    _sub(top, "segmented", "0")
    for obj in objects:
        obj_el = _sub(top, "object")
        _sub(obj_el, "name", str(CADP_CLASSES[obj["name"]]))
        _sub(obj_el, "pose", "Unknown")
        _sub(obj_el, "truncated", "0")
        _sub(obj_el, "difficult", "0")
        _sub(obj_el, "occluded", "0")
        bndbox = _sub(obj_el, "bndbox")
        _sub(bndbox, "xmin", str(obj["x1"]))
        _sub(bndbox, "xmax", str(obj["x2"]))
        _sub(bndbox, "ymin", str(obj["y1"]))
        _sub(bndbox, "ymax", str(obj["y2"]))
    return top
|
||
|
||
def parse_arguments():
    """Parse command-line options for the CSV-to-VOC conversion."""
    ap = argparse.ArgumentParser()
    ap.add_argument("--csv", type=str, help="Path to csv annotation file.")
    ap.add_argument("--output_xml_dir", type=str, help="Where to put the PASCAL VOC annotations.")
    ap.add_argument("--listfile", type=str, help="List of files.")
    return ap.parse_args()
|
||
|
||
def main(csv_file_path, output_xml_dir, listfile):
    """Convert a detection CSV into per-image PASCAL VOC XML files.

    Writes one XML per image under output_xml_dir/<video-folder>/ and writes
    "<abs image path> <abs xml path>" lines to `listfile`.

    Args:
        csv_file_path: CSV produced by the annotation exporter.
        output_xml_dir: destination root for the XML annotations.
        listfile: path of the image/xml pair list to write.
    """
    os.makedirs(output_xml_dir, exist_ok=True)
    annotations = read_csv(csv_file_path)
    # with-block guarantees the list file is closed even if conversion fails
    # part-way (the original leaked the handle on any exception).
    with open(listfile, "w") as lf:
        for fp in tqdm(annotations):
            objects = annotations[fp]["objects"]
            fn = os.path.basename(fp)
            # Second-to-last path component is the per-video folder name.
            foldername = fp.split("/")[-2]
            top = build_xml(folder="", filename=fn, path=fp, objects=objects)
            out_dir = "{}/{}".format(output_xml_dir, foldername)
            os.makedirs(out_dir, exist_ok=True)
            # Build the xml path once instead of formatting it twice.
            xml_path = "{}/{}.xml".format(out_dir, fn.split(".")[0])
            ET.ElementTree(top).write(xml_path)
            lf.write("{} {}\n".format(os.path.abspath(fp), os.path.abspath(xml_path)))
|
||
|
||
if __name__ == '__main__':
    # Entry point: convert --csv annotations into PASCAL VOC XMLs + list file.
    args = parse_arguments()
    main(args.csv, args.output_xml_dir, args.listfile)
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
from keras import backend as K | ||
|
||
|
||
class Config:
    """Hyper-parameter container for a Keras Faster R-CNN pipeline.

    All values are plain attributes set with defaults in __init__ so they
    can be adjusted after construction; `class_mapping` is filled in later
    by the data parser, and the whole object is saved to `config_save_file`.
    """

    def __init__(self):
        # Emit progress/diagnostic output when True.
        self.verbose = True

        # Base network (feature extractor) to use.
        self.network = 'resnet50'

        # Settings for data augmentation (all disabled by default).
        self.use_horizontal_flips = False
        self.use_vertical_flips = False
        self.rot_90 = False

        # Anchor box scales (pixels, on the resized image).
        self.anchor_box_scales = [128, 256, 512]

        # Anchor box aspect ratios as [width, height] multipliers.
        self.anchor_box_ratios = [[1, 1], [1, 2], [2, 1]]

        # Size to resize the smallest side of the image to.
        self.im_size = 600

        # Image channel-wise mean to subtract before feeding the network.
        # NOTE(review): these look like the standard ImageNet per-channel
        # (BGR) means — confirm against the preprocessing code.
        self.img_channel_mean = [103.939, 116.779, 123.68]
        self.img_scaling_factor = 1.0

        # Number of ROIs processed at once by the classifier head.
        self.num_rois = 4

        # Stride at the RPN (this depends on the network configuration).
        self.rpn_stride = 16

        # Whether to balance classes when sampling training ROIs.
        self.balanced_classes = False

        # Scaling applied to the regression targets' standard deviation.
        self.std_scaling = 4.0
        self.classifier_regr_std = [8.0, 8.0, 4.0, 4.0]

        # IoU overlap thresholds for labelling RPN anchors (neg < min,
        # pos > max).
        self.rpn_min_overlap = 0.3
        self.rpn_max_overlap = 0.7

        # IoU overlap thresholds for sampling classifier ROIs.
        self.classifier_min_overlap = 0.1
        self.classifier_max_overlap = 0.5

        # Placeholder for the class mapping, automatically generated by the
        # parser.
        self.class_mapping = None

        # Location of pretrained weights for the base network.
        # Weight files can be found at:
        # https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_th_dim_ordering_th_kernels_notop.h5
        # https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5

        self.model_path = 'model_trained/model_frcnn.vgg.hdf5'

        # Parameters added on top of the upstream keras-frcnn config.
        self.data_dir = '.data/'
        self.num_epochs = 3000

        self.kitti_simple_label_file = 'kitti_simple_label.txt'

        # TODO: this field is set to simple_label txt, which in very simple format like:
        # TODO: /path/image_2/000000.png,712.40,143.00,810.73,307.92,Pedestrian, see kitti_simple_label.txt for detail
        self.simple_label_file = 'simple_label.txt'

        # Where this Config instance is pickled for reuse at test time.
        self.config_save_file = 'config.pickle'
Oops, something went wrong.