Merge pull request #165 from alexander-lynch/dev/novelty_detection

Dev/novelty detection
VIAME · Sep 24, 2023 · ed083ef · ed083ef
2 parents 41df5d5 + a773a5b
commit ed083ef
Show file tree

Hide file tree

Showing 63 changed files with 14,460 additions and 0 deletions.
diff --git a/cmake/add_project_learn.cmake b/cmake/add_project_learn.cmake
@@ -2,6 +2,8 @@
 set( VIAME_PROJECT_LIST ${VIAME_PROJECT_LIST} learn )
 
 set( LEARN_DIR ${VIAME_SOURCE_DIR}/packages/learn )
+set( REMAX_DIR ${VIAME_SOURCE_DIR}/plugins/pytorch/remax )
+set( REMAX_OPS_DIR ${REMAX_DIR}/model/ops )  # ops subtree of the remax plugin
 
 set( PYDENSECRF_DIR ${LEARN_DIR}-deps/pydensecrf )
 set( PANOPTICAPI_DIR ${LEARN_DIR}-deps/panopticapi )
@@ -54,6 +56,9 @@ else()
     -P ${VIAME_CMAKE_DIR}/install_python_wheel.cmake )
 endif()
 
+# Argument list for the setup.py build+install invocation used by the remax build step.
+set( REMAX_BUILD_CMD ${Python_EXECUTABLE} setup.py build install )
+
 if( Python_VERSION VERSION_LESS "3.7" )
   set( FINAL_PATCH_COMMAND ${CMAKE_COMMAND} -E copy_directory
         ${VIAME_PATCHES_DIR}/timm
@@ -76,3 +81,12 @@ ExternalProject_Add( learn
     INSTALL_COMMAND ${LEARN_INSTALL_CMD}
     LIST_SEPARATOR "----"
     )
+# remax: run LEARN_REQ_PIP_CMD on its requirements.txt, then run setup.py inside the ops dir via "cmake -E chdir".
+ExternalProject_Add( remax
+    PREFIX ${VIAME_BUILD_PREFIX}
+    SOURCE_DIR ${VIAME_PACKAGES_DIR}
+    BUILD_COMMAND ${LEARN_REQ_PIP_CMD} -r ${REMAX_DIR}/requirements.txt
+          COMMAND ${CMAKE_COMMAND} -E chdir ${REMAX_OPS_DIR} ${REMAX_BUILD_CMD}
+          COMMAND ${FINAL_PATCH_COMMAND}
+    LIST_SEPARATOR "----"
+    )
diff --git a/configs/pipelines/detector_remax_convnext_novelty_detection.pipe b/configs/pipelines/detector_remax_convnext_novelty_detection.pipe
@@ -0,0 +1,139 @@
+# ==============================================================================
+# Remax Novelty Detection
+# ==============================================================================
+
+config _pipeline:_edge
+  :capacity                                    1
+
+config _scheduler
+  :type                                        pythread_per_process
+
+# ==============================================================================
+
+process input
+  :: video_input
+  :video_filename                              input_list.txt
+  :frame_time                                  1
+  :exit_on_invalid                             false
+
+  :video_reader:type                           image_list
+
+  block video_reader:vidl_ffmpeg
+    :time_source                               start_at_0
+  endblock
+
+  block video_reader:image_list
+    :image_reader:type                         vxl
+    :skip_bad_images                           true
+
+    block image_reader:vxl
+      :force_byte                              true
+    endblock
+
+    block image_reader:add_timestamp_from_filename
+      :image_reader:type                       vxl
+
+      block image_reader:vxl
+        :force_byte                            true
+      endblock
+    endblock
+  endblock
+
+process downsampler
+  :: downsample
+  :target_frame_rate                           5
+  :burst_frame_count                           0
+  :burst_frame_break                           0
+  :renumber_frames                             true
+
+connect from input.image
+        to   downsampler.input_1
+connect from input.file_name
+        to   downsampler.input_2
+connect from input.frame_rate
+        to   downsampler.frame_rate
+connect from input.timestamp
+        to   downsampler.timestamp
+
+process detector_input
+  :: image_filter
+  :filter:type                                 vxl_convert_image
+
+  block filter:vxl_convert_image
+    :format                                    byte
+    :force_three_channel                       true
+  endblock
+
+connect from downsampler.output_1
+        to   detector_input.image
+
+# ==============================================================================
+
+process detector1
+  :: image_object_detector
+  :detector:type                               ocv_windowed
+
+  block detector:ocv_windowed
+    :detector:type                             detector_remax_convnext
+    # Chip steps are kept smaller than the matching chip dimension so neighbouring chips overlap
+    :mode                                      adaptive
+    :chip_adaptive_thresh                      2000000
+    :scale                                     1.25
+    :chip_width                                1333
+    :chip_height                               800
+    :chip_step_height                          600
+    :chip_step_width                           1100
+    :chip_edge_filter                          -10
+    :batch_size                                1
+
+    block detector:detector_remax_convnext
+      # Network config, weights, and names
+      relativepath net_config  =               models/convnext_extra_large_config.py
+      relativepath weight_file =               models/convnext_xview_base.pth
+      relativepath class_names =               models/convnext_xview_labels.txt
+      relativepath remax_model_file =          models/convnext_remax.pkl
+      # Detector parameters
+      :norm_degree                             1
+      :device                                  cuda
+      :thresh                                  0.001
+      :gpu_index                               0
+    endblock
+  endblock
+
+connect from detector_input.image
+        to   detector1.image
+
+# ==============================================================================
+
+process detector_merger
+  :: merge_detection_sets
+
+connect from detector1.detected_object_set
+        to   detector_merger.detected_object_set1
+
+process detector_output
+  :: refine_detections
+  :refiner:type                                nms
+
+  block refiner:nms
+    :max_overlap                               0.50
+    :nms_scale_factor                          1.0
+    :output_scale_factor                       1.0
+  endblock
+
+connect from detector_merger.detected_object_set
+        to   detector_output.detected_object_set
+
+# ==============================================================================
+
+process detector_writer
+  :: detected_object_output
+
+  # Type of file to output
+  :file_name                                   computed_detections.csv
+  :writer:type                                 viame_csv
+
+connect from detector_output.detected_object_set
+        to   detector_writer.detected_object_set
+connect from downsampler.output_2
+        to   detector_writer.image_file_name
diff --git a/configs/pipelines/detector_remax_dino_novelty_detection.pipe b/configs/pipelines/detector_remax_dino_novelty_detection.pipe
@@ -0,0 +1,139 @@
+# ==============================================================================
+# Remax Novelty Detection
+# ==============================================================================
+
+config _pipeline:_edge
+  :capacity                                    1
+
+config _scheduler
+  :type                                        pythread_per_process
+
+# ==============================================================================
+
+process input
+  :: video_input
+  :video_filename                              input_list.txt
+  :frame_time                                  1
+  :exit_on_invalid                             false
+
+  :video_reader:type                           image_list
+
+  block video_reader:vidl_ffmpeg
+    :time_source                               start_at_0
+  endblock
+
+  block video_reader:image_list
+    :image_reader:type                         vxl
+    :skip_bad_images                           true
+
+    block image_reader:vxl
+      :force_byte                              true
+    endblock
+
+    block image_reader:add_timestamp_from_filename
+      :image_reader:type                       vxl
+
+      block image_reader:vxl
+        :force_byte                            true
+      endblock
+    endblock
+  endblock
+
+process downsampler
+  :: downsample
+  :target_frame_rate                           5
+  :burst_frame_count                           0
+  :burst_frame_break                           0
+  :renumber_frames                             true
+
+connect from input.image
+        to   downsampler.input_1
+connect from input.file_name
+        to   downsampler.input_2
+connect from input.frame_rate
+        to   downsampler.frame_rate
+connect from input.timestamp
+        to   downsampler.timestamp
+
+process detector_input
+  :: image_filter
+  :filter:type                                 vxl_convert_image
+
+  block filter:vxl_convert_image
+    :format                                    byte
+    :force_three_channel                       true
+  endblock
+
+connect from downsampler.output_1
+        to   detector_input.image
+
+# ==============================================================================
+
+process detector1
+  :: image_object_detector
+  :detector:type                               ocv_windowed
+
+  block detector:ocv_windowed
+    :detector:type                             detector_remax_dino
+    # Chip steps are kept smaller than the matching chip dimension so neighbouring chips overlap
+    :mode                                      adaptive
+    :chip_adaptive_thresh                      2000000
+    :scale                                     1.25
+    :chip_width                                1333
+    :chip_height                               800
+    :chip_step_height                          600
+    :chip_step_width                           1100
+    :chip_edge_filter                          -10
+    :batch_size                                1
+
+    block detector:detector_remax_dino
+      # Network config, weights, and names
+      relativepath net_config  =               models/DINO_4scale_swin.py
+      relativepath weight_file =               models/pytorch_dino.pth
+      relativepath class_names =               models/xview_labels.txt
+      relativepath remax_model_file =          models/dino_remax.pkl
+      # Detector parameters
+      :norm_degree                             1
+      :device                                  cuda
+      :thresh                                  0.1
+      :gpu_index                               0
+    endblock
+  endblock
+
+connect from detector_input.image
+        to   detector1.image
+
+# ==============================================================================
+
+process detector_merger
+  :: merge_detection_sets
+
+connect from detector1.detected_object_set
+        to   detector_merger.detected_object_set1
+
+process detector_output
+  :: refine_detections
+  :refiner:type                                nms
+
+  block refiner:nms
+    :max_overlap                               0.50
+    :nms_scale_factor                          1.0
+    :output_scale_factor                       1.0
+  endblock
+
+connect from detector_merger.detected_object_set
+        to   detector_output.detected_object_set
+
+# ==============================================================================
+
+process detector_writer
+  :: detected_object_output
+
+  # Type of file to output
+  :file_name                                   computed_detections.csv
+  :writer:type                                 viame_csv
+
+connect from detector_output.detected_object_set
+        to   detector_writer.detected_object_set
+connect from downsampler.output_2
+        to   detector_writer.image_file_name
diff --git a/configs/pipelines/train_detector_convnext_remax.viame_csv.conf b/configs/pipelines/train_detector_convnext_remax.viame_csv.conf
@@ -0,0 +1,70 @@
+#  Groundtruth file extensions (txt, kw18, etc...). Note: this is independent of
+#  the format that's stored in the file.
+groundtruth_extensions = .csv
+
+#  Algorithm to use for 'groundtruth_reader'.
+#  Must be one of the following options:
+#  	- habcam :: Reads habcam detection/ground truth files.
+#  	- kw18 :: Detected object set reader using kw18 format.
+#  	- viame_csv :: Detected object set reader using VIAME csv format.
+groundtruth_reader:type = viame_csv
+
+#  Dump possible input data formatting warnings to these files
+data_warning_file = TRAINING_DATA_WARNINGS.txt
+groundtruth_reader:viame_csv:warning_file = TRAINING_DATA_WARNINGS.txt
+
+#  Can be either: "one_per_file" or "one_per_folder".
+groundtruth_style = one_per_folder
+
+#  Semicolon-separated list of image extensions to use in training; images
+#  without one of these extensions will not be included.
+image_extensions = .jpg;.jpeg;.JPG;.JPEG;.png;.PNG;
+
+#  Semicolon-separated list of video extensions to use in training; videos
+#  without one of these extensions will not be included.
+video_extensions = .mp4;.MP4;.mpg;.MPG;.mpeg;.MPEG;.avi;.AVI;.wmv;.WMV;.mov;.MOV;.webm;.WEBM;.ogg;.OGG
+
+#  Pipeline to use to extract video frames if inputs are videos
+relativepath video_extractor = filter_default.pipe
+
+#  Percent [0.0, 1.0] of test samples to use if no manual files specified.
+default_percent_test = 0.0
+
+#  Algorithm to use for 'detector_trainer'.
+detector_trainer:type = ocv_windowed
+
+block detector_trainer:ocv_windowed
+
+  # Directory for all files used in training
+  train_directory = deep_training
+
+  # Windowing mode, can be disabled, maintain_ar, scale, chip, adaptive
+  mode = disabled
+
+  # Don't train on chips with detections smaller than this
+  min_train_box_length = 5
+
+  # Uncomment to remove small detections instead of training on them
+  #small_box_area = 290
+  #small_action = remove
+
+  # Image reader type
+  image_reader:type = vxl
+
+endblock
+
+block detector_trainer:ocv_windowed:trainer
+
+  # Trainer type
+  type = remax_convnext
+
+  # Number of GPUs to use, -1 indicates all
+  remax_convnext:gpu_count = 1
+  relativepath remax_convnext:work_dir = ./
+  relativepath remax_convnext:output_directory = ./
+  remax_convnext:debug_mode = 
+  relativepath remax_convnext:feature_cache = models/convnext_test_features.pt
+  relativepath remax_convnext:net_config  = models/convnext_extra_large_config.py
+  relativepath remax_convnext:weight_file = models/convnext_xview_base.pth
+
+endblock