Release calibration model memory to fix OOM for tensorflow (#542)
Signed-off-by: Lv, Liang1 <liang1.lv@intel.com>
lvliang-intel committed Feb 14, 2023
1 parent 5acea85 commit ad0f1e0
Showing 2 changed files with 26 additions and 10 deletions.
neural_compressor/adaptor/tf_utils/graph_converter.py (17 additions, 10 deletions)
@@ -147,10 +147,6 @@ def __init__(self,
self._sampling_model.output_tensor_names = self.output_tensor_names
self._sampling_model.input_tensor_names = self.input_tensor_names

- self._itex_model = Model(self.model._model, **self.model.kwargs)
- self._itex_model.graph_def = self.model.graph_def
- self._itex_model.output_tensor_names = self.output_tensor_names
- self._itex_model.input_tensor_names = self.input_tensor_names
self._tmp_graph_def = copy.deepcopy(self.model.graph_def)
self.new_api = new_api #bool(version1_gte_version2(tf.version.VERSION, '2.8.0'))
self.performance_only = performance_only
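Dropping the `_itex_model` construction means the converter no longer builds a second `Model` wrapper that keeps its own reference to the full GraphDef alive for the whole conversion; the hunks below redirect every `_itex_model` use to the existing `_tmp_model`. A rough sketch of the memory effect, with `GraphWrapper` as a hypothetical stand-in for the real `Model` class:

import sys

class GraphWrapper:
    """Hypothetical stand-in for neural_compressor's Model wrapper."""
    def __init__(self, graph_def):
        self.graph_def = graph_def  # each live wrapper keeps the graph reachable

graph = bytearray(100 * 1024 * 1024)  # stand-in for a large serialized GraphDef
tmp_model = GraphWrapper(graph)
itex_model = GraphWrapper(graph)      # the extra wrapper this commit stops creating

del graph, tmp_model
print(sys.getrefcount(itex_model.graph_def))  # graph still pinned via itex_model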
@@ -347,11 +343,11 @@ def convert(self):

if self.itex_mode:
host_const_graph_def = \
- PostHostConstConverter(self._itex_model.graph_def).do_transformation()
+ PostHostConstConverter(self._tmp_model.graph_def).do_transformation()
host_const_graph_def.library.CopyFrom(self.model.graph_def.library)
- self._itex_model.graph_def = host_const_graph_def
+ self._tmp_model.graph_def = host_const_graph_def

- return self._itex_model
+ return self._tmp_model

if self.exclude_node_names:
self.bf16_ops.extend(self.exclude_node_names)
@@ -490,7 +486,6 @@ def quantize(self):
sampling_graph_def = copy.deepcopy(self._fp32_model.graph_def)
# TODO: this is a workaround to make Min/Max node be completely eliminated in int8 graph
# after enabling pad+conv2d in new API.
-
non_pad_ops = list(list(set(self.fp32_ops).union(set(self.bf16_ops))))
sampling_graph_def = FusePadWithFP32Conv2DOptimizer(
sampling_graph_def,
@@ -512,6 +507,12 @@ def quantize(self):
self._inference(self._sampling_model)
self._calibration_data = Helper.gen_valid_sampling_log(tmp_dump_file)

+ del output_tensor_names
+ del sampling_graph_def
+ del self._sampling_model
+ import gc
+ gc.collect()
+
if len(self._calibration_data) > 0:
self._freeze_requantization_ranges(self._kl_op_dict)
self._fuse_requantize_with_fused_quantized_node()
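The added `del`/`gc.collect()` lines above are the heart of the fix: once the calibration log has been generated, the sampling model and its graph copy are dead weight, so every reference is dropped and a collection pass is forced before the memory-hungry quantization passes run. A minimal self-contained sketch of the same pattern (the callables are illustrative, not from the repository):

import gc

def calibrate(build_sampling_model, collect_calibration_data):
    sampling_model = build_sampling_model()        # holds a full FP32 graph copy
    calibration_data = collect_calibration_data(sampling_model)
    # Drop the only reference and collect immediately, so the graph memory is
    # returned now rather than whenever the GC next runs on its own.
    del sampling_model
    gc.collect()
    return calibration_data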
@@ -807,6 +808,12 @@ def _insert_qdq_pairs(self):
self._inference(self._sampling_model)
self._calibration_data = Helper.gen_valid_sampling_log(tmp_dump_file)

+ del sampling_graph_def
+ del output_tensor_names
+ del self._sampling_model
+ import gc
+ gc.collect()
+
# Insert QDQ pattern
self._tmp_graph_def = GenerateGraphWithQDQPattern(
self._tmp_graph_def, self._calibration_data, self.op_wise_config,
@@ -847,8 +854,8 @@ def _convert_qdq(self):
self._tmp_graph_def = MergeDuplicatedQDQOptimizer(self._tmp_graph_def).do_transformation()

self._tmp_graph_def.library.CopyFrom(self.model.graph_def.library)
- self._itex_model.graph_def = self._tmp_graph_def
- self._itex_model.graph_def.library.CopyFrom(self.model.graph_def.library)
+ self._tmp_model.graph_def = self._tmp_graph_def
+ self._tmp_model.graph_def.library.CopyFrom(self.model.graph_def.library)
else:
self._tmp_graph_def, exclude_node_names = OptimizeQDQGraph(self._tmp_graph_def,
self._tmp_model.input_node_names,
neural_compressor/utils/utility.py (9 additions, 0 deletions)
@@ -553,3 +553,12 @@ def set_tensorboard(tensorboard: bool):
"""Set the tensorboard in config."""
from neural_compressor.config import options
options.tensorboard = tensorboard

+ def show_memory_info(hint):
+     """Show process full memory."""
+     pid = os.getpid()
+     p = psutil.Process(pid)
+
+     info = p.memory_full_info()
+     memory = info.uss / 1024. / 1024
+     print('{} memory used: {} MB'.format(hint, memory))
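`show_memory_info` reports USS (unique set size), the memory that would be freed if the process exited, which makes it a good leak indicator; it assumes `os` and `psutil` are already imported at module level in utility.py (the diff does not add them). A hedged usage sketch for verifying the fix (these call sites are illustrative; the commit itself adds none):

from neural_compressor.utils.utility import show_memory_info

show_memory_info('before calibration')  # e.g. "before calibration memory used: 1234.5 MB"
# ... run calibration, del the sampling model, gc.collect() ...
show_memory_info('after calibration')   # USS should drop once the graph is freed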
