diff --git a/gnes/encoder/image/inception.py b/gnes/encoder/image/inception.py
index 222c13fe..eecdb812 100644
--- a/gnes/encoder/image/inception.py
+++ b/gnes/encoder/image/inception.py
@@ -27,13 +27,11 @@ class TFInceptionEncoder(BaseImageEncoder):
 
     def __init__(self, model_dir: str,
                  select_layer: str = 'PreLogitsFlatten',
-                 use_cuda: bool = False,
                  *args, **kwargs):
         super().__init__(*args, **kwargs)
 
         self.model_dir = model_dir
         self.select_layer = select_layer
-        self._use_cuda = use_cuda
         self.inception_size_x = 299
         self.inception_size_y = 299
 
@@ -57,7 +55,7 @@ def post_init(self):
                                                             dropout_keep_prob=1.0)
 
             config = tf.ConfigProto(log_device_placement=False)
-            if self._use_cuda:
+            if self.on_gpu:
                 config.gpu_options.allow_growth = True
             self.sess = tf.Session(config=config)
             self.saver = tf.train.Saver()
diff --git a/gnes/encoder/image/onnx.py b/gnes/encoder/image/onnx.py
index 57b8e933..a94e4829 100644
--- a/gnes/encoder/image/onnx.py
+++ b/gnes/encoder/image/onnx.py
@@ -26,13 +26,11 @@ class BaseONNXImageEncoder(BaseImageEncoder):
 
     def __init__(self, model_name: str,
                  model_dir: str,
-                 use_cuda: bool = False,
                  *args, **kwargs):
         super().__init__(*args, **kwargs)
 
         self.model_dir = model_dir
         self.model_name = model_name
-        self._use_cuda = use_cuda
 
     def post_init(self):
         import onnxruntime as ort
diff --git a/gnes/encoder/image/torchvision.py b/gnes/encoder/image/torchvision.py
index b6596a89..0c246241 100644
--- a/gnes/encoder/image/torchvision.py
+++ b/gnes/encoder/image/torchvision.py
@@ -19,7 +19,7 @@
 import numpy as np
 
 from ..base import BaseImageEncoder
-from ...helper import batching
+from ...helper import batching, as_numpy_array
 
 
 class TorchvisionEncoder(BaseImageEncoder):
@@ -28,14 +28,12 @@ class TorchvisionEncoder(BaseImageEncoder):
     def __init__(self, model_name: str,
                  layers: List[str],
                  model_dir: str,
-                 use_cuda: bool = False,
                  *args, **kwargs):
         super().__init__(*args, **kwargs)
 
         self.model_dir = model_dir
         self.model_name = model_name
         self.layers = layers
-        self._use_cuda = use_cuda
 
     def post_init(self):
         import torch
@@ -69,7 +67,7 @@ def forward(self, x):
         os.environ['TORCH_HOME'] = self.model_dir
         self._model = _Model(self.model_name, self.layers)
         self._model = self._model.eval()
-        if self._use_cuda:
+        if self.on_gpu:
             # self._model.cuda()
             self._device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
             self._model = self._model.to(self._device)
@@ -94,6 +92,7 @@ def _padding(img: List['np.ndarray']):
             max_lenth = -1
 
         @batching(chunk_dim=max_lenth)
+        @as_numpy_array
         def _encode(_, img: List['np.ndarray']):
             import copy
 
@@ -106,14 +105,11 @@ def _encode(_, img: List['np.ndarray']):
                 img_for_torch = np.array(img, dtype=np.float32).transpose(0, 3, 1, 2)
 
             img_tensor = torch.from_numpy(img_for_torch)
-            if self._use_cuda:
+            if self.on_gpu:
                 img_tensor = img_tensor.cuda()
 
             encodes = self._model(img_tensor)
 
-            output = np.array(encodes.data.cpu().numpy(), dtype=np.float32)
-            return output
+            return encodes.data.cpu()
 
-        output = _encode(self, img)
-
-        return output
+        return _encode(self, img)
diff --git a/gnes/encoder/video/incep_mixture.py b/gnes/encoder/video/incep_mixture.py
index b131a972..0a0be11b 100644
--- a/gnes/encoder/video/incep_mixture.py
+++ b/gnes/encoder/video/incep_mixture.py
@@ -28,7 +28,6 @@ class IncepMixtureEncoder(BaseVideoEncoder):
     def __init__(self, model_dir_inception: str,
                  model_dir_mixture: str,
                  select_layer: str = 'PreLogitsFlatten',
-                 use_cuda: bool = False,
                  feature_size: int = 300,
                  vocab_size: int = 28,
                  cluster_size: int = 256,
@@ -42,7 +41,6 @@ def __init__(self, model_dir_inception: str,
         self.model_dir_inception = model_dir_inception
         self.model_dir_mixture = model_dir_mixture
         self.select_layer = select_layer
-        self.use_cuda = use_cuda
         self.cluster_size = cluster_size
         self.feature_size = feature_size
         self.vocab_size = vocab_size
@@ -76,7 +74,7 @@ def post_init(self):
                                                             dropout_keep_prob=1.0)
 
             config = tf.ConfigProto(log_device_placement=False)
-            if self.use_cuda:
+            if self.on_gpu:
                 config.gpu_options.allow_growth = True
             self.sess = tf.Session(config=config)
             self.saver = tf.train.Saver()
@@ -85,7 +83,7 @@ def post_init(self):
         g2 = tf.Graph()
         with g2.as_default():
             config = tf.ConfigProto(log_device_placement=False)
-            if self.use_cuda:
+            if self.on_gpu:
                 config.gpu_options.allow_growth = True
             self.sess2 = tf.Session(config=config)
             self.mix_model = NetFV(feature_size=self.feature_size,
diff --git a/gnes/helper.py b/gnes/helper.py
index 55f71cfa..e42964d8 100644
--- a/gnes/helper.py
+++ b/gnes/helper.py
@@ -422,8 +422,6 @@ def arg_wrapper(self, data, label=None, *args, **kwargs):
                             reduced_result[col] = reduced_result[col].reshape(
                                 (-1, chunk_dim, reduced_result[col].shape[1]))
                     final_result = tuple(reduced_result)
-                else:
-                    raise TypeError('dont know how to reduce %s' % type(final_result[0]))
 
             if len(final_result):
                 return final_result
diff --git a/gnes/preprocessor/image/segmentation.py b/gnes/preprocessor/image/segmentation.py
index ad475c61..ca387057 100644
--- a/gnes/preprocessor/image/segmentation.py
+++ b/gnes/preprocessor/image/segmentation.py
@@ -29,12 +29,10 @@ class SegmentPreprocessor(SizedPreprocessor):
 
     def __init__(self, model_name: str,
                  model_dir: str,
-                 _use_cuda: bool = False,
                  *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.model_name = model_name
         self.model_dir = model_dir
-        self._use_cuda = _use_cuda
 
     def post_init(self):
         import torch
@@ -43,7 +41,7 @@ def post_init(self):
         os.environ['TORCH_HOME'] = self.model_dir
         self._model = getattr(models.detection, self.model_name)(pretrained=True)
         self._model = self._model.eval()
-        if self._use_cuda:
+        if self.on_gpu:
             # self._model.cuda()
             self._device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
             self._model = self._model.to(self._device)
@@ -54,7 +52,7 @@ def apply(self, doc: 'gnes_pb2.Document'):
             original_image = Image.open(io.BytesIO(doc.raw_bytes))
             all_subareas, index = get_all_subarea(original_image)
             image_tensor = torch_transform(original_image)
-            if self._use_cuda:
+            if self.on_gpu:
                 image_tensor = image_tensor.cuda()
 
             seg_output = self._model([image_tensor])
diff --git a/tests/test_pytorch_transformers_encoder.py b/tests/test_pytorch_transformers_encoder.py
index 36816b54..649e91d6 100644
--- a/tests/test_pytorch_transformers_encoder.py
+++ b/tests/test_pytorch_transformers_encoder.py
@@ -22,7 +22,7 @@ def setUp(self):
     def test_encoding(self):
         vec = self.tt_encoder.encode(self.test_str)
         self.assertEqual(vec.shape[0], len(self.test_str))
-        self.assertEqual(vec.shape[2], 768)
+        self.assertEqual(vec.shape[1], 768)
 
     def test_dump_load(self):
         self.tt_encoder.dump(self.dump_path)
@@ -31,7 +31,7 @@ def test_dump_load(self):
 
         vec = tt_encoder2.encode(self.test_str)
         self.assertEqual(vec.shape[0], len(self.test_str))
-        self.assertEqual(vec.shape[2], 768)
+        self.assertEqual(vec.shape[1], 768)
 
     def tearDown(self):
         if os.path.exists(self.dump_path):
diff --git a/tests/yaml/base-segmentation-image-prep.yml b/tests/yaml/base-segmentation-image-prep.yml
index 26ebd78c..47e6df37 100644
--- a/tests/yaml/base-segmentation-image-prep.yml
+++ b/tests/yaml/base-segmentation-image-prep.yml
@@ -4,7 +4,6 @@ components:
     parameters:
       model_dir: ${FASTERRCNN_MODEL}
       model_name: fasterrcnn_resnet50_fpn
-      _use_cuda: false
     gnes_config:
       name: fasterRCNN
       is_trained: true
diff --git a/tutorials/component-yaml-spec.md b/tutorials/component-yaml-spec.md
index 255ac115..394b831a 100644
--- a/tutorials/component-yaml-spec.md
+++ b/tutorials/component-yaml-spec.md
@@ -130,7 +130,6 @@ def __init__(self, model_name: str,
                  layers: List[str],
                  model_dir: str,
                  batch_size: int = 64,
-                 use_cuda: bool = False,
                  *args, **kwargs):
   # do model init...
   # ...
diff --git a/yaml-example/component/img_preprocessor_fasterRCNN.yml b/yaml-example/component/img_preprocessor_fasterRCNN.yml
index 871ba76c..82ee4e3e 100644
--- a/yaml-example/component/img_preprocessor_fasterRCNN.yml
+++ b/yaml-example/component/img_preprocessor_fasterRCNN.yml
@@ -3,6 +3,5 @@ parameters:
   model_dir: /ext_data/image_preprocessor
   model_name: fasterrcnn_resnet50_fpn
   target_img_size: 224
-  _use_cuda: false
 gnes_config:
   is_trained: true
\ No newline at end of file