From 38204708a4ea4912c2599809dd3c6d6fddde4453 Mon Sep 17 00:00:00 2001
From: yanliang567 <82361606+yanliang567@users.noreply.github.com>
Date: Fri, 10 May 2024 14:57:32 +0800
Subject: [PATCH] test: Update insert data tests and refactor some data gen
 functions (#32924)

related issue: #32653

Signed-off-by: yanliang567 <yanliang.qiao@zilliz.com>
---
 tests/python_client/common/common_func.py    |  21 +-
 tests/python_client/testcases/test_insert.py | 732 +++++++------------
 tests/python_client/testcases/test_search.py |  14 +-
 3 files changed, 298 insertions(+), 469 deletions(-)

diff --git a/tests/python_client/common/common_func.py b/tests/python_client/common/common_func.py
index 61b0fa92c5be..4a4c15d322c7 100644
--- a/tests/python_client/common/common_func.py
+++ b/tests/python_client/common/common_func.py
@@ -899,12 +899,12 @@ def get_column_data_by_schema(nb=ct.default_nb, schema=None, skip_vectors=False,
         if field.dtype == DataType.FLOAT_VECTOR and skip_vectors is True:
             tmp = []
         else:
-            tmp = gen_data_by_type(field, nb=nb, start=start)
+            tmp = gen_data_by_collection_field(field, nb=nb, start=start)
         data.append(tmp)
     return data
 
 
-def get_row_data_by_schema(nb=ct.default_nb, schema=None):
+def gen_row_data_by_schema(nb=ct.default_nb, schema=None):
     if schema is None:
         schema = gen_default_collection_schema()
     fields = schema.fields
@@ -916,7 +916,7 @@ def get_row_data_by_schema(nb=ct.default_nb, schema=None):
     for i in range(nb):
         tmp = {}
         for field in fields_not_auto_id:
-            tmp[field.name] = gen_data_by_type(field)
+            tmp[field.name] = gen_data_by_collection_field(field)
         data.append(tmp)
     return data
 
@@ -1016,7 +1016,7 @@ def get_dim_by_schema(schema=None):
     return None
 
 
-def gen_data_by_type(field, nb=None, start=None):
+def gen_data_by_collection_field(field, nb=None, start=None):
     # if nb is None, return one data, else return a list of data
     data_type = field.dtype
     if data_type == DataType.BOOL:
@@ -1124,6 +1124,19 @@ def gen_data_by_type(field, nb=None, start=None):
     return None
 
 
+def gen_data_by_collection_schema(schema, nb, r=0):
+    """
+    gen random column data for every field of the collection schema,
+    no field is skipped, regardless of primary key or auto_id
+    nb and the start value r * nb are handed to gen_data_by_collection_field for each field
+    vector type only support for DataType.FLOAT_VECTOR
+    """
+    start_uid = r * nb
+    columns = [gen_data_by_collection_field(field, nb, start_uid)
+               for field in schema.fields]
+    return columns
+
+
 def gen_json_files_for_bulk_insert(data, schema, data_dir):
     for d in data:
         if len(d) > 0:
diff --git a/tests/python_client/testcases/test_insert.py b/tests/python_client/testcases/test_insert.py
index 6a92f53336fd..bf99966ad93b 100644
--- a/tests/python_client/testcases/test_insert.py
+++ b/tests/python_client/testcases/test_insert.py
@@ -32,12 +32,6 @@
 class TestInsertParams(TestcaseBase):
     """ Test case of Insert interface """
 
-    @pytest.fixture(scope="function", params=ct.get_invalid_strs)
-    def get_non_data_type(self, request):
-        if isinstance(request.param, list) or request.param is None:
-            pytest.skip("list and None type is valid data type")
-        yield request.param
-
     @pytest.fixture(scope="module", params=ct.get_invalid_strs)
     def get_invalid_field_name(self, request):
         if isinstance(request.param, (list, dict)):
@@ -76,7 +70,7 @@ def test_insert_list_data(self):
         assert collection_w.num_entities == ct.default_nb
 
     @pytest.mark.tags(CaseLabel.L2)
-    def test_insert_non_data_type(self, get_non_data_type):
+    def test_insert_non_data_type(self):
         """
         target: test insert with non-dataframe, non-list data
         method: insert with data (non-dataframe and non-list type)
@@ -84,23 +78,36 @@ def test_insert_non_data_type(self, get_non_data_type):
         """
         c_name = cf.gen_unique_str(prefix)
         collection_w = self.init_collection_wrap(name=c_name)
-        error = {ct.err_code: 1,
-                 ct.err_msg: "The type of data should be list or pandas.DataFrame"}
-        collection_w.insert(data=get_non_data_type,
+        error = {ct.err_code: 999,
+                 ct.err_msg: "The type of data should be List, pd.DataFrame or Dict"}
+        collection_w.insert(data=None,
                             check_task=CheckTasks.err_res, check_items=error)
 
     @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.parametrize("data", [[], pd.DataFrame()])
+    @pytest.mark.parametrize("data", [pd.DataFrame()])
     def test_insert_empty_data(self, data):
         """
-        target: test insert empty data
+        target: test insert empty dataFrame()
         method: insert empty
         expected: raise exception
         """
         c_name = cf.gen_unique_str(prefix)
         collection_w = self.init_collection_wrap(name=c_name)
-        error = {ct.err_code: 1, ct.err_msg: "The fields don't match with schema fields, "
-                                             "expected: ['int64', 'float', 'varchar', 'float_vector'], got %s" % data}
+        error = {ct.err_code: 999, ct.err_msg: "The fields don't match with schema fields"}
+        collection_w.insert(
+            data=data, check_task=CheckTasks.err_res, check_items=error)
+
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.parametrize("data", [[[]]])
+    def test_insert_empty_array(self, data):
+        """
+        target: test insert empty array
+        method: insert a list that holds a single empty list
+        expected: raise exception
+        """
+        c_name = cf.gen_unique_str(prefix)
+        collection_w = self.init_collection_wrap(name=c_name)
+        error = {ct.err_code: 999, ct.err_msg: "The data don't match with schema fields"}
         collection_w.insert(
             data=data, check_task=CheckTasks.err_res, check_items=error)
 
@@ -116,8 +123,8 @@ def test_insert_dataframe_only_columns(self):
         columns = [ct.default_int64_field_name,
                    ct.default_float_vec_field_name]
         df = pd.DataFrame(columns=columns)
-        error = {ct.err_code: 1,
-                 ct.err_msg: "The data don't match with schema fields, expect 5 list, got 0"}
+        error = {ct.err_code: 999,
+                 ct.err_msg: "The fields don't match with schema fields"}
         collection_w.insert(
             data=df, check_task=CheckTasks.err_res, check_items=error)
 
@@ -129,11 +136,11 @@ def test_insert_empty_field_name_dataframe(self):
         expected: raise exception
         """
         c_name = cf.gen_unique_str(prefix)
-        collection_w = self.init_collection_wrap(name=c_name)
+        collection_w = self.init_collection_wrap(name=c_name, dim=32)
         df = cf.gen_default_dataframe_data(10)
         df.rename(columns={ct.default_int64_field_name: ' '}, inplace=True)
-        error = {ct.err_code: 1,
-                 ct.err_msg: "The name of field don't match, expected: int64, got "}
+        error = {ct.err_code: 999,
+                 ct.err_msg: "The name of field don't match, expected: int64"}
         collection_w.insert(
             data=df, check_task=CheckTasks.err_res, check_items=error)
 
@@ -149,33 +156,11 @@ def test_insert_invalid_field_name_dataframe(self, get_invalid_field_name):
         df = cf.gen_default_dataframe_data(10)
         df.rename(
             columns={ct.default_int64_field_name: get_invalid_field_name}, inplace=True)
-        error = {ct.err_code: 1, ct.err_msg: "The name of field don't match, expected: int64, got %s" %
-                 get_invalid_field_name}
+        error = {ct.err_code: 999,
+                 ct.err_msg: f"The name of field don't match, expected: int64, got {get_invalid_field_name}"}
         collection_w.insert(
             data=df, check_task=CheckTasks.err_res, check_items=error)
 
-    def test_insert_dataframe_index(self):
-        """
-        target: test insert dataframe with index
-        method: insert dataframe with index
-        expected: todo
-        """
-        pass
-
-    @pytest.mark.tags(CaseLabel.L2)
-    def test_insert_none(self):
-        """
-        target: test insert None
-        method: data is None
-        expected: return successfully with zero results
-        """
-        c_name = cf.gen_unique_str(prefix)
-        collection_w = self.init_collection_wrap(name=c_name)
-        mutation_res, _ = collection_w.insert(data=None, check_task=CheckTasks.err_res,
-                                              check_items={ct.err_code: 1,
-                                                           ct.err_msg: "The type of data should be List, "
-                                                                       "pd.DataFrame or Dict"})
-
     @pytest.mark.tags(CaseLabel.L1)
     def test_insert_numpy_data(self):
         """
@@ -185,8 +170,10 @@ def test_insert_numpy_data(self):
         """
         c_name = cf.gen_unique_str(prefix)
         collection_w = self.init_collection_wrap(name=c_name)
-        data = cf.gen_numpy_data(nb=10)
+        nb = 10
+        data = cf.gen_numpy_data(nb=nb)
         collection_w.insert(data=data)
+        assert collection_w.num_entities == nb
 
     @pytest.mark.tags(CaseLabel.L1)
     def test_insert_binary_dataframe(self):
@@ -278,8 +265,7 @@ def test_insert_field_name_not_match(self):
         collection_w = self.init_collection_wrap(name=c_name)
         df = cf.gen_default_dataframe_data(10)
         df.rename(columns={ct.default_float_field_name: "int"}, inplace=True)
-        error = {ct.err_code: 1,
-                 ct.err_msg: "The name of field don't match, expected: float, got int"}
+        error = {ct.err_code: 999, ct.err_msg: "The name of field don't match, expected: float, got int"}
         collection_w.insert(
             data=df, check_task=CheckTasks.err_res, check_items=error)
 
@@ -297,7 +283,7 @@ def test_insert_field_value_not_match(self):
         df = cf.gen_default_dataframe_data(nb)
         new_float_value = pd.Series(data=[float(i) for i in range(nb)], dtype="float64")
         df[df.columns[1]] = new_float_value
-        error = {ct.err_code: 1,
+        error = {ct.err_code: 999,
                  ct.err_msg: "The data type of field float doesn't match, expected: FLOAT, got DOUBLE"}
         collection_w.insert(data=df, check_task=CheckTasks.err_res, check_items=error)
 
@@ -305,17 +291,19 @@ def test_insert_field_value_not_match(self):
     def test_insert_value_less(self):
         """
         target: test insert value less than other
-        method: int field value less than vec-field value
+        method: string field value less than vec-field value
         expected: raise exception
         """
         c_name = cf.gen_unique_str(prefix)
         collection_w = self.init_collection_wrap(name=c_name)
         nb = 10
-        int_values = [i for i in range(nb - 1)]
-        float_values = [np.float32(i) for i in range(nb)]
-        float_vec_values = cf.gen_vectors(nb, ct.default_dim)
-        data = [int_values, float_values, float_vec_values]
-        error = {ct.err_code: 1, ct.err_msg: 'Arrays must all be same length.'}
+        data = []
+        for fields in collection_w.schema.fields:
+            field_data = cf.gen_data_by_collection_field(fields, nb=nb)
+            if fields.dtype == DataType.VARCHAR:
+                field_data = field_data[:-1]
+            data.append(field_data)
+        error = {ct.err_code: 999, ct.err_msg: "Field data size misaligned for field [varchar] "}
         collection_w.insert(
             data=data, check_task=CheckTasks.err_res, check_items=error)
 
@@ -329,11 +317,13 @@ def test_insert_vector_value_less(self):
         c_name = cf.gen_unique_str(prefix)
         collection_w = self.init_collection_wrap(name=c_name)
         nb = 10
-        int_values = [i for i in range(nb)]
-        float_values = [np.float32(i) for i in range(nb)]
-        float_vec_values = cf.gen_vectors(nb - 1, ct.default_dim)
-        data = [int_values, float_values, float_vec_values]
-        error = {ct.err_code: 1, ct.err_msg: 'Arrays must all be same length.'}
+        data = []
+        for fields in collection_w.schema.fields:
+            field_data = cf.gen_data_by_collection_field(fields, nb=nb)
+            if fields.dtype == DataType.FLOAT_VECTOR:
+                field_data = field_data[:-1]
+            data.append(field_data)
+        error = {ct.err_code: 999, ct.err_msg: 'Field data size misaligned for field [float_vector] '}
         collection_w.insert(
             data=data, check_task=CheckTasks.err_res, check_items=error)
 
@@ -346,14 +336,15 @@ def test_insert_fields_more(self):
         """
         c_name = cf.gen_unique_str(prefix)
         collection_w = self.init_collection_wrap(name=c_name)
-        df = cf.gen_default_dataframe_data(ct.default_nb)
-        new_values = [i for i in range(ct.default_nb)]
-        df.insert(3, 'new', new_values)
-        error = {ct.err_code: 1, ct.err_msg: "The fields don't match with schema fields, "
-                                             "expected: ['int64', 'float', 'varchar', 'float_vector'], "
-                                             "got ['int64', 'float', 'varchar', 'new', 'float_vector']"}
+        nb = 10
+        data = []
+        for fields in collection_w.schema.fields:
+            field_data = cf.gen_data_by_collection_field(fields, nb=nb)
+            data.append(field_data)
+        data.append([1 for _ in range(nb)])
+        error = {ct.err_code: 999, ct.err_msg: "The data don't match with schema fields"}
         collection_w.insert(
-            data=df, check_task=CheckTasks.err_res, check_items=error)
+            data=data, check_task=CheckTasks.err_res, check_items=error)
 
     @pytest.mark.tags(CaseLabel.L2)
     def test_insert_fields_less(self):
@@ -366,9 +357,7 @@ def test_insert_fields_less(self):
         collection_w = self.init_collection_wrap(name=c_name)
         df = cf.gen_default_dataframe_data(ct.default_nb)
         df.drop(ct.default_float_vec_field_name, axis=1, inplace=True)
-        error = {ct.err_code: 1, ct.err_msg: "The fields don't match with schema fields, "
-                                             "expected: ['int64', 'float', 'varchar', 'float_vector'], "
-                                             "got ['int64', 'float', 'varchar']"}
+        error = {ct.err_code: 999, ct.err_msg: "The fields don't match with schema fields"}
         collection_w.insert(
             data=df, check_task=CheckTasks.err_res, check_items=error)
 
@@ -382,39 +371,18 @@ def test_insert_list_order_inconsistent_schema(self):
         c_name = cf.gen_unique_str(prefix)
         collection_w = self.init_collection_wrap(name=c_name)
         nb = 10
-        int_values = [i for i in range(nb)]
-        float_values = [np.float32(i) for i in range(nb)]
-        float_vec_values = cf.gen_vectors(nb, ct.default_dim)
-        data = [float_values, int_values, float_vec_values]
-        error = {ct.err_code: 1,
-                 ct.err_msg: "The data type of field int64 doesn't match, expected: INT64, got FLOAT"}
+        data = []
+        for field in collection_w.schema.fields:
+            field_data = cf.gen_data_by_collection_field(field, nb=nb)
+            data.append(field_data)
+        tmp = data[0]
+        data[0] = data[1]
+        data[1] = tmp
+        error = {ct.err_code: 999,
+                 ct.err_msg: "The Input data type is inconsistent with defined schema"}
         collection_w.insert(
             data=data, check_task=CheckTasks.err_res, check_items=error)
 
-    @pytest.mark.tags(CaseLabel.L1)
-    def test_insert_dataframe_order_inconsistent_schema(self):
-        """
-        target: test insert with dataframe fields inconsistent with schema
-        method: insert dataframe, and fields order inconsistent with schema
-        expected: assert num entities
-        """
-        c_name = cf.gen_unique_str(prefix)
-        collection_w = self.init_collection_wrap(name=c_name)
-        nb = 10
-        int_values = pd.Series(data=[i for i in range(nb)])
-        float_values = pd.Series(data=[float(i) for i in range(nb)], dtype="float32")
-        float_vec_values = cf.gen_vectors(nb, ct.default_dim)
-        df = pd.DataFrame({
-            ct.default_float_field_name: float_values,
-            ct.default_float_vec_field_name: float_vec_values,
-            ct.default_int64_field_name: int_values
-        })
-        error = {ct.err_code: 1,
-                 ct.err_msg: "The fields don't match with schema fields, expected: ['int64', 'float', "
-                             "'varchar', 'json_field', 'float_vector'], got ['float', 'float_vector', "
-                             "'int64']"}
-        collection_w.insert(data=df, check_task=CheckTasks.err_res, check_items=error)
-
     @pytest.mark.tags(CaseLabel.L2)
     def test_insert_inconsistent_data(self):
         """
@@ -426,7 +394,7 @@ def test_insert_inconsistent_data(self):
         collection_w = self.init_collection_wrap(name=c_name)
         data = cf.gen_default_list_data(nb=100)
         data[0][1] = 1.0
-        error = {ct.err_code: 1,
+        error = {ct.err_code: 999,
                  ct.err_msg: "The Input data type is inconsistent with defined schema, please check it."}
         collection_w.insert(data, check_task=CheckTasks.err_res, check_items=error)
 
@@ -463,7 +431,7 @@ def test_insert_without_connection(self):
         res_list, _ = self.connection_wrap.list_connections()
         assert ct.default_alias not in res_list
         data = cf.gen_default_list_data(10)
-        error = {ct.err_code: 1, ct.err_msg: 'should create connect first'}
+        error = {ct.err_code: 999, ct.err_msg: 'should create connection first'}
         collection_w.insert(data=data, check_task=CheckTasks.err_res, check_items=error)
 
     @pytest.mark.tags(CaseLabel.L1)
@@ -489,9 +457,9 @@ def test_insert_partition_not_existed(self):
         """
         collection_w = self.init_collection_wrap(
             name=cf.gen_unique_str(prefix))
-        df = cf.gen_default_dataframe_data(nb=ct.default_nb)
-        error = {ct.err_code: 1,
-                 ct.err_msg: "partitionID of partitionName:p can not be existed"}
+        df = cf.gen_default_dataframe_data(nb=10)
+        error = {ct.err_code: 999,
+                 ct.err_msg: "partition not found[partition=p]"}
         mutation_res, _ = collection_w.insert(data=df, partition_name="p", check_task=CheckTasks.err_res,
                                               check_items=error)
 
@@ -530,21 +498,6 @@ def test_insert_partition_with_ids(self):
             data=df, partition_name=partition_w1.name)
         assert mutation_res.insert_count == ct.default_nb
 
-    @pytest.mark.tags(CaseLabel.L2)
-    def test_insert_with_field_type_not_match(self):
-        """
-        target: test insert entities, with the entity field type updated
-        method: update entity field type
-        expected: error raised
-        """
-        collection_w = self.init_collection_wrap(
-            name=cf.gen_unique_str(prefix))
-        df = cf.gen_collection_schema_all_datatype
-        error = {ct.err_code: 1,
-                 ct.err_msg: "The type of data should be list or pandas.DataFrame"}
-        collection_w.insert(
-            data=df, check_task=CheckTasks.err_res, check_items=error)
-
     @pytest.mark.tags(CaseLabel.L1)
     def test_insert_exceed_varchar_limit(self):
         """
@@ -564,59 +517,47 @@ def test_insert_exceed_varchar_limit(self):
         vectors = cf.gen_vectors(2, ct.default_dim)
         data = [vectors, ["limit_1___________",
                           "limit_2___________"], ['1', '2']]
-        error = {ct.err_code: 1,
+        error = {ct.err_code: 999,
                  ct.err_msg: "invalid input, length of string exceeds max length"}
         collection_w.insert(
             data, check_task=CheckTasks.err_res, check_items=error)
 
-    @pytest.mark.tags(CaseLabel.L2)
-    def test_insert_with_lack_vector_field(self):
-        """
-        target: test insert entities, with no vector field
-        method: remove entity values of vector field
-        expected: error raised
-        """
-        collection_w = self.init_collection_wrap(
-            name=cf.gen_unique_str(prefix))
-        df = cf.gen_collection_schema([cf.gen_int64_field(is_primary=True)])
-        error = {ct.err_code: 1, ct.err_msg: "Data type is not support."}
-        collection_w.insert(
-            data=df, check_task=CheckTasks.err_res, check_items=error)
-
     @pytest.mark.tags(CaseLabel.L2)
     def test_insert_with_no_vector_field_dtype(self):
         """
-        target: test insert entities, with vector field type is error
-        method: vector field dtype is not existed
+        target: test insert entities, with no vector field
+        method: vector field is missing in data
         expected: error raised
         """
-        collection_w = self.init_collection_wrap(
-            name=cf.gen_unique_str(prefix))
-        vec_field, _ = self.field_schema_wrap.init_field_schema(
-            name=ct.default_int64_field_name, dtype=DataType.NONE)
-        field_one = cf.gen_int64_field(is_primary=True)
-        field_two = cf.gen_int64_field()
-        df = [field_one, field_two, vec_field]
-        error = {ct.err_code: 1, ct.err_msg: "Field dtype must be of DataType."}
-        collection_w.insert(
-            data=df, check_task=CheckTasks.err_res, check_items=error)
+        collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
+        nb = 1
+        data = []
+        fields = collection_w.schema.fields
+        for field in fields:
+            field_data = cf.gen_data_by_collection_field(field, nb=nb)
+            if field.dtype != DataType.FLOAT_VECTOR:
+                data.append(field_data)
+        error = {ct.err_code: 999, ct.err_msg: f"The data don't match with schema fields, "
+                                               f"expect {len(fields)} list, got {len(data)}"}
+        collection_w.insert(data=data, check_task=CheckTasks.err_res, check_items=error)
 
     @pytest.mark.tags(CaseLabel.L2)
-    def test_insert_with_no_vector_field_name(self):
+    def test_insert_with_vector_field_dismatch_dtype(self):
         """
-        target: test insert entities, with no vector field name
-        method: vector field name is error
+        target: test insert entities, with vector field data of mismatched dtype
+        method: replace the float vector column with plain float scalars
         expected: error raised
         """
-        collection_w = self.init_collection_wrap(
-            name=cf.gen_unique_str(prefix))
-        vec_field = cf.gen_float_vec_field(name=ct.get_invalid_strs)
-        field_one = cf.gen_int64_field(is_primary=True)
-        field_two = cf.gen_int64_field()
-        df = [field_one, field_two, vec_field]
-        error = {ct.err_code: 1, ct.err_msg: "data should be a list of list"}
-        collection_w.insert(
-            data=df, check_task=CheckTasks.err_res, check_items=error)
+        collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
+        nb = 1
+        data = []
+        for field in collection_w.schema.fields:
+            field_data = cf.gen_data_by_collection_field(field, nb=nb)
+            if field.dtype == DataType.FLOAT_VECTOR:
+                field_data = [random.randint(-1000, 1000) * 0.0001 for _ in range(nb)]
+            data.append(field_data)
+        error = {ct.err_code: 999, ct.err_msg: "The Input data type is inconsistent with defined schema"}
+        collection_w.insert(data=data, check_task=CheckTasks.err_res, check_items=error)
 
     @pytest.mark.tags(CaseLabel.L1)
     def test_insert_drop_collection(self):
@@ -793,8 +734,8 @@ def test_insert_auto_id_true_with_dataframe_values(self, pk_field):
             primary_field=pk_field, auto_id=True)
         collection_w = self.init_collection_wrap(name=c_name, schema=schema)
         df = cf.gen_default_dataframe_data(nb=100)
-        error = {ct.err_code: 1,
-                 ct.err_msg: "Please don't provide data for auto_id primary field: int64"}
+        error = {ct.err_code: 999,
+                 ct.err_msg: f"Expect no data for auto_id primary field: {pk_field}"}
         collection_w.insert(
             data=df, check_task=CheckTasks.err_res, check_items=error)
         assert collection_w.is_empty
@@ -807,15 +748,16 @@ def test_insert_auto_id_true_with_list_values(self, pk_field):
         expected: 1.verify num entities 2.verify ids
         """
         c_name = cf.gen_unique_str(prefix)
-        schema = cf.gen_default_collection_schema(
-            primary_field=pk_field, auto_id=True)
+        schema = cf.gen_default_collection_schema(primary_field=pk_field, auto_id=True)
         collection_w = self.init_collection_wrap(name=c_name, schema=schema)
-        data = cf.gen_default_list_data(nb=100)
-        error = {ct.err_code: 1, ct.err_msg: "The fields don't match with schema fields, "
-                                             "expected: ['float', 'varchar', 'float_vector'], got ['', '', '', '']"}
-        collection_w.insert(
-            data=data, check_task=CheckTasks.err_res, check_items=error)
-        assert collection_w.is_empty
+        data = []
+        nb = 100
+        for field in collection_w.schema.fields:
+            field_data = cf.gen_data_by_collection_field(field, nb=nb)
+            if field.name != pk_field:
+                data.append(field_data)
+        collection_w.insert(data=data)
+        assert collection_w.num_entities == nb
 
     @pytest.mark.tags(CaseLabel.L1)
     def test_insert_auto_id_false_same_values(self):
@@ -982,7 +924,7 @@ def test_insert_multi_fields_using_default_value(self, default_value, auto_id):
         if auto_id:
             del data[0]
         collection_w.insert(data, check_task=CheckTasks.err_res,
-                            check_items={ct.err_code: 1,
+                            check_items={ct.err_code: 999,
                                          ct.err_msg: "The data type of field varchar doesn't match"})
         # 2. default value fields all after vector field, insert empty, succeed
         fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_vec_field(),
@@ -1150,20 +1092,6 @@ def test_insert_async_invalid_partition(self):
         with pytest.raises(MilvusException, match=err_msg):
             future.result()
 
-    @pytest.mark.tags(CaseLabel.L2)
-    def test_insert_async_no_vectors_raise_exception(self):
-        """
-        target: test insert vectors with no vectors
-        method: set only vector field and insert into collection
-        expected: raise exception
-        """
-        collection_w = self.init_collection_wrap(
-            name=cf.gen_unique_str(prefix))
-        df = cf.gen_collection_schema([cf.gen_int64_field(is_primary=True)])
-        error = {ct.err_code: 1, ct.err_msg: "fleldSchema lack of vector field."}
-        future, _ = collection_w.insert(
-            data=df, _async=True, check_task=CheckTasks.err_res, check_items=error)
-
 
 def assert_mutation_result(mutation_res):
     assert mutation_res.insert_count == ct.default_nb
@@ -1228,41 +1156,50 @@ class TestInsertInvalid(TestcaseBase):
       The following cases are used to test insert invalid params
       ******************************************************************
     """
-
-    @pytest.mark.tags(CaseLabel.L2)
-    def test_insert_ids_invalid(self):
-        """
-        target: test insert, with using auto id is invalid, which are not int64
-        method: create collection and insert entities in it
-        expected: raise exception
-        """
-        collection_name = cf.gen_unique_str(prefix)
-        collection_w = self.init_collection_wrap(name=collection_name)
-        int_field = cf.gen_float_field(is_primary=True)
-        vec_field = cf.gen_float_vec_field(name='vec')
-        df = [int_field, vec_field]
-        error = {ct.err_code: 1,
-                 ct.err_msg: "Primary key type must be DataType.INT64."}
-        mutation_res, _ = collection_w.insert(
-            data=df, check_task=CheckTasks.err_res, check_items=error)
-
-    @pytest.mark.tags(CaseLabel.L2)
-    def test_insert_string_to_int64_pk_field(self):
+    @pytest.mark.tags(CaseLabel.L0)
+    @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name])
+    def test_insert_with_invalid_field_value(self, primary_field):
         """
-        target: test insert, with using auto id is invalid, which are not int64
-        method: create collection and insert entities in it
+        target: verify error msg when inserting with invalid field value
+        method: dirty one int/str/bool/float value at a time, insert, restore it; finally insert the clean data
         expected: raise exception
         """
+        collection_w = self.init_collection_general(prefix, auto_id=False, insert_data=False,
+                                                    primary_field=primary_field, is_index=False,
+                                                    is_all_data_type=True, with_json=True)[0]
         nb = 100
-        collection_name = cf.gen_unique_str(prefix)
-        collection_w = self.init_collection_wrap(name=collection_name)
-        df = cf.gen_default_dataframe_data(nb)
-        invalid_id = random.randint(0, nb)
-        # df[ct.default_int64_field_name][invalid_id] = "2000000"
-        df.at[invalid_id, ct.default_int64_field_name] = "2000000"
-        error = {ct.err_code: 1,
-                 ct.err_msg: "The data in the same column must be of the same type."}
-        mutation_res, _ = collection_w.insert(data=df, check_task=CheckTasks.err_res, check_items=error)
+        data = cf.gen_data_by_collection_schema(collection_w.schema, nb=nb)
+        for dirty_i in [0, nb // 2, nb - 1]:      # check the dirty data at first, middle and last
+            log.debug(f"dirty_i: {dirty_i}")
+            for i in range(len(data)):
+                if data[i][dirty_i].__class__ is int:  # exact class check, bool values are handled in their own branch
+                    tmp = data[i][dirty_i]  # keep the value of the dirtied row itself so the restore below is exact
+                    data[i][dirty_i] = "iamstring"
+                    error = {ct.err_code: 999, ct.err_msg: "The Input data type is inconsistent with defined schema"}
+                    collection_w.insert(data=data, check_task=CheckTasks.err_res, check_items=error)
+                    data[i][dirty_i] = tmp
+                elif data[i][dirty_i].__class__ is str:
+                    tmp = data[i][dirty_i]
+                    data[i][dirty_i] = random.randint(0, 1000)
+                    error = {ct.err_code: 999, ct.err_msg: "expect string input, got: <class 'int'>"}
+                    collection_w.insert(data=data, check_task=CheckTasks.err_res, check_items=error)
+                    data[i][dirty_i] = tmp
+                elif data[i][dirty_i].__class__ is bool:
+                    tmp = data[i][dirty_i]
+                    data[i][dirty_i] = "iamstring"
+                    error = {ct.err_code: 999, ct.err_msg: "The Input data type is inconsistent with defined schema"}
+                    collection_w.insert(data=data, check_task=CheckTasks.err_res, check_items=error)
+                    data[i][dirty_i] = tmp
+                elif data[i][dirty_i].__class__ is float:
+                    tmp = data[i][dirty_i]
+                    data[i][dirty_i] = "iamstring"
+                    error = {ct.err_code: 999, ct.err_msg: "The Input data type is inconsistent with defined schema"}
+                    collection_w.insert(data=data, check_task=CheckTasks.err_res, check_items=error)
+                    data[i][dirty_i] = tmp
+                else:
+                    continue  # columns whose values are none of the four scalar classes above are left untouched
+        res = collection_w.insert(data)[0]  # every dirty value was restored, so the clean data must be accepted
+        assert res.insert_count == nb
 
     @pytest.mark.tags(CaseLabel.L2)
     def test_insert_with_invalid_partition_name(self):
@@ -1278,23 +1215,6 @@ def test_insert_with_invalid_partition_name(self):
         mutation_res, _ = collection_w.insert(data=df, partition_name="p", check_task=CheckTasks.err_res,
                                               check_items=error)
 
-    @pytest.mark.tags(CaseLabel.L1)
-    def test_insert_with_invalid_field_value(self):
-        """
-        target: test insert with invalid field
-        method: insert with invalid field value
-        expected: raise exception
-        """
-        collection_name = cf.gen_unique_str(prefix)
-        collection_w = self.init_collection_wrap(name=collection_name)
-        field_one = cf.gen_int64_field(is_primary=True)
-        field_two = cf.gen_int64_field()
-        vec_field = ct.get_invalid_vectors
-        df = [field_one, field_two, vec_field]
-        error = {ct.err_code: 1, ct.err_msg: "Data type is not support."}
-        mutation_res, _ = collection_w.insert(
-            data=df, check_task=CheckTasks.err_res, check_items=error)
-
     @pytest.mark.tags(CaseLabel.L2)
     def test_insert_invalid_with_pk_varchar_auto_id_true(self):
         """
@@ -1353,12 +1273,10 @@ def test_insert_int32_overflow(self, invalid_int32):
         collection_w = self.init_collection_general(prefix, is_all_data_type=True)[0]
         data = cf.gen_dataframe_all_data_type(nb=1)
         data[ct.default_int32_field_name] = [invalid_int32]
-        error = {ct.err_code: 1, 'err_msg': "The data type of field int32 doesn't match, "
-                                            "expected: INT32, got INT64"}
+        error = {ct.err_code: 999, 'err_msg': "The Input data type is inconsistent with defined schema"}
         collection_w.insert(data, check_task=CheckTasks.err_res, check_items=error)
 
     @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.skip("no error code provided now")
     def test_insert_over_resource_limit(self):
         """
         target: test insert over RPC limitation 64MB (67108864)
@@ -1369,8 +1287,7 @@ def test_insert_over_resource_limit(self):
         collection_name = cf.gen_unique_str(prefix)
         collection_w = self.init_collection_wrap(name=collection_name)
         data = cf.gen_default_dataframe_data(nb)
-        error = {ct.err_code: 1, ct.err_msg: "<_MultiThreadedRendezvous of RPC that terminated with:"
-                                             "status = StatusCode.RESOURCE_EXHAUSTED"}
+        error = {ct.err_code: 999, ct.err_msg: "message larger than max"}
         collection_w.insert(
             data=data, check_task=CheckTasks.err_res, check_items=error)
 
@@ -1391,7 +1308,7 @@ def test_insert_array_using_default_value(self, default_value):
         data = [{"int64": 1, "float_vector": vectors[1],
                  "varchar": default_value, "float": np.float32(1.0)}]
         collection_w.insert(data, check_task=CheckTasks.err_res,
-                            check_items={ct.err_code: 1, ct.err_msg: "Field varchar don't match in entities[0]"})
+                            check_items={ct.err_code: 999, ct.err_msg: "Field varchar don't match in entities[0]"})
 
     @pytest.mark.tags(CaseLabel.L2)
     @pytest.mark.skip("not support default_value now")
@@ -1411,7 +1328,7 @@ def test_insert_tuple_using_default_value(self, default_value):
         string_values = ["abc" for i in range(ct.default_nb)]
         data = (int_values, vectors, string_values, default_value)
         collection_w.insert(data, check_task=CheckTasks.err_res,
-                            check_items={ct.err_code: 1, ct.err_msg: "Field varchar don't match in entities[0]"})
+                            check_items={ct.err_code: 999, ct.err_msg: "Field varchar don't match in entities[0]"})
 
     @pytest.mark.tags(CaseLabel.L2)
     def test_insert_with_nan_value(self):
@@ -1425,10 +1342,10 @@ def test_insert_with_nan_value(self):
         collection_w = self.init_collection_wrap(name=collection_name)
         data = cf.gen_default_dataframe_data()
         data[vector_field][0][0] = None
-        error = {ct.err_code: 1, ct.err_msg: "The data in the same column must be of the same type."}
+        error = {ct.err_code: 999, ct.err_msg: "The Input data type is inconsistent with defined schema"}
         collection_w.insert(data=data, check_task=CheckTasks.err_res, check_items=error)
         data[vector_field][0][0] = float('nan')
-        error = {ct.err_code: 65535, ct.err_msg: "value 'NaN' is not a number or infinity"}
+        error = {ct.err_code: 999, ct.err_msg: "value 'NaN' is not a number or infinity"}
         collection_w.insert(data=data, check_task=CheckTasks.err_res, check_items=error)
         data[vector_field][0][0] = np.NAN
         collection_w.insert(data=data, check_task=CheckTasks.err_res, check_items=error)
@@ -1436,22 +1353,6 @@ def test_insert_with_nan_value(self):
         error = {ct.err_code: 65535, ct.err_msg: "value '+Inf' is not a number or infinity"}
         collection_w.insert(data=data, check_task=CheckTasks.err_res, check_items=error)
 
-    @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.parametrize("json_value", ct.get_invalid_dict[:8])
-    def test_insert_json_filed_invalid(self, json_value):
-        """
-        target: test insert json field invalid
-        method: insert with nan value: list, number, string ...
-        expected: raise exception
-        """
-        if isinstance(json_value, list) or json_value is None:
-            pytest.skip("invalid in dataframe")
-        collection_name = cf.gen_unique_str(prefix)
-        collection_w = self.init_collection_wrap(name=collection_name)
-        data = cf.gen_default_dataframe_data()
-        data.loc[0, ct.default_json_field_name] = json_value
-        collection_w.insert(data)
-
 
 class TestInsertInvalidBinary(TestcaseBase):
     """
@@ -1463,20 +1364,16 @@ class TestInsertInvalidBinary(TestcaseBase):
     @pytest.mark.tags(CaseLabel.L1)
     def test_insert_ids_binary_invalid(self):
         """
-        target: test insert, with using customize ids, which are not int64
+        target: test that inserting float vector data into a collection with a binary vector schema is rejected
         method: create collection and insert entities in it
         expected: raise exception
         """
-        collection_name = cf.gen_unique_str(prefix)
-        collection_w = self.init_collection_wrap(name=collection_name)
-        field_one = cf.gen_float_field(is_primary=True)
-        field_two = cf.gen_float_field()
-        vec_field, _ = self.field_schema_wrap.init_field_schema(name=ct.default_binary_vec_field_name,
-                                                                dtype=DataType.BINARY_VECTOR)
-        df = [field_one, field_two, vec_field]
-        error = {ct.err_code: 1, ct.err_msg: "data should be a list of list"}
+        collection_w = self.init_collection_general(prefix, auto_id=False, insert_data=False, is_binary=True,
+                                                    is_index=False, with_json=False)[0]
+        data = cf.gen_default_list_data(nb=100, with_json=False)  # presumably float-vector columns; verify in cf
+        error = {ct.err_code: 999, ct.err_msg: "Invalid binary vector data exists"}
         mutation_res, _ = collection_w.insert(
-            data=df, check_task=CheckTasks.err_res, check_items=error)
+            data=data, check_task=CheckTasks.err_res, check_items=error)
 
     @pytest.mark.tags(CaseLabel.L2)
     def test_insert_with_invalid_binary_partition_name(self):
@@ -1485,12 +1382,11 @@ def test_insert_with_invalid_binary_partition_name(self):
         method: insert with invalid partition name
         expected: raise exception
         """
-        collection_name = cf.gen_unique_str(prefix)
-        collection_w = self.init_collection_wrap(name=collection_name)
-        partition_name = ct.get_invalid_strs
-        df, _ = cf.gen_default_binary_dataframe_data(ct.default_nb)
-        error = {ct.err_code: 1,
-                 'err_msg': "The types of schema and data do not match."}
+        collection_w = self.init_collection_general(prefix, auto_id=False, insert_data=False, is_binary=True,
+                                                    is_index=False, with_json=False)[0]
+        partition_name = "non_existent_partition"
+        df, _ = cf.gen_default_binary_dataframe_data(nb=100)
+        error = {ct.err_code: 999, 'err_msg': f"partition not found[partition={partition_name}]"}
         mutation_res, _ = collection_w.insert(data=df, partition_name=partition_name, check_task=CheckTasks.err_res,
                                               check_items=error)
 
@@ -1530,7 +1426,6 @@ def test_insert_multi_string_fields(self, string_fields):
                 2.Insert multi string fields
         expected: Insert Successfully
         """
-
         schema = cf.gen_schema_multi_string_fields(string_fields)
         collection_w = self.init_collection_wrap(
             name=cf.gen_unique_str(prefix), schema=schema)
@@ -1538,42 +1433,6 @@ def test_insert_multi_string_fields(self, string_fields):
         collection_w.insert(df)
         assert collection_w.num_entities == ct.default_nb
 
-    @pytest.mark.tags(CaseLabel.L0)
-    def test_insert_string_field_invalid_data(self):
-        """
-        target: test insert string field data is not match
-        method: 1.create a collection
-                2.Insert string field data is not match
-        expected: Raise exceptions
-        """
-        c_name = cf.gen_unique_str(prefix)
-        collection_w = self.init_collection_wrap(name=c_name)
-        nb = 10
-        df = cf.gen_default_dataframe_data(nb)
-        new_float_value = pd.Series(
-            data=[float(i) for i in range(nb)], dtype="float64")
-        df[df.columns[2]] = new_float_value
-        error = {ct.err_code: 1,
-                 ct.err_msg: "The data type of field varchar doesn't match, expected: VARCHAR, got DOUBLE"}
-        collection_w.insert(
-            data=df, check_task=CheckTasks.err_res, check_items=error)
-
-    @pytest.mark.tags(CaseLabel.L0)
-    def test_insert_string_field_name_invalid(self):
-        """
-        target: test insert string field name is invaild
-        method: 1.create a collection  
-                2.Insert string field name is invalid
-        expected: Raise exceptions
-        """
-        c_name = cf.gen_unique_str(prefix)
-        collection_w = self.init_collection_wrap(name=c_name)
-        df = [cf.gen_int64_field(), cf.gen_string_field(
-            name=ct.get_invalid_strs), cf.gen_float_vec_field()]
-        error = {ct.err_code: 1, ct.err_msg: 'data should be a list of list'}
-        collection_w.insert(
-            data=df, check_task=CheckTasks.err_res, check_items=error)
-
     @pytest.mark.tags(CaseLabel.L0)
     def test_insert_string_field_length_exceed(self):
         """
@@ -1584,55 +1443,20 @@ def test_insert_string_field_length_exceed(self):
         """
         c_name = cf.gen_unique_str(prefix)
         collection_w = self.init_collection_wrap(name=c_name)
-        nums = 70000
-        field_one = cf.gen_int64_field()
-        field_two = cf.gen_float_field()
-        field_three = cf.gen_string_field(max_length=nums)
-        vec_field = cf.gen_float_vec_field()
-        df = [field_one, field_two, field_three, vec_field]
-        error = {ct.err_code: 1, ct.err_msg: 'data should be a list of list'}
-        collection_w.insert(
-            data=df, check_task=CheckTasks.err_res, check_items=error)
-
-    @pytest.mark.tags(CaseLabel.L1)
-    def test_insert_string_field_dtype_invalid(self):
-        """
-        target: test insert string field with invaild dtype
-        method: 1.create a collection  
-                2.Insert string field dtype is invalid
-        expected: Raise exception
-        """
-        c_name = cf.gen_unique_str(prefix)
-        collection_w = self.init_collection_wrap(name=c_name)
-        string_field = self.field_schema_wrap.init_field_schema(
-            name="string", dtype=DataType.STRING)[0]
-        int_field = cf.gen_int64_field(is_primary=True)
-        vec_field = cf.gen_float_vec_field()
-        df = [string_field, int_field, vec_field]
-        error = {ct.err_code: 1, ct.err_msg: 'data should be a list of list'}
-        collection_w.insert(
-            data=df, check_task=CheckTasks.err_res, check_items=error)
-
-    @pytest.mark.tags(CaseLabel.L1)
-    def test_insert_string_field_auto_id_is_true(self):
-        """
-        target: test create collection with string field 
-        method: 1.create a collection  
-                2.Insert string field with auto id is true
-        expected: Raise exception
-        """
-        c_name = cf.gen_unique_str(prefix)
-        collection_w = self.init_collection_wrap(name=c_name)
-        int_field = cf.gen_int64_field()
-        vec_field = cf.gen_float_vec_field()
-        string_field = cf.gen_string_field(is_primary=True, auto_id=True)
-        df = [int_field, string_field, vec_field]
-        error = {ct.err_code: 1, ct.err_msg: 'data should be a list of list'}
-        collection_w.insert(
-            data=df, check_task=CheckTasks.err_res, check_items=error)
+        max = 65535
+        data = []
+        for field in collection_w.schema.fields:
+            field_data = cf.gen_data_by_collection_field(field, nb=1)
+            if field.dtype == DataType.VARCHAR:
+                field_data = [cf.gen_str_by_length(length=max + 1)]
+            data.append(field_data)
+
+        error = {ct.err_code: 999, ct.err_msg: 'length of string exceeds max length'}
+        collection_w.insert(data=data, check_task=CheckTasks.err_res, check_items=error)
 
     @pytest.mark.tags(CaseLabel.L1)
-    def test_insert_string_field_space(self):
+    @pytest.mark.parametrize("str_field_value", ["", "    "])
+    def test_insert_string_field_space_empty(self, str_field_value):
         """
         target: test create collection with string field 
         method: 1.create a collection  
@@ -1641,30 +1465,20 @@ def test_insert_string_field_space(self):
         """
         c_name = cf.gen_unique_str(prefix)
         collection_w = self.init_collection_wrap(name=c_name)
-        nb = 1000
-        data = cf.gen_default_list_data(nb)
-        data[2] = [" "for _ in range(nb)]
-        collection_w.insert(data)
-        assert collection_w.num_entities == nb
+        nb = 100
+        data = []
+        for field in collection_w.schema.fields:
+            field_data = cf.gen_data_by_collection_field(field, nb=nb)
+            if field.dtype == DataType.VARCHAR:
+                field_data = [str_field_value for _ in range(nb)]
+            data.append(field_data)
 
-    @pytest.mark.tags(CaseLabel.L1)
-    def test_insert_string_field_empty(self):
-        """
-        target: test create collection with string field 
-        method: 1.create a collection  
-                2.Insert string field with empty
-        expected: Insert successfully
-        """
-        c_name = cf.gen_unique_str(prefix)
-        collection_w = self.init_collection_wrap(name=c_name)
-        nb = 1000
-        data = cf.gen_default_list_data(nb)
-        data[2] = [""for _ in range(nb)]
         collection_w.insert(data)
         assert collection_w.num_entities == nb
 
     @pytest.mark.tags(CaseLabel.L1)
-    def test_insert_string_field_is_pk_and_empty(self):
+    @pytest.mark.parametrize("str_field_value", ["", "    "])
+    def test_insert_string_field_is_pk_and_empty(self, str_field_value):
         """
         target: test create collection with string field is primary
         method: 1.create a collection  
@@ -1674,9 +1488,13 @@ def test_insert_string_field_is_pk_and_empty(self):
         c_name = cf.gen_unique_str(prefix)
         schema = cf.gen_string_pk_default_collection_schema()
         collection_w = self.init_collection_wrap(name=c_name, schema=schema)
-        nb = 1000
-        data = cf.gen_default_list_data(nb)
-        data[2] = [""for _ in range(nb)]
+        nb = 100
+        data = []
+        for field in collection_w.schema.fields:
+            field_data = cf.gen_data_by_collection_field(field, nb=nb)
+            if field.dtype == DataType.VARCHAR:
+                field_data = [str_field_value for _ in range(nb)]
+            data.append(field_data)
         collection_w.insert(data)
         assert collection_w.num_entities == nb
 
@@ -1699,7 +1517,7 @@ def test_upsert_data_pk_not_exist(self):
         assert collection_w.num_entities == ct.default_nb
 
     @pytest.mark.tags(CaseLabel.L0)
-    @pytest.mark.parametrize("start", [0, 1500, 2500, 3500])
+    @pytest.mark.parametrize("start", [0, 1500, 3500])
     def test_upsert_data_pk_exist(self, start):
         """
         target: test upsert data and collection pk exists
@@ -1782,7 +1600,7 @@ def test_upsert_data_is_none(self):
         collection_w = self.init_collection_general(pre_upsert, insert_data=True, is_index=False)[0]
         assert collection_w.num_entities == ct.default_nb
         collection_w.upsert(data=None, check_task=CheckTasks.err_res,
-                            check_items={ct.err_code: 1,
+                            check_items={ct.err_code: 999,
                                          ct.err_msg: "The type of data should be List, pd.DataFrame or Dict"})
 
     @pytest.mark.tags(CaseLabel.L1)
@@ -2011,7 +1829,7 @@ def test_upsert_multi_fields_using_default_value(self, default_value):
         ]
         collection_w.upsert(data,
                             check_task=CheckTasks.err_res,
-                            check_items={ct.err_code: 1,
+                            check_items={ct.err_code: 999,
                                          ct.err_msg: "The data type of field varchar doesn't match"})
 
         # 2. default value fields all after vector field, insert empty, succeed
@@ -2063,56 +1881,53 @@ def test_upsert_dataframe_using_default_value(self):
 class TestUpsertInvalid(TestcaseBase):
     """ Invalid test case of Upsert interface """
 
-    @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.parametrize("data", ct.get_invalid_strs[:12])
-    def test_upsert_non_data_type(self, data):
+    @pytest.mark.tags(CaseLabel.L0)
+    @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name])
+    def test_upsert_data_type_dismatch(self, primary_field):
         """
         target: test upsert with invalid data type
         method: upsert data type string, set, number, float...
         expected: raise exception
         """
-        if data is None:
-            pytest.skip("data=None is valid")
-        c_name = cf.gen_unique_str(pre_upsert)
-        collection_w = self.init_collection_wrap(name=c_name)
-        error = {ct.err_code: 1, ct.err_msg: "The fields don't match with schema fields, expected: "
-                                             "['int64', 'float', 'varchar', 'float_vector']"}
-        collection_w.upsert(data=data, check_task=CheckTasks.err_res, check_items=error)
-
-    @pytest.mark.tags(CaseLabel.L2)
-    def test_upsert_pk_type_invalid(self):
-        """
-        target: test upsert with invalid pk type
-        method: upsert data type string, float...
-        expected: raise exception
-        """
-        c_name = cf.gen_unique_str(pre_upsert)
-        collection_w = self.init_collection_wrap(name=c_name)
-        data = [['a', 1.5], [np.float32(i) for i in range(2)], [str(i) for i in range(2)],
-                cf.gen_vectors(2, ct.default_dim)]
-        error = {ct.err_code: 1, ct.err_msg: "The data type of field int64 doesn't match, "
-                                             "expected: INT64, got VARCHAR"}
-        collection_w.upsert(data=data, check_task=CheckTasks.err_res, check_items=error)
-
-    @pytest.mark.tags(CaseLabel.L2)
-    def test_upsert_data_unmatch(self):
-        """
-        target: test upsert with unmatched data type
-        method: 1. create a collection with default schema [int, float, string, vector]
-                2. upsert with data [int, string, float, vector]
-        expected: raise exception
-        """
-        c_name = cf.gen_unique_str(pre_upsert)
-        collection_w = self.init_collection_wrap(name=c_name)
-        vector = [random.random() for _ in range(ct.default_dim)]
-        data = [1, "a", 2.0, vector]
-        error = {ct.err_code: 1, ct.err_msg: "The fields don't match with schema fields, "
-                                             "expected: ['int64', 'float', 'varchar', 'float_vector']"}
-        collection_w.upsert(data=[data], check_task=CheckTasks.err_res, check_items=error)
-
-    @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.parametrize("vector", [[], [1.0, 2.0], "a", 1.0, None])
-    def test_upsert_vector_unmatch(self, vector):
+        collection_w = self.init_collection_general(pre_upsert, auto_id=False, insert_data=False,
+                                                    primary_field=primary_field, is_index=False,
+                                                    is_all_data_type=True, with_json=True)[0]
+        nb = 100
+        data = cf.gen_data_by_collection_schema(collection_w.schema, nb=nb)
+        for dirty_i in [0, nb // 2, nb - 1]:  # check the dirty data at first, middle and last
+            log.debug(f"dirty_i: {dirty_i}")
+            for i in range(len(data)):  # data[i] is indexed by row below, i.e. one column per i
+                if data[i][dirty_i].__class__ is int:  # exact-type check: bool cells go to their own branch
+                    tmp = data[i][dirty_i]  # back up this cell itself (not row 0) so the restore is exact
+                    data[i][dirty_i] = "iamstring"
+                    error = {ct.err_code: 999, ct.err_msg: "The Input data type is inconsistent with defined schema"}
+                    collection_w.upsert(data=data, check_task=CheckTasks.err_res, check_items=error)
+                    data[i][dirty_i] = tmp
+                elif data[i][dirty_i].__class__ is str:
+                    tmp = data[i][dirty_i]
+                    data[i][dirty_i] = random.randint(0, 1000)
+                    error = {ct.err_code: 999, ct.err_msg: "expect string input, got: <class 'int'>"}
+                    collection_w.upsert(data=data, check_task=CheckTasks.err_res, check_items=error)
+                    data[i][dirty_i] = tmp
+                elif data[i][dirty_i].__class__ is bool:
+                    tmp = data[i][dirty_i]
+                    data[i][dirty_i] = "iamstring"
+                    error = {ct.err_code: 999, ct.err_msg: "The Input data type is inconsistent with defined schema"}
+                    collection_w.upsert(data=data, check_task=CheckTasks.err_res, check_items=error)
+                    data[i][dirty_i] = tmp
+                elif data[i][dirty_i].__class__ is float:
+                    tmp = data[i][dirty_i]
+                    data[i][dirty_i] = "iamstring"
+                    error = {ct.err_code: 999, ct.err_msg: "The Input data type is inconsistent with defined schema"}
+                    collection_w.upsert(data=data, check_task=CheckTasks.err_res, check_items=error)
+                    data[i][dirty_i] = tmp
+                else:
+                    continue  # cells of any other class are left untouched
+        res = collection_w.upsert(data)[0]  # every dirty cell was restored, so the original data is upserted
+        assert res.insert_count == nb
+
+    @pytest.mark.tags(CaseLabel.L2)
+    def test_upsert_vector_unmatch(self):
         """
         target: test upsert with unmatched data vector
         method: 1. create a collection with dim=128
@@ -2120,14 +1935,14 @@ def test_upsert_vector_unmatch(self, vector):
         expected: raise exception
         """
         c_name = cf.gen_unique_str(pre_upsert)
-        collection_w = self.init_collection_wrap(name=c_name)
-        data = [2.0, "a", vector]
-        error = {ct.err_code: 1, ct.err_msg: "The fields don't match with schema fields, "
-                                             "expected: ['int64', 'float', 'varchar', 'float_vector']"}
-        collection_w.upsert(data=[data], check_task=CheckTasks.err_res, check_items=error)
+        collection_w = self.init_collection_wrap(name=c_name, with_json=False)
+        data = cf.gen_default_binary_dataframe_data()[0]
+        error = {ct.err_code: 999,
+                 ct.err_msg: "The name of field don't match, expected: float_vector, got binary_vector"}
+        collection_w.upsert(data=data, check_task=CheckTasks.err_res, check_items=error)
 
     @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.parametrize("dim", [120, 129, 200])
+    @pytest.mark.parametrize("dim", [128-8, 128+8])
     def test_upsert_binary_dim_unmatch(self, dim):
         """
         target: test upsert with unmatched vector dim
@@ -2142,8 +1957,7 @@ def test_upsert_binary_dim_unmatch(self, dim):
         collection_w.upsert(data=data, check_task=CheckTasks.err_res, check_items=error)
 
     @pytest.mark.tags(CaseLabel.L2)
-    @pytest.mark.skip("https://github.com/milvus-io/pymilvus/issues/2042")
-    @pytest.mark.parametrize("dim", [127, 129, 200])
+    @pytest.mark.parametrize("dim", [256])
     def test_upsert_dim_unmatch(self, dim):
         """
         target: test upsert with unmatched vector dim
@@ -2151,10 +1965,11 @@ def test_upsert_dim_unmatch(self, dim):
                 2. upsert with mismatched dim
         expected: raise exception
         """
-        collection_w = self.init_collection_general(pre_upsert, True)[0]
-        data = cf.gen_default_data_for_upsert(dim=dim)[0]
-        error = {ct.err_code: 1,
-                 ct.err_msg: f"Collection field dim is 128, but entities field dim is {dim}"}
+        nb = 10
+        collection_w = self.init_collection_general(pre_upsert, True, with_json=False)[0]
+        data = cf.gen_default_list_data(nb=nb, dim=dim, with_json=False)
+        error = {ct.err_code: 1100,
+                 ct.err_msg: f"the dim ({dim}) of field data(float_vector) is not equal to schema dim ({ct.default_dim})"}
         collection_w.upsert(data=data, check_task=CheckTasks.err_res, check_items=error)
 
     @pytest.mark.tags(CaseLabel.L2)
@@ -2172,7 +1987,7 @@ def test_upsert_partition_name_invalid(self, partition_name):
         collection_w.create_partition(p_name)
         cf.insert_data(collection_w)
         data = cf.gen_default_dataframe_data(nb=100)
-        error = {ct.err_code: 1, ct.err_msg: "Invalid partition name"}
+        error = {ct.err_code: 999, ct.err_msg: "Invalid partition name"}
         collection_w.upsert(data=data, partition_name=partition_name,
                             check_task=CheckTasks.err_res, check_items=error)
 
@@ -2207,7 +2022,7 @@ def test_upsert_multi_partitions(self):
         collection_w.create_partition("partition_2")
         cf.insert_data(collection_w)
         data = cf.gen_default_dataframe_data(nb=1000)
-        error = {ct.err_code: 1, ct.err_msg: "['partition_1', 'partition_2'] has type <class 'list'>, "
+        error = {ct.err_code: 999, ct.err_msg: "['partition_1', 'partition_2'] has type <class 'list'>, "
                                              "but expected one of: (<class 'bytes'>, <class 'str'>)"}
         collection_w.upsert(data=data, partition_name=["partition_1", "partition_2"],
                             check_task=CheckTasks.err_res, check_items=error)
@@ -2221,7 +2036,7 @@ def test_upsert_with_auto_id(self):
         expected: raise exception
         """
         collection_w = self.init_collection_general(pre_upsert, auto_id=True, is_index=False)[0]
-        error = {ct.err_code: 1,
+        error = {ct.err_code: 999,
                  ct.err_msg: "Upsert don't support autoid == true"}
         float_vec_values = cf.gen_vectors(ct.default_nb, ct.default_dim)
         data = [[np.float32(i) for i in range(ct.default_nb)], [str(i) for i in range(ct.default_nb)],
@@ -2245,7 +2060,7 @@ def test_upsert_array_using_default_value(self, default_value):
         data = [{"int64": 1, "float_vector": vectors[1],
                  "varchar": default_value, "float": np.float32(1.0)}]
         collection_w.upsert(data, check_task=CheckTasks.err_res,
-                            check_items={ct.err_code: 1, ct.err_msg: "Field varchar don't match in entities[0]"})
+                            check_items={ct.err_code: 999, ct.err_msg: "Field varchar don't match in entities[0]"})
 
     @pytest.mark.tags(CaseLabel.L2)
     @pytest.mark.skip("not support default_value now")
@@ -2265,7 +2080,7 @@ def test_upsert_tuple_using_default_value(self, default_value):
         string_values = ["abc" for i in range(ct.default_nb)]
         data = (int_values, default_value, string_values, vectors)
         collection_w.upsert(data, check_task=CheckTasks.err_res,
-                            check_items={ct.err_code: 1, ct.err_msg: "Field varchar don't match in entities[0]"})
+                            check_items={ct.err_code: 999, ct.err_msg: "Field varchar don't match in entities[0]"})
 
 
 class TestInsertArray(TestcaseBase):
@@ -2324,7 +2139,7 @@ def test_insert_array_rows(self):
         schema = cf.gen_array_collection_schema()
         collection_w = self.init_collection_wrap(schema=schema)
 
-        data = cf.get_row_data_by_schema(schema=schema)
+        data = cf.gen_row_data_by_schema(schema=schema)
         collection_w.insert(data=data)
         assert collection_w.num_entities == ct.default_nb
 
@@ -2371,7 +2186,7 @@ def test_insert_array_length_differ(self):
         collection_w.insert(array)
         assert collection_w.num_entities == nb
 
-        data = cf.get_row_data_by_schema(nb=2, schema=schema)
+        data = cf.gen_row_data_by_schema(nb=2, schema=schema)
         collection_w.upsert(data)
 
     @pytest.mark.tags(CaseLabel.L2)
@@ -2382,11 +2197,11 @@ def test_insert_array_length_invalid(self):
         expected: raise error
         """
         # init collection
-        schema = cf.gen_array_collection_schema()
+        schema = cf.gen_array_collection_schema(dim=32)
         collection_w = self.init_collection_wrap(schema=schema)
         # Insert actual array length > max_capacity
         arr_len = ct.default_max_capacity + 1
-        data = cf.get_row_data_by_schema(schema=schema)
+        data = cf.gen_row_data_by_schema(schema=schema,nb=11)
         data[1][ct.default_float_array_field_name] = [np.float32(i) for i in range(arr_len)]
         err_msg = (f"the length (101) of 1th array exceeds max capacity ({ct.default_max_capacity}): "
                    f"expected=valid length array, actual=array length exceeds max capacity: invalid parameter")
@@ -2402,22 +2217,23 @@ def test_insert_array_type_invalid(self):
         expected: raise error
         """
         # init collection
-        arr_len = 10
-        schema = cf.gen_array_collection_schema()
+        arr_len = 5
+        nb = 10
+        dim = 8
+        schema = cf.gen_array_collection_schema(dim=dim)
         collection_w = self.init_collection_wrap(schema=schema)
-        data = cf.get_row_data_by_schema(schema=schema)
-
+        data = cf.gen_row_data_by_schema(schema=schema, nb=nb)
         # 1. Insert string values to an int array
         data[1][ct.default_int32_array_field_name] = [str(i) for i in range(arr_len)]
-        err_msg = "The data in the same column must be of the same type."
+        err_msg = "The Input data type is inconsistent with defined schema"
         collection_w.insert(data=data, check_task=CheckTasks.err_res,
-                            check_items={ct.err_code: 1, ct.err_msg: err_msg})
+                            check_items={ct.err_code: 999, ct.err_msg: err_msg})
 
         # 2. upsert float values to a string array
-        data = cf.get_row_data_by_schema(schema=schema)
+        data = cf.gen_row_data_by_schema(schema=schema)
         data[1][ct.default_string_array_field_name] = [np.float32(i) for i in range(arr_len)]
         collection_w.upsert(data=data, check_task=CheckTasks.err_res,
-                            check_items={ct.err_code: 1, ct.err_msg: err_msg})
+                            check_items={ct.err_code: 999, ct.err_msg: err_msg})
 
     @pytest.mark.tags(CaseLabel.L2)
     def test_insert_array_mixed_value(self):
@@ -2427,11 +2243,11 @@ def test_insert_array_mixed_value(self):
         expected: raise error
         """
         # init collection
-        schema = cf.gen_array_collection_schema()
+        schema = cf.gen_array_collection_schema(dim=32)
         collection_w = self.init_collection_wrap(schema=schema)
         # Insert array consisting of mixed values
-        data = cf.get_row_data_by_schema(schema=schema)
+        data = cf.gen_row_data_by_schema(schema=schema, nb=10)
         data[1][ct.default_string_array_field_name] = ["a", 1, [2.0, 3.0], False]
         collection_w.insert(data=data, check_task=CheckTasks.err_res,
-                            check_items={ct.err_code: 1,
-                                         ct.err_msg: "The data in the same column must be of the same type."})
+                            check_items={ct.err_code: 999,
+                                         ct.err_msg: "The Input data type is inconsistent with defined schema"})
diff --git a/tests/python_client/testcases/test_search.py b/tests/python_client/testcases/test_search.py
index 12829718a8cf..012bc4885a3a 100644
--- a/tests/python_client/testcases/test_search.py
+++ b/tests/python_client/testcases/test_search.py
@@ -594,7 +594,7 @@ def test_search_with_expression_invalid_array_one(self):
         nb = ct.default_nb
         schema = cf.gen_array_collection_schema()
         collection_w = self.init_collection_wrap(schema=schema)
-        data = cf.get_row_data_by_schema(schema=schema)
+        data = cf.gen_row_data_by_schema(schema=schema)
         data[1][ct.default_int32_array_field_name] = [1]
         collection_w.insert(data)
         collection_w.create_index("float_vector", ct.default_index)
@@ -623,7 +623,7 @@ def test_search_with_expression_invalid_array_two(self):
         nb = ct.default_nb
         schema = cf.gen_array_collection_schema()
         collection_w = self.init_collection_wrap(schema=schema)
-        data = cf.get_row_data_by_schema(schema=schema)
+        data = cf.gen_row_data_by_schema(schema=schema)
         collection_w.insert(data)
         collection_w.create_index("float_vector", ct.default_index)
         collection_w.load()
@@ -3300,7 +3300,7 @@ def test_search_with_expression_exists(self, exists, json_field_name, _async):
         collection_w = self.init_collection_wrap(schema=schema, enable_dynamic_field=enable_dynamic_field)
         log.info(schema.fields)
         if enable_dynamic_field:
-            data = cf.get_row_data_by_schema(nb, schema=schema)
+            data = cf.gen_row_data_by_schema(nb, schema=schema)
             for i in range(nb):
                 data[i]["new_added_field"] = i
             log.info(data[0])
@@ -3679,7 +3679,7 @@ def test_search_output_array_field(self, enable_dynamic_field):
 
         # 2. insert data
         if enable_dynamic_field:
-            data = cf.get_row_data_by_schema(schema=schema)
+            data = cf.gen_row_data_by_schema(schema=schema)
         else:
             data = cf.gen_array_dataframe_data(auto_id=auto_id)
 
@@ -6891,7 +6891,7 @@ class TestCollectionRangeSearch(TestcaseBase):
     @pytest.fixture(scope="function", params=ct.all_index_types[:7])
     def index_type(self, request):
         tags = request.config.getoption("--tags")
-        if CaseLabel.L2 not in tags or "all" not in tags:
+        if CaseLabel.L2 not in tags:
             if request.param not in ct.L0_index_types:
                 pytest.skip(f"skip index type {request.param}")
         yield request.param
@@ -6899,8 +6899,8 @@ def index_type(self, request):
     @pytest.fixture(scope="function", params=ct.float_metrics)
     def metric(self, request):
         tags = request.config.getoption("--tags")
-        if CaseLabel.L2 not in tags or "all" not in tags:
-            if request.param not in ct.default_L0_metric:
+        if CaseLabel.L2 not in tags:
+            if request.param != ct.default_L0_metric:
                 pytest.skip(f"skip index type {request.param}")
         yield request.param