VertaAI · hmacdonald-verta · Mar 13, 2023 · Mar 6, 2023 · Mar 8, 2023 · Mar 8, 2023
diff --git a/client/verta/tests/unit_tests/test_deployed_model.py b/client/verta/tests/unit_tests/test_deployed_model.py
@@ -3,6 +3,8 @@
 import os
 from typing import Any, Dict
 
+import numpy as np
+import pandas as pd
 import pytest
 from requests import Session, HTTPError
 from requests.exceptions import RetryError
@@ -15,6 +17,7 @@
 from verta._internal_utils import http_session
 
 PREDICTION_URL: str = 'https://test.dev.verta.ai/api/v1/predict/test_path'
+BATCH_PREDICTION_URL: str = 'https://test.dev.verta.ai/api/v1/batch-predict/test_path'
 TOKEN: str = '12345678-xxxx-1a2b-3c4d-e5f6g7h8'
 MOCK_RETRY: Retry = http_session.retry_config(
     max_retries=http_session.DEFAULT_MAX_RETRIES,
@@ -379,3 +382,134 @@ def test_predict_400_error_message_missing(mocked_responses) -> None:
         '400 Client Error: Bad Request for url: '
         'https://test.dev.verta.ai/api/v1/predict/test_path at '
     )
+
+
+def test_batch_predict_with_one_batch_with_no_output_index(mocked_responses) -> None:
+    """ Call batch_predict with a single batch, where the output has no explicit index. """
+    expected_df = pd.DataFrame({"A": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], "B": [11, 12, 13, 14, 15, 16, 17, 18, 19, 20]})
+    expected_df_json = expected_df.to_dict(orient="index")
+    mocked_responses.post(
+        BATCH_PREDICTION_URL,
+        json=expected_df_json,
+        status=200,
+        headers={"verta-request-id": "hereISaTESTidFROMtheUSER"},
+        )
+    creds = EmailCredentials.load_from_os_env()
+    dm = DeployedModel(
+        prediction_url=PREDICTION_URL,
+        creds=creds,
+        token=TOKEN,
+        )
+    # the input below is entirely irrelevant since it's smaller than the batch size
+    prediction_df = dm.batch_predict(pd.DataFrame({"hi": "bye"}, index=[1]), 10)
+    # Since no index was provided, we can't guarantee the index type for assertions
+    pd.testing.assert_frame_equal(expected_df.reset_index(drop=True), prediction_df.reset_index(drop=True))
+
+
+def test_batch_predict_with_one_batch_with_output_index(mocked_responses) -> None:
+    """ Call batch_predict with a single batch whose mocked response carries an explicit string index. """
+    row_labels = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]
+    expected_df = pd.DataFrame(
+        {"A": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], "B": [11, 12, 13, 14, 15, 16, 17, 18, 19, 20]},
+        index=row_labels,
+    )
+    mocked_responses.post(
+        BATCH_PREDICTION_URL,
+        json=expected_df.to_dict(orient="index"),
+        status=200,
+        headers={"verta-request-id": "hereISaTESTidFROMtheUSER"},
+    )
+    credentials = EmailCredentials.load_from_os_env()
+    deployed_model = DeployedModel(prediction_url=PREDICTION_URL, creds=credentials, token=TOKEN)
+    # The input content is irrelevant here: its single row is smaller than the batch size of 10.
+    single_row_input = pd.DataFrame({"hi": "bye"}, index=[1])
+    actual_df = deployed_model.batch_predict(single_row_input, 10)
+    # The mocked output has an explicit index, so the frames are compared with indexes included.
+    pd.testing.assert_frame_equal(expected_df, actual_df)
+
+
+def test_batch_predict_with_five_batches_of_one_with_no_indexes(mocked_responses) -> None:
+    """ Call batch_predict with five batches of one row each, where no dataframe has an explicit index. """
+    expected_df_list = [pd.DataFrame({"A": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}),
+                        pd.DataFrame({"B": [11, 12, 13, 14, 15, 16, 17, 18, 19, 20]}),
+                        pd.DataFrame({"C": [21, 22, 23, 24, 25, 26, 27, 28, 29, 30]}),
+                        pd.DataFrame({"D": [31, 32, 33, 34, 35, 36, 37, 38, 39, 40]}),
+                        pd.DataFrame({"E": [41, 42, 43, 44, 45, 46, 47, 48, 49, 50]}),
+                        ]
+    for expected_df in expected_df_list:
+        mocked_responses.add(
+            responses.POST,
+            BATCH_PREDICTION_URL,
+            json=expected_df.to_dict(orient="index"),
+            status=200,
+            headers={"verta-request-id": "hereISaTESTidFROMtheUSER"},
+            )
+    creds = EmailCredentials.load_from_os_env()
+    dm = DeployedModel(
+        prediction_url=PREDICTION_URL,
+        creds=creds,
+        token=TOKEN,
+        )
+    input_df = pd.DataFrame({"a": [1, 2, 3, 4, 5], "b": [11, 12, 13, 14, 15]})
+    prediction_df = dm.batch_predict(input_df, 1)
+    expected_final_df = pd.concat(expected_df_list)
+    # Since no index was provided, we can't guarantee the index type for assertions
+    pd.testing.assert_frame_equal(expected_final_df.reset_index(drop=True), prediction_df.reset_index(drop=True))
+
+
+def test_batch_predict_with_five_batches_of_one_with_indexes(mocked_responses) -> None:
+    """ Call batch_predict with five batches of one row each, where every dataframe has an explicitly defined index. """
+    row_labels = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]
+    expected_frames = [
+        pd.DataFrame({"A": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}, index=row_labels),
+        pd.DataFrame({"B": [11, 12, 13, 14, 15, 16, 17, 18, 19, 20]}, index=row_labels),
+        pd.DataFrame({"C": [21, 22, 23, 24, 25, 26, 27, 28, 29, 30]}, index=row_labels),
+        pd.DataFrame({"D": [31, 32, 33, 34, 35, 36, 37, 38, 39, 40]}, index=row_labels),
+        pd.DataFrame({"E": [41, 42, 43, 44, 45, 46, 47, 48, 49, 50]}, index=row_labels),
+    ]
+    # Register one mocked batch-prediction response per expected frame.
+    for frame in expected_frames:
+        mocked_responses.add(
+            responses.POST,
+            BATCH_PREDICTION_URL,
+            json=frame.to_dict(orient="index"),
+            status=200,
+            headers={"verta-request-id": "hereISaTESTidFROMtheUSER"},
+        )
+    credentials = EmailCredentials.load_from_os_env()
+    deployed_model = DeployedModel(prediction_url=PREDICTION_URL, creds=credentials, token=TOKEN)
+    # Five labelled input rows, submitted with a batch size of 1.
+    input_df = pd.DataFrame({"a": [1, 2, 3, 4, 5], "b": [11, 12, 13, 14, 15]}, index=["A", "B", "C", "D", "E"])
+    actual_df = deployed_model.batch_predict(input_df, 1)
+    # The result is expected to equal the concatenation of the mocked frames, indexes included.
+    pd.testing.assert_frame_equal(pd.concat(expected_frames), actual_df)
+
+
+
+
+def test_batch_predict_with_five_batches_with_nans(mocked_responses) -> None:
+    """ Call batch_predict with five batches of one row each, where the input and the mocked outputs contain NaN values. """
+    row_labels = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]
+    expected_frames = [
+        pd.DataFrame({"A": [1, 2, 3, 4, 5, np.nan, 7, 8, 9, 10]}, index=row_labels),
+        pd.DataFrame({"B": [11, 12, 13, 14, 15, 16, 17, 18, 19, 20]}, index=row_labels),
+        pd.DataFrame({"C": [21, 22, np.nan, 24, 25, 26, 27, 28, 29, 30]}, index=row_labels),
+        pd.DataFrame({"D": [31, 32, 33, 34, 35, 36, 37, np.nan, 39, 40]}, index=row_labels),
+        pd.DataFrame({"E": [41, 42, 43, 44, np.nan, 46, 47, 48, 49, 50]}, index=row_labels),
+    ]
+    # Register one mocked batch-prediction response per expected frame.
+    for frame in expected_frames:
+        mocked_responses.add(
+            responses.POST,
+            BATCH_PREDICTION_URL,
+            json=frame.to_dict(orient="index"),
+            status=200,
+            headers={"verta-request-id": "hereISaTESTidFROMtheUSER"},
+        )
+    credentials = EmailCredentials.load_from_os_env()
+    deployed_model = DeployedModel(prediction_url=PREDICTION_URL, creds=credentials, token=TOKEN)
+    # Five labelled input rows with NaNs in both columns, submitted with a batch size of 1.
+    input_df = pd.DataFrame({"a": [1, 2, np.nan, 4, 5], "b": [11, np.nan, 13, 14, 15]}, index=["A", "B", "C", "D", "E"])
+    actual_df = deployed_model.batch_predict(input_df, 1)
+    # The result is expected to equal the concatenation of the mocked frames, NaNs and indexes included.
+    pd.testing.assert_frame_equal(pd.concat(expected_frames), actual_df)