diff --git a/pkg/workloads/spark_job/test/integration/iris_context.py b/pkg/workloads/spark_job/test/integration/iris_context.py
index d09d8e5235..5379b90887 100644
--- a/pkg/workloads/spark_job/test/integration/iris_context.py
+++ b/pkg/workloads/spark_job/test/integration/iris_context.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import consts
+
 """
 HOW TO GENERATE CONTEXT
 
@@ -25,10 +27,16 @@
 from lib.storage import S3
 bucket, key = S3.deconstruct_s3_path('s3:///apps//contexts/.msgpack')
 S3(bucket, client_config={}).get_msgpack(key)
-
-5. Modify environment_data.csv_data.path to point to the correct input data file
 """
 
+
+def get(input_data_path):
+    raw_ctx["environment_data"]["csv_data"]["path"] = input_data_path
+    raw_ctx["cortex_config"]["api_version"] = consts.CORTEX_VERSION
+
+    return raw_ctx
+
+
 raw_ctx = {
     "raw_dataset": {
         "key": "apps/iris/data/2019-03-08-09-58-35-701834/3976c5679bcf7cb550453802f4c3a9333c5f193f6097f1f5642de48d2397554/data_raw/raw.parquet",
diff --git a/pkg/workloads/spark_job/test/integration/iris_test.py b/pkg/workloads/spark_job/test/integration/iris_test.py
index 86a72c6f0f..73ff1e9ec1 100644
--- a/pkg/workloads/spark_job/test/integration/iris_test.py
+++ b/pkg/workloads/spark_job/test/integration/iris_test.py
@@ -17,7 +17,7 @@
 from spark_job import spark_job
 from lib.exceptions import UserException
 from lib import Context
-from test.integration.iris_context import raw_ctx
+from test.integration import iris_context
 
 import pytest
 from pyspark.sql.types import *
@@ -54,6 +54,10 @@ def test_simple_end_to_end(spark):
     local_storage_path = Path("/workspace/local_storage")
     local_storage_path.mkdir(parents=True, exist_ok=True)
     should_ingest = True
+    input_data_path = os.path.join(str(local_storage_path), "iris.csv")
+
+    raw_ctx = iris_context.get(input_data_path)
+
     workload_id = raw_ctx["raw_columns"]["raw_float_columns"]["sepal_length"]["workload_id"]
 
     cols_to_validate = []
@@ -65,10 +69,6 @@ def test_simple_end_to_end(spark):
     iris_data_string = "\n".join(",".join(str(val) for val in line) for line in iris_data)
     Path(os.path.join(str(local_storage_path), "iris.csv")).write_text(iris_data_string)
 
-    raw_ctx["environment_data"]["csv_data"]["path"] = os.path.join(
-        str(local_storage_path), "iris.csv"
-    )
-
     ctx = Context(
         raw_obj=raw_ctx, cache_dir="/workspace/cache", local_storage_path=str(local_storage_path)
     )
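
With this change, iris_context.get() owns the context wiring: it patches environment_data.csv_data.path to the supplied input path and pins cortex_config.api_version to consts.CORTEX_VERSION, so the test no longer mutates raw_ctx by hand. A minimal sketch of how the helper composes with Context, mirroring test_simple_end_to_end above (build_test_context is a hypothetical wrapper for illustration, not part of the patch):

    import os

    from lib import Context
    from test.integration import iris_context


    def build_test_context(local_storage_path="/workspace/local_storage"):
        # Hypothetical helper: mirrors the flow in test_simple_end_to_end.
        input_data_path = os.path.join(local_storage_path, "iris.csv")
        # get() patches environment_data.csv_data.path and
        # cortex_config.api_version into the canned raw_ctx, then returns it.
        raw_ctx = iris_context.get(input_data_path)
        return Context(
            raw_obj=raw_ctx,
            cache_dir="/workspace/cache",
            local_storage_path=local_storage_path,
        )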