From 21cabc7c65b8e190b5bc342e96a31bea35545263 Mon Sep 17 00:00:00 2001
From: Bichitra Kumar Sahoo <32828151+bichitra95@users.noreply.github.com>
Date: Mon, 24 Jul 2023 17:37:47 +0530
Subject: [PATCH] fix databricks datatype cast and support for int type decimal

---
 soda/spark/soda/data_sources/spark_data_source.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/soda/spark/soda/data_sources/spark_data_source.py b/soda/spark/soda/data_sources/spark_data_source.py
index b66b60eae..033f4ed82 100644
--- a/soda/spark/soda/data_sources/spark_data_source.py
+++ b/soda/spark/soda/data_sources/spark_data_source.py
@@ -425,6 +425,7 @@ class SparkDataSource(SparkSQLBase):
     def __init__(self, logs: Logs, data_source_name: str, data_source_properties: dict):
         super().__init__(logs, data_source_name, data_source_properties)
 
+        self.NUMERIC_TYPES_FOR_PROFILING = ["integer", "int", "double", "float", "decimal"]
         self.method = data_source_properties.get("method", "hive")
         self.host = data_source_properties.get("host", "localhost")
@@ -474,3 +475,6 @@ def connect(self):
             self.connection = connection
         except Exception as e:
             raise DataSourceConnectionError(self.type, e)
+
+    def cast_to_text(self, expr: str) -> str:
+        return f"CAST({expr} AS VARCHAR(100))"
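
For context, a minimal standalone sketch of what the four added lines give the Spark data source. The DemoSparkDataSource class below is a hypothetical stand-in, not part of soda-core (constructing the real SparkDataSource needs a logger and connection properties); the reasoning that the previous cast target is not accepted by Databricks is inferred from the commit subject rather than shown in this diff.

# Minimal sketch, assuming only the two additions in the patch above.
# DemoSparkDataSource is a hypothetical stand-in so the generated SQL can be
# inspected without a live Spark/Databricks connection.


class DemoSparkDataSource:
    def __init__(self):
        # Listing "int" and "decimal" means integer-backed and decimal columns
        # are treated as numeric during profiling.
        self.NUMERIC_TYPES_FOR_PROFILING = ["integer", "int", "double", "float", "decimal"]

    def cast_to_text(self, expr: str) -> str:
        # Spark SQL has no TEXT type; casting to VARCHAR(100) keeps the
        # expression usable in string-based checks on Databricks.
        return f"CAST({expr} AS VARCHAR(100))"


if __name__ == "__main__":
    ds = DemoSparkDataSource()
    print(ds.cast_to_text("order_total"))                # CAST(order_total AS VARCHAR(100))
    print("decimal" in ds.NUMERIC_TYPES_FOR_PROFILING)   # True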