From 0a6ea572fb5340a904e9cefaa656ac0127b15216 Mon Sep 17 00:00:00 2001 From: Sasan Ahmadi Date: Sun, 6 Feb 2022 11:07:09 -0800 Subject: [PATCH] Fix to check if values are integer or float and convert accordingly. (#21277) This change prevents the loss of data: if the value is a float it is converted to float; otherwise it is converted to int. It uses pd.Float64Dtype() for floats instead of using pd.Int64Dtype(). Since there could be floating-point values in the array, this fixes the exception raised when safely casting the array to the data type. Fixes the error when using mysql_to_s3 (TypeError: cannot safely cast non-equivalent object to int64) #16919 --- airflow/providers/amazon/aws/transfers/sql_to_s3.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/airflow/providers/amazon/aws/transfers/sql_to_s3.py b/airflow/providers/amazon/aws/transfers/sql_to_s3.py index 696b4fef2f8e4..8fe44de268e67 100644 --- a/airflow/providers/amazon/aws/transfers/sql_to_s3.py +++ b/airflow/providers/amazon/aws/transfers/sql_to_s3.py @@ -127,10 +127,14 @@ def _fix_int_dtypes(df: pd.DataFrame) -> None: if "float" in df[col].dtype.name and df[col].hasnans: # inspect values to determine if dtype of non-null values is int or float notna_series = df[col].dropna().values - if np.isclose(notna_series, notna_series.astype(int)).all(): + if np.equal(notna_series, notna_series.astype(int)).all(): # set to dtype that retains integers and supports NaNs df[col] = np.where(df[col].isnull(), None, df[col]) df[col] = df[col].astype(pd.Int64Dtype()) + elif np.isclose(notna_series, notna_series.astype(int)).all(): + # set to float dtype that retains floats and supports NaNs + df[col] = np.where(df[col].isnull(), None, df[col]) + df[col] = df[col].astype(pd.Float64Dtype()) def execute(self, context: 'Context') -> None: sql_hook = self._get_hook()