Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ensure mysql_to_gcs fully compatible with MySQL and BigQuery for datetime-related values #15026

Merged
merged 1 commit into from Jun 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
17 changes: 9 additions & 8 deletions airflow/providers/google/cloud/transfers/mysql_to_gcs.py
Expand Up @@ -18,8 +18,7 @@
"""MySQL to GCS operator."""

import base64
import calendar
from datetime import date, datetime, timedelta
from datetime import date, datetime, time, timedelta
from decimal import Decimal
from typing import Dict

Expand Down Expand Up @@ -102,10 +101,12 @@ def convert_type(self, value, schema_type: str):
Takes a value from MySQLdb, and converts it to a value that's safe for
JSON/Google Cloud Storage/BigQuery.

* Datetimes are converted to UTC seconds.
* Datetimes are converted to `str(value)` (`datetime.isoformat(' ')`)
strings.
* Times are converted to `str((datetime.min + value).time())` strings.
* Decimals are converted to floats.
* Dates are converted to ISO formatted string if given schema_type is
DATE, or UTC seconds otherwise.
* Dates are converted to ISO formatted strings if given schema_type is
DATE, or `datetime.isoformat(' ')` strings otherwise.
* Binary type fields are converted to integer if given schema_type is
INTEGER, or encoded with base64 otherwise. Imported BYTES data must
be base64-encoded according to BigQuery documentation:
Expand All @@ -119,16 +120,16 @@ def convert_type(self, value, schema_type: str):
if value is None:
return value
if isinstance(value, datetime):
value = calendar.timegm(value.timetuple())
value = str(value)
elif isinstance(value, timedelta):
value = value.total_seconds()
value = str((datetime.min + value).time())
elif isinstance(value, Decimal):
value = float(value)
elif isinstance(value, date):
if schema_type == "DATE":
value = value.isoformat()
else:
value = calendar.timegm(value.timetuple())
value = str(datetime.combine(value, time.min))
elif isinstance(value, bytes):
if schema_type == "INTEGER":
value = int.from_bytes(value, "big")
Expand Down
9 changes: 7 additions & 2 deletions tests/providers/google/cloud/transfers/test_mysql_to_gcs.py
Expand Up @@ -88,9 +88,14 @@ def test_init(self):
@parameterized.expand(
[
("string", None, "string"),
(datetime.date(1970, 1, 2), None, 86400),
(datetime.date(1970, 1, 2), None, "1970-01-02 00:00:00"),
(datetime.date(1000, 1, 2), None, "1000-01-02 00:00:00"),
(datetime.date(1970, 1, 2), "DATE", "1970-01-02"),
(datetime.datetime(1970, 1, 1, 1, 0), None, 3600),
(datetime.date(1000, 1, 2), "DATE", "1000-01-02"),
(datetime.datetime(1970, 1, 1, 1, 0), None, "1970-01-01 01:00:00"),
(datetime.datetime(1000, 1, 1, 1, 0), None, "1000-01-01 01:00:00"),
(datetime.timedelta(), None, "00:00:00"),
(datetime.timedelta(hours=23, minutes=59, seconds=59), None, "23:59:59"),
(decimal.Decimal(5), None, 5),
(b"bytes", "BYTES", "Ynl0ZXM="),
(b"\x00\x01", "INTEGER", 1),
Expand Down