Skip to content

Commit

Permalink
[Datastore] Fix ValueError when parsing a timestamp string column with pandas 1 (mlrun#5053)
Browse files Browse the repository at this point in the history
  • Loading branch information
gtopper committed Feb 4, 2024
1 parent d4a27e5 commit d978962
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 2 deletions.
12 changes: 11 additions & 1 deletion mlrun/datastore/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from urllib.parse import parse_qs, urlparse, urlunparse

import pandas as pd
import semver

import mlrun.datastore

Expand Down Expand Up @@ -137,7 +138,16 @@ def filter_df_generator(
def _execute_time_filter(
df: pd.DataFrame, time_column: str, start_time: pd.Timestamp, end_time: pd.Timestamp
):
df[time_column] = pd.to_datetime(df[time_column], format="mixed", yearfirst=True)
if semver.parse(pd.__version__)["major"] >= 2:
# pandas 2 is too strict by default (ML-5629)
kwargs = {
"format": "mixed",
"yearfirst": True,
}
else:
# pandas 1 may fail on format "mixed" (ML-5661)
kwargs = {}
df[time_column] = pd.to_datetime(df[time_column], **kwargs)
if start_time:
df = df[df[time_column] > start_time]
if end_time:
Expand Down
2 changes: 1 addition & 1 deletion tests/datastore/test_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def test_kafka_source_with_new_nuclio():
assert function.spec.max_replicas == 2


# ML-5629
# ML-5629 (pandas 2), ML-5661 (pandas 1)
def test_timestamp_format_inference(rundb_mock):
source = CSVSource(
path=str(pathlib.Path(__file__).parent / "assets" / "mixed_timestamps.csv")
Expand Down

0 comments on commit d978962

Please sign in to comment.