From 1823cf89382d3755fa9ba4d8173044ce393c8d97 Mon Sep 17 00:00:00 2001 From: christoper burgess Date: Wed, 5 Aug 2020 13:49:24 +0100 Subject: [PATCH 1/4] feat: Format timestamps as timestamps --- vortexasdk/endpoints/timeseries_result.py | 6 +++++- vortexasdk/result_conversions.py | 17 ++++++++++++++--- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/vortexasdk/endpoints/timeseries_result.py b/vortexasdk/endpoints/timeseries_result.py index 132795a3..8c238975 100644 --- a/vortexasdk/endpoints/timeseries_result.py +++ b/vortexasdk/endpoints/timeseries_result.py @@ -33,12 +33,16 @@ def to_df(self, columns=None) -> pd.DataFrame: the number of cargo movements contributing towards this day's tonnage. """ - return create_dataframe( + df = create_dataframe( columns=columns, default_columns=DEFAULT_COLUMNS, data=super().to_list(), logger_description="TimeSeries", ) + df["key"] = pd.to_datetime(df["key"]) + + return df + DEFAULT_COLUMNS = ["key", "value", "count"] diff --git a/vortexasdk/result_conversions.py b/vortexasdk/result_conversions.py index 2ff41f02..907c888c 100644 --- a/vortexasdk/result_conversions.py +++ b/vortexasdk/result_conversions.py @@ -14,6 +14,15 @@ def create_list(list_of_dicts, output_class: FromDictMixin) -> List: return [output_class.from_dict(d) for d in list_of_dicts] +def format_datatypes(df: pd.DataFrame) -> pd.DataFrame: + """Format the relevant columns with sensible datatypes""" + timestamp_cols = [c for c in df.columns if "timestamp" in c] + + df[timestamp_cols] = df[timestamp_cols].apply(pd.to_datetime) + + return df + + def create_dataframe( columns: Union[None, List[str]], default_columns: List[str], @@ -30,8 +39,10 @@ def create_dataframe( logger.debug(f"Creating DataFrame of {logger_description}") if columns is None: - return pd.DataFrame(data=data, columns=default_columns) + df = pd.DataFrame(data=data, columns=default_columns) elif columns == "all": - return pd.DataFrame(data=data) + df = pd.DataFrame(data=data) else: - return pd.DataFrame(data=data, columns=columns) + df = pd.DataFrame(data=data, columns=columns) + + return format_datatypes(df) From 8e5a3f3098238d64347de8977440630f9d8258d3 Mon Sep 17 00:00:00 2001 From: christoper burgess Date: Wed, 5 Aug 2020 14:25:54 +0100 Subject: [PATCH 2/4] test: Test timestamp dtype --- tests/endpoints/test_cargo_movements_real.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/endpoints/test_cargo_movements_real.py b/tests/endpoints/test_cargo_movements_real.py index d61e6df8..13b6c9d8 100644 --- a/tests/endpoints/test_cargo_movements_real.py +++ b/tests/endpoints/test_cargo_movements_real.py @@ -120,6 +120,22 @@ def test_to_df_all_columns(self): assert len(df) == 2 + def test_timestamp_columns(self): + df = ( + CargoMovements() + .search( + filter_activity="loading_state", + filter_products="6f11b0724c9a4e85ffa7f1445bc768f054af755a090118dcf99f14745c261653", + filter_time_min=datetime(2019, 8, 29), + filter_time_max=datetime(2019, 8, 29, 0, 10), + ) + .to_df(columns="all") + .head(2) + ) + + self.assertEqual(str(df['events.cargo_port_load_event.0.end_timestamp'].dtypes), 'datetime64[ns, UTC]') + self.assertEqual(str(df['events.cargo_port_unload_event.0.end_timestamp'].dtypes), 'datetime64[ns, UTC]') + def test_search_single_filter_origin_name(self): df = ( CargoMovements() From d83d7a4550e1d4dad72f02ff8b7523cdc851c233 Mon Sep 17 00:00:00 2001 From: christoper burgess Date: Wed, 5 Aug 2020 17:05:52 +0100 Subject: [PATCH 3/4] perf: Incorporate @cvonsteg feedback --- vortexasdk/result_conversions.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/vortexasdk/result_conversions.py b/vortexasdk/result_conversions.py index 907c888c..97b5ae6f 100644 --- a/vortexasdk/result_conversions.py +++ b/vortexasdk/result_conversions.py @@ -16,9 +16,10 @@ def create_list(list_of_dicts, output_class: FromDictMixin) -> List: def format_datatypes(df: pd.DataFrame) -> pd.DataFrame: """Format the relevant columns with sensible datatypes""" - timestamp_cols = [c for c in df.columns if "timestamp" in c] + timestamp_cols = [col for col in df.columns if "timestamp" in col] - df[timestamp_cols] = df[timestamp_cols].apply(pd.to_datetime) + for col in timestamp_cols: + df[col] = pd.to_datetime(df[col]) return df From 7c8a2ce8b9d878b1fbced0c319e2d238a05dae0c Mon Sep 17 00:00:00 2001 From: christoper burgess Date: Wed, 5 Aug 2020 17:14:39 +0100 Subject: [PATCH 4/4] chore: Unrelated speed test --- tests/endpoints/test_cargo_movements_real.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/endpoints/test_cargo_movements_real.py b/tests/endpoints/test_cargo_movements_real.py index 13b6c9d8..808ef2ca 100644 --- a/tests/endpoints/test_cargo_movements_real.py +++ b/tests/endpoints/test_cargo_movements_real.py @@ -249,5 +249,5 @@ def test_speed(self): # Check we load a reasonable number of cargo movements in a short enough period of time assert len(df) > 500 assert t_search.interval < 10 - assert t_to_list.interval < 5 + assert t_to_list.interval < 10 assert t_to_df.interval < 5