Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Format timestamps as timestamps #230

Merged
merged 7 commits into from
Aug 5, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion tests/endpoints/test_cargo_movements_real.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,22 @@ def test_to_df_all_columns(self):

assert len(df) == 2

def test_timestamp_columns(self):
df = (
CargoMovements()
.search(
filter_activity="loading_state",
filter_products="6f11b0724c9a4e85ffa7f1445bc768f054af755a090118dcf99f14745c261653",
filter_time_min=datetime(2019, 8, 29),
filter_time_max=datetime(2019, 8, 29, 0, 10),
)
.to_df(columns="all")
.head(2)
)

self.assertEqual(str(df['events.cargo_port_load_event.0.end_timestamp'].dtypes), 'datetime64[ns, UTC]')
self.assertEqual(str(df['events.cargo_port_unload_event.0.end_timestamp'].dtypes), 'datetime64[ns, UTC]')

def test_search_single_filter_origin_name(self):
df = (
CargoMovements()
Expand Down Expand Up @@ -233,5 +249,5 @@ def test_speed(self):
# Check we load a reasonable number of cargo movements in a short enough period of time
assert len(df) > 500
assert t_search.interval < 10
assert t_to_list.interval < 5
assert t_to_list.interval < 10
assert t_to_df.interval < 5
6 changes: 5 additions & 1 deletion vortexasdk/endpoints/timeseries_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,16 @@ def to_df(self, columns=None) -> pd.DataFrame:
the number of cargo movements contributing towards this day's tonnage.

"""
return create_dataframe(
df = create_dataframe(
columns=columns,
default_columns=DEFAULT_COLUMNS,
data=super().to_list(),
logger_description="TimeSeries",
)

df["key"] = pd.to_datetime(df["key"])

return df


DEFAULT_COLUMNS = ["key", "value", "count"]
18 changes: 15 additions & 3 deletions vortexasdk/result_conversions.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,16 @@ def create_list(list_of_dicts, output_class: FromDictMixin) -> List:
return [output_class.from_dict(d) for d in list_of_dicts]


def format_datatypes(df: pd.DataFrame) -> pd.DataFrame:
"""Format the relevant columns with sensible datatypes"""
timestamp_cols = [col for col in df.columns if "timestamp" in col]

for col in timestamp_cols:
df[col] = pd.to_datetime(df[col])

return df


def create_dataframe(
columns: Union[None, List[str]],
default_columns: List[str],
Expand All @@ -30,8 +40,10 @@ def create_dataframe(
logger.debug(f"Creating DataFrame of {logger_description}")

if columns is None:
return pd.DataFrame(data=data, columns=default_columns)
df = pd.DataFrame(data=data, columns=default_columns)
elif columns == "all":
return pd.DataFrame(data=data)
df = pd.DataFrame(data=data)
else:
return pd.DataFrame(data=data, columns=columns)
df = pd.DataFrame(data=data, columns=columns)
Comment on lines 42 to +47
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could simplify this slightly

columns = columns or default_columns # handles None 'if' clause
if columns == 'all':
    df = pd.DataFrame(data=data)
else:
    df = pd.DataFrame(data=data, columns=columns)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is definitely fewer lines of code - but I think it's slightly more difficult to read for the user: we mutate the columns variable and then handle it in the if statement - rather than explicitly dealing with the three posibilities for the columns variable..

Copy link
Contributor

@cvonsteg cvonsteg Aug 5, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your call!


return format_datatypes(df)