Skip to content

Commit

Permalink
Handle different date formats in clustering (#562)
Browse files Browse the repository at this point in the history
  • Loading branch information
brynpickering committed Feb 13, 2024
1 parent 36466f9 commit 4e77873
Show file tree
Hide file tree
Showing 6 changed files with 212 additions and 175 deletions.
26 changes: 22 additions & 4 deletions src/calliope/preprocess/time.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
"""
import logging
from pathlib import Path
from typing import overload

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -148,7 +149,7 @@ def cluster(data: xr.Dataset, clustering_file: str | Path, time_format: str):
Args:
data (xarray.Dataset): Calliope model data, containing only timeseries data variables.
clustering_file (str | Path): Path to file containing rows of dates and the corresponding datestamp to which they are to be clustered.
time_format (str): The format that dates in `clustering_file` have been defined (e.g., "%Y-%m-%d").
time_format (str): The format in which dates in `clustering_file` are defined (e.g., "%Y-%m-%d" or "ISO8601").
Returns:
xarray.Dataset:
Expand All @@ -157,9 +158,11 @@ def cluster(data: xr.Dataset, clustering_file: str | Path, time_format: str):
"""
clustering_timeseries = pd.read_csv(clustering_file, index_col=0).squeeze()
clustering_timeseries.index = _datetime_index(
clustering_timeseries.index, time_format
clustering_timeseries.index + " 00:00:00", time_format
)
representative_days = pd.to_datetime(clustering_timeseries.dropna()).dt.date
representative_days = _datetime_index(
clustering_timeseries.dropna() + " 00:00:00", time_format
).dt.date
grouper = representative_days.to_frame("clusters").groupby("clusters")
data_new = data.sel(
timesteps=data.timesteps.dt.date.isin(representative_days.values)
Expand All @@ -180,14 +183,29 @@ def cluster(data: xr.Dataset, clustering_file: str | Path, time_format: str):
return data_new


@overload
def _datetime_index(index: pd.Index, format: str) -> pd.Index:
"Pass pandas Index"


@overload
def _datetime_index(index: pd.Series, format: str) -> pd.Series:
"Pass pandas Series"


def _datetime_index(index: pd.Index | pd.Series, format: str) -> pd.Index | pd.Series:
try:
return pd.to_datetime(index, format=format)
if format == "ISO8601":
dt = pd.to_datetime(index, format=format)
else:
dt = pd.to_datetime(index, format=format, exact=False)
except ValueError as e:
raise exceptions.ModelError(
f"Error in parsing dates in timeseries data from using datetime format `{format}`. "
f"Full error: {e}"
)
else:
return dt


def _check_time_subset(ts_index: pd.Index, time_subset: list[str]):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
,clusters
01/01/2005,01/01/2005
02/01/2005,03/01/2005
03/01/2005,03/01/2005
04/01/2005,01/01/2005
05/01/2005,01/01/2005
240 changes: 120 additions & 120 deletions tests/common/test_model/data_sources/demand_heat_diff_dateformat.csv
Original file line number Diff line number Diff line change
@@ -1,121 +1,121 @@
,a,b
01/01/2005 00:00:00,0,0
01/01/2005 01:00:00,0,0
01/01/2005 02:00:00,0,0
01/01/2005 03:00:00,0,0
01/01/2005 04:00:00,0,0
01/01/2005 05:00:00,0,0
01/01/2005 06:00:00,10,10
01/01/2005 07:00:00,10,10
01/01/2005 08:00:00,10,10
01/01/2005 09:00:00,5,5
01/01/2005 10:00:00,5,5
01/01/2005 11:00:00,5,5
01/01/2005 12:00:00,5,5
01/01/2005 13:00:00,5,5
01/01/2005 14:00:00,5,5
01/01/2005 15:00:00,10,10
01/01/2005 16:00:00,10,10
01/01/2005 17:00:00,10,10
01/01/2005 18:00:00,10,10
01/01/2005 19:00:00,10,10
01/01/2005 20:00:00,10,10
01/01/2005 21:00:00,0,0
01/01/2005 22:00:00,0,0
01/01/2005 23:00:00,0,0
02/01/2005 00:00:00,0,0
02/01/2005 01:00:00,0,0
02/01/2005 02:00:00,0,0
02/01/2005 03:00:00,0,0
02/01/2005 04:00:00,0,0
02/01/2005 05:00:00,0,0
02/01/2005 06:00:00,10,10
02/01/2005 07:00:00,10,10
02/01/2005 08:00:00,10,10
02/01/2005 09:00:00,5,5
02/01/2005 10:00:00,5,5
02/01/2005 11:00:00,5,5
02/01/2005 12:00:00,5,5
02/01/2005 13:00:00,5,5
02/01/2005 14:00:00,5,5
02/01/2005 15:00:00,10,10
02/01/2005 16:00:00,10,10
02/01/2005 17:00:00,10,10
02/01/2005 18:00:00,10,10
02/01/2005 19:00:00,10,10
02/01/2005 20:00:00,10,10
02/01/2005 21:00:00,0,0
02/01/2005 22:00:00,0,0
02/01/2005 23:00:00,0,0
03/01/2005 00:00:00,0,0
03/01/2005 01:00:00,0,0
03/01/2005 02:00:00,0,0
03/01/2005 03:00:00,0,0
03/01/2005 04:00:00,0,0
03/01/2005 05:00:00,0,0
03/01/2005 06:00:00,10,10
03/01/2005 07:00:00,10,10
03/01/2005 08:00:00,10,10
03/01/2005 09:00:00,5,5
03/01/2005 10:00:00,5,5
03/01/2005 11:00:00,5,5
03/01/2005 12:00:00,5,5
03/01/2005 13:00:00,5,5
03/01/2005 14:00:00,5,5
03/01/2005 15:00:00,10,10
03/01/2005 16:00:00,10,10
03/01/2005 17:00:00,10,10
03/01/2005 18:00:00,10,10
03/01/2005 19:00:00,10,10
03/01/2005 20:00:00,10,10
03/01/2005 21:00:00,0,0
03/01/2005 22:00:00,0,0
03/01/2005 23:00:00,0,0
04/01/2005 00:00:00,0,0
04/01/2005 01:00:00,0,0
04/01/2005 02:00:00,0,0
04/01/2005 03:00:00,0,0
04/01/2005 04:00:00,0,0
04/01/2005 05:00:00,0,0
04/01/2005 06:00:00,10,10
04/01/2005 07:00:00,10,10
04/01/2005 08:00:00,10,10
04/01/2005 09:00:00,5,5
04/01/2005 10:00:00,5,5
04/01/2005 11:00:00,5,5
04/01/2005 12:00:00,5,5
04/01/2005 13:00:00,5,5
04/01/2005 14:00:00,5,5
04/01/2005 15:00:00,10,10
04/01/2005 16:00:00,10,10
04/01/2005 17:00:00,10,10
04/01/2005 18:00:00,10,10
04/01/2005 19:00:00,10,10
04/01/2005 20:00:00,10,10
04/01/2005 21:00:00,0,0
04/01/2005 22:00:00,0,0
04/01/2005 23:00:00,0,0
05/01/2005 00:00:00,0,0
05/01/2005 01:00:00,0,0
05/01/2005 02:00:00,0,0
05/01/2005 03:00:00,0,0
05/01/2005 04:00:00,0,0
05/01/2005 05:00:00,0,0
05/01/2005 06:00:00,10,10
05/01/2005 07:00:00,10,10
05/01/2005 08:00:00,10,10
05/01/2005 09:00:00,5,5
05/01/2005 10:00:00,5,5
05/01/2005 11:00:00,5,5
05/01/2005 12:00:00,5,5
05/01/2005 13:00:00,5,5
05/01/2005 14:00:00,5,5
05/01/2005 15:00:00,10,10
05/01/2005 16:00:00,10,10
05/01/2005 17:00:00,10,10
05/01/2005 18:00:00,10,10
05/01/2005 19:00:00,10,10
05/01/2005 20:00:00,10,10
05/01/2005 21:00:00,0,0
05/01/2005 22:00:00,0,0
05/01/2005 23:00:00,0,0
01/01/2005 00:00,0,0
01/01/2005 01:00,0,0
01/01/2005 02:00,0,0
01/01/2005 03:00,0,0
01/01/2005 04:00,0,0
01/01/2005 05:00,0,0
01/01/2005 06:00,10,10
01/01/2005 07:00,10,10
01/01/2005 08:00,10,10
01/01/2005 09:00,5,5
01/01/2005 10:00,5,5
01/01/2005 11:00,5,5
01/01/2005 12:00,5,5
01/01/2005 13:00,5,5
01/01/2005 14:00,5,5
01/01/2005 15:00,10,10
01/01/2005 16:00,10,10
01/01/2005 17:00,10,10
01/01/2005 18:00,10,10
01/01/2005 19:00,10,10
01/01/2005 20:00,10,10
01/01/2005 21:00,0,0
01/01/2005 22:00,0,0
01/01/2005 23:00,0,0
02/01/2005 00:00,0,0
02/01/2005 01:00,0,0
02/01/2005 02:00,0,0
02/01/2005 03:00,0,0
02/01/2005 04:00,0,0
02/01/2005 05:00,0,0
02/01/2005 06:00,10,10
02/01/2005 07:00,10,10
02/01/2005 08:00,10,10
02/01/2005 09:00,5,5
02/01/2005 10:00,5,5
02/01/2005 11:00,5,5
02/01/2005 12:00,5,5
02/01/2005 13:00,5,5
02/01/2005 14:00,5,5
02/01/2005 15:00,10,10
02/01/2005 16:00,10,10
02/01/2005 17:00,10,10
02/01/2005 18:00,10,10
02/01/2005 19:00,10,10
02/01/2005 20:00,10,10
02/01/2005 21:00,0,0
02/01/2005 22:00,0,0
02/01/2005 23:00,0,0
03/01/2005 00:00,0,0
03/01/2005 01:00,0,0
03/01/2005 02:00,0,0
03/01/2005 03:00,0,0
03/01/2005 04:00,0,0
03/01/2005 05:00,0,0
03/01/2005 06:00,10,10
03/01/2005 07:00,10,10
03/01/2005 08:00,10,10
03/01/2005 09:00,5,5
03/01/2005 10:00,5,5
03/01/2005 11:00,5,5
03/01/2005 12:00,5,5
03/01/2005 13:00,5,5
03/01/2005 14:00,5,5
03/01/2005 15:00,10,10
03/01/2005 16:00,10,10
03/01/2005 17:00,10,10
03/01/2005 18:00,10,10
03/01/2005 19:00,10,10
03/01/2005 20:00,10,10
03/01/2005 21:00,0,0
03/01/2005 22:00,0,0
03/01/2005 23:00,0,0
04/01/2005 00:00,0,0
04/01/2005 01:00,0,0
04/01/2005 02:00,0,0
04/01/2005 03:00,0,0
04/01/2005 04:00,0,0
04/01/2005 05:00,0,0
04/01/2005 06:00,10,10
04/01/2005 07:00,10,10
04/01/2005 08:00,10,10
04/01/2005 09:00,5,5
04/01/2005 10:00,5,5
04/01/2005 11:00,5,5
04/01/2005 12:00,5,5
04/01/2005 13:00,5,5
04/01/2005 14:00,5,5
04/01/2005 15:00,10,10
04/01/2005 16:00,10,10
04/01/2005 17:00,10,10
04/01/2005 18:00,10,10
04/01/2005 19:00,10,10
04/01/2005 20:00,10,10
04/01/2005 21:00,0,0
04/01/2005 22:00,0,0
04/01/2005 23:00,0,0
05/01/2005 00:00,0,0
05/01/2005 01:00,0,0
05/01/2005 02:00,0,0
05/01/2005 03:00,0,0
05/01/2005 04:00,0,0
05/01/2005 05:00,0,0
05/01/2005 06:00,10,10
05/01/2005 07:00,10,10
05/01/2005 08:00,10,10
05/01/2005 09:00,5,5
05/01/2005 10:00,5,5
05/01/2005 11:00,5,5
05/01/2005 12:00,5,5
05/01/2005 13:00,5,5
05/01/2005 14:00,5,5
05/01/2005 15:00,10,10
05/01/2005 16:00,10,10
05/01/2005 17:00,10,10
05/01/2005 18:00,10,10
05/01/2005 19:00,10,10
05/01/2005 20:00,10,10
05/01/2005 21:00,0,0
05/01/2005 22:00,0,0
05/01/2005 23:00,0,0
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
2005-01-01 21:00:00,0,0
2005-01-01 22:00:00,0,0
2005-01-01 23:00:00,0,0
02/01/2005 00:00:00,0,0
02/01/2005 00:00,0,0
2005-01-02 01:00:00,0,0
2005-01-02 02:00:00,0,0
2005-01-02 03:00:00,0,0
Expand Down
46 changes: 0 additions & 46 deletions tests/test_core_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,52 +209,6 @@ def override(param):
scenario="simple_supply",
)

def test_change_date_format(self):
    """
    Test the date parser catches a different date format from file than
    user input/default (inc. if it is just one line of a file that is incorrect)
    """

    # should pass: changing datetime format from default
    # The two `*.source` keys must be nested under `data_sources:`; at the same
    # level as it they would leave `data_sources` empty.
    override = AttrDict.from_yaml_string(
        """
        config.init.time_format: "%d/%m/%Y %H:%M:%S"
        data_sources:
            demand_elec.source: data_sources/demand_heat_diff_dateformat.csv
            demand_heat.source: data_sources/demand_heat_diff_dateformat.csv
        """
    )
    model = build_model(override_dict=override, scenario="simple_conversion")
    # Hourly timesteps from the start of 2005-01-01 to the end of 2005-01-02.
    expected_timesteps = pd.date_range("2005-01", "2005-01-02 23:00:00", freq="H")
    assert all(model.inputs.timesteps.to_index() == expected_timesteps)

def test_incorrect_date_format_one(self):
    """Building must raise ModelError when one file has the wrong date format."""
    yaml_override = (
        "data_sources.demand_elec.source: data_sources/demand_heat_diff_dateformat.csv"
    )
    single_file_override = AttrDict.from_yaml_string(yaml_override)

    # should fail: wrong dateformat input for one file
    with pytest.raises(exceptions.ModelError):
        build_model(scenario="simple_conversion", override_dict=single_file_override)

def test_incorrect_date_format_multi(self):
    """Building must raise ModelError when the date format is wrong for all files."""
    time_format_override = {"config.init.time_format": "%d/%m/%Y %H:%M:%S"}

    # should fail: wrong dateformat input for all files
    with pytest.raises(exceptions.ModelError):
        build_model(scenario="simple_supply", override_dict=time_format_override)

def test_incorrect_date_format_one_value_only(self):
    """Building must raise ModelError when a single value in a file is wrongly formatted."""
    yaml_override = (
        "data_sources.test_demand_elec.source: data_sources/demand_heat_wrong_dateformat.csv"
    )
    one_bad_value_override = AttrDict.from_yaml_string(yaml_override)

    # should fail: one value wrong in file
    # NOTE: the error is meant to point to 07/01/2005 10:00:00, but only the
    # exception type is asserted here.
    with pytest.raises(exceptions.ModelError):
        build_model(scenario="simple_conversion", override_dict=one_bad_value_override)

def test_inconsistent_time_indices_fails(self):
"""
Test that, including after any time subsetting, the indices of all time
Expand Down

0 comments on commit 4e77873

Please sign in to comment.