alteryx · freddyaboulton · Jan 20, 2022 · Jan 19, 2022 · Jan 19, 2022 · Jan 19, 2022
diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst
@@ -14,6 +14,7 @@
         * Removed potential prediction explanations failure when pipelines predicted a class with probability 1 :pr:`3221`
         * Dropped NaNs before partial dependence grid generation :pr:`3235`
         * Fixed bug where ``InvalidTargetDataCheck`` would not check time series regression targets :pr:`3251`
+        * Fixed bug in ``are_datasets_separated_by_gap_time_index`` when the time index has a calendar-based frequency such as year-start (``AS-JAN``) :pr:`3256`
     * Changes
         * Raised lowest compatible numpy version to 1.21.0 to address security concerns :pr:`3207`
         * Changed the default objective to ``MedianAE`` from ``R2`` for time series regression :pr:`3205`

diff --git a/evalml/tests/utils_tests/test_gen_utils.py b/evalml/tests/utils_tests/test_gen_utils.py
@@ -837,3 +837,18 @@ def test_time_series_pipeline_validates_holdout_data(
         assert (
             result.error_codes[0] == ValidationErrorCode.INVALID_HOLDOUT_GAP_SEPARATION
         )
+
+
+def test_year_start_separated_by_gap():
+    """Yearly ("AS-JAN") splits two periods apart must count as gap-separated."""
+    # 35 consecutive year-start timestamps beginning in 1960.
+    yearly_dates = pd.date_range("1960-01-01", freq="AS-JAN", periods=35)
+    X = pd.DataFrame({"time_index": pd.Series(yearly_dates)})
+    # Training covers the first 30 rows; rows 30 and 31 are left out as the gap.
+    train = X.iloc[:30]
+    test = X.iloc[32:36]
+    params = {
+        "time_index": "time_index",
+        "gap": 2,
+    }
+    assert are_datasets_separated_by_gap_time_index(train, test, params)
diff --git a/evalml/utils/gen_utils.py b/evalml/utils/gen_utils.py
@@ -8,6 +8,7 @@
 
 import numpy as np
 import pandas as pd
+from pandas.tseries.frequencies import to_offset
 from sklearn.utils import check_random_state
 
 from evalml.exceptions import MissingComponentError, ValidationErrorCode
@@ -611,13 +612,7 @@ def are_datasets_separated_by_gap_time_index(train, test, pipeline_params):
 
     first_testing_date = test_copy[test_copy.ww.time_index].iloc[0]
     last_training_date = train_copy[train_copy.ww.time_index].iloc[-1]
-    dt_difference = first_testing_date - last_training_date
-
-    try:
-        units_difference = dt_difference / freq
-    except ValueError:
-        units_difference = dt_difference / ("1" + freq)
-    return units_difference == gap_difference
+    return (to_offset(freq) * gap_difference) + last_training_date == first_testing_date
 
 
 _holdout_validation_result = namedtuple(