In [12]:
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
from sklearn.datasets import make_regression

In [13]:
X = np.array([[1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12]])
y = np.array([2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24])

In [14]:
#X,y=make_regression(n_samples=15,n_features=1)

In [15]:
X

array([[ 1],
       [ 2],
       [ 3],
       [ 4],
       [ 5],
       [ 6],
       [ 7],
       [ 8],
       [ 9],
       [10],
       [11],
       [12]])

In [16]:
y

array([ 2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22, 24])

In [17]:
X.shape

(12, 1)

In [18]:
y.shape

(12,)

In [19]:
tss = TimeSeriesSplit(n_splits=5)

In [20]:
for i, (train_index, test_index) in enumerate(tss.split(X)):
    print(f"Fold {i+1}:")
    print(f"  train:{X[train_index]}")
    print(f"  test:{X[test_index]}")

Fold 1:
  train:[[1]
 [2]]
  test:[[3]
 [4]]
Fold 2:
  train:[[1]
 [2]
 [3]
 [4]]
  test:[[5]
 [6]]
Fold 3:
  train:[[1]
 [2]
 [3]
 [4]
 [5]
 [6]]
  test:[[7]
 [8]]
Fold 4:
  train:[[1]
 [2]
 [3]
 [4]
 [5]
 [6]
 [7]
 [8]]
  test:[[ 9]
 [10]]
Fold 5:
  train:[[ 1]
 [ 2]
 [ 3]
 [ 4]
 [ 5]
 [ 6]
 [ 7]
 [ 8]
 [ 9]
 [10]]
  test:[[11]
 [12]]


In [21]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score,cross_validate

In [22]:
rf = RandomForestRegressor()

In [23]:
cv = cross_validate(rf,X.reshape(-1,1),y,cv=tss,scoring='neg_root_mean_squared_error') #['neg_root_mean_squared_error','neg_mean_absolute_percentage_error'])

In [24]:
cv

{'fit_time': array([0.10684276, 0.09179115, 0.0898869 , 0.09899497, 0.09382415]),
 'score_time': array([0.00750422, 0.00717902, 0.00723815, 0.00779486, 0.00748372]),
 'test_score': array([-3.64005494, -3.89070688, -3.91003836, -4.12310563, -4.20076184])}

In [25]:
cv['test_score'].mean()

-3.952933529111403

In [26]:
for train_index, test_index in tss.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # Fit model
    rf.fit(X_train,y_train)
    # Score test set
    predicted_test=rf.predict(X_test)
    rmse = mean_squared_error(y_test,predicted_test,squared=False)
    print(rmse)

3.640054944640259
3.871382182115323
3.871382182115323
4.181339498294776
4.3368652273272215


## Blocked time series split

In [27]:
import pandas as pd

In [28]:
class BlockingTimeSeriesSplit():
    def __init__(self, n_splits):
        self.n_splits = n_splits
    
    def get_n_splits(self, X, y, groups):
        return self.n_splits
    
    def split(self, X, y=None, groups=None):
        n_samples = len(X)
        k_fold_size = n_samples // self.n_splits
        indices = np.arange(n_samples)

        margin = 0
        for i in range(self.n_splits):
            start = i * k_fold_size
            stop = start + k_fold_size
            mid = int(0.8 * (stop - start)) + start
            yield indices[start: mid], indices[mid + margin: stop]

In [29]:
btscv = BlockingTimeSeriesSplit(n_splits=5)

In [30]:
cv = cross_validate(rf,X.reshape(-1,1),y,cv=btscv,scoring='neg_root_mean_squared_error')

In [31]:
cv

{'fit_time': array([0.10727406, 0.08857012, 0.08911991, 0.09095573, 0.10019517]),
 'score_time': array([0.00738025, 0.00720215, 0.00727606, 0.00727224, 0.00825071]),
 'test_score': array([-2., -2., -2., -2., -2.])}