# Splitting Time Series

In [None]:
!pip install -r requirements.txt --quiet

In [None]:
import pandas as pd
import os

In [None]:
# Load the flights data with the 'Date' column as the index.
flights = pd.read_csv(
    'data/flights_decomp.csv',
    index_col="Date",
    parse_dates=['Date'],
)
# read_csv can leave the index unparsed (object dtype) when date parsing
# fails; converting explicitly makes sure the index is datetime-typed.
flights.index = pd.to_datetime(flights.index)

## Defining functions

In [None]:
%%writefile timeseries/split.py
from sklearn.model_selection import TimeSeriesSplit

def rolling_forecasting_origin(series, n_splits:int=5):
    """Build train/test partitions of ``series`` using ``TimeSeriesSplit``.

    Args:
        series: Object supporting positional selection through ``.iloc``
            (e.g. a pandas Series or DataFrame).
        n_splits: Number of splits requested from ``TimeSeriesSplit``.

    Returns:
        A list with one ``(train, test)`` tuple per split, in the order the
        splitter yields them. Each element is the ``.iloc`` selection of
        ``series`` for that split's train / test positions.
    """
    splitter = TimeSeriesSplit(n_splits=n_splits)
    return [
        (series.iloc[train_pos], series.iloc[test_pos])
        for train_pos, test_pos in splitter.split(series)
    ]

def hold_out_split(series, test_size:float=0.2):
    """Split an ordered container into a leading train part and a trailing test part.

    No shuffling is performed: the first ``floor(len(series) * (1 - test_size))``
    items become the training set and the remaining items the test set.

    Args:
        series: Any sized, sliceable container. Objects exposing ``.iloc``
            (e.g. a pandas Series or DataFrame) are sliced positionally
            through it, so the result does not depend on index labels.
        test_size: Fraction of the items placed in the test set; must lie
            in the closed interval ``[0, 1]``.

    Returns:
        A ``(train, test)`` tuple of slices of ``series``.

    Raises:
        ValueError: If ``test_size`` is outside ``[0, 1]`` (including NaN).
    """
    if not 0 <= test_size <= 1:
        raise ValueError(f"test_size must be between 0 and 1, got {test_size!r}")

    # 1 - test_size is frequently not exactly representable in binary
    # floating point (10 * (1 - 0.8) evaluates to 1.9999999999999996), so a
    # bare int() would land one item short of the intended boundary. The tiny
    # nudge absorbs that representation error before truncating.
    split_idx = int(len(series) * (1 - test_size) + 1e-9)

    # Prefer explicit positional slicing when available; plain sequences
    # (lists, tuples, strings) are sliced directly.
    positional = getattr(series, "iloc", series)
    return positional[:split_idx], positional[split_idx:]

## Use Hold-Out Partitioning on our data

In [None]:
# Keep the first 80% of the rows for training and the last 20% for testing.
# NOTE(review): this is only a chronological split if `flights` is already
# sorted by date -- confirm against the source CSV.
from timeseries.split import hold_out_split
holout_train, holout_test = hold_out_split(flights,test_size=0.2)

In [None]:
# Row counts of the train and test partitions.
len(holout_train), len(holout_test)

In [None]:
# Latest index value in the train partition and earliest in the test
# partition; the first should come before the second if there is no overlap.
max(holout_train.index), min(holout_test.index)

In [None]:
# Directory that receives every split written by this notebook.
SAVE_DIR = 'data/splits'
# exist_ok=True lets the cell be re-run without failing on an existing directory.
os.makedirs(SAVE_DIR, exist_ok=True)
# index=True writes the index alongside the values in each CSV.
holout_train.to_csv(f'{SAVE_DIR}/holout_train.csv', index=True)
holout_test.to_csv(f'{SAVE_DIR}/holout_test.csv', index=True)

## Use Time Series Cross-Validation (TSCV) on our data

In [None]:
# Build five (train, test) partitions of the flights data.
from timeseries.split import rolling_forecasting_origin
splits = rolling_forecasting_origin(flights, n_splits=5)

In [None]:
# Transpose the list of (train, test) pairs into one tuple of all train
# partitions and one tuple of all test partitions.
trains, tests = zip(*splits)

### Save our TSCV results as CSV files for downstream modeling

In [None]:
# Write each partition pair to SAVE_DIR; file names are numbered from 1
# (i + 1) rather than from enumerate's 0-based counter.
for i, (train, test) in enumerate(splits):
    train.to_csv(f'{SAVE_DIR}/train_split_{i+1}.csv', index=True)
    test.to_csv(f'{SAVE_DIR}/test_split_{i+1}.csv', index=True)