# Time series date slicing - pandas vs NumPy

In [None]:
import numpy as np
import pandas as pd
from time import process_time_ns

#### Define functions and prepare data

In [None]:
def ts_yield_pd(ts, base, date):
    """Return latest valid ``ts`` over latest valid ``base`` as of ``date``.

    Both series are label-sliced with ``.loc[:date]``. Within each slice the
    entry at ``last_valid_index()`` is looked up, and the ``ts`` entry is
    divided by the ``base`` entry.

    Args:
        ts: Series supplying the numerator.
        base: Series supplying the denominator.
        date: Upper label handed to ``.loc[:date]`` for both series.

    Returns:
        The selected ``ts`` value divided by the selected ``base`` value.
    """
    ts_upto = ts.loc[:date]
    base_upto = base.loc[:date]

    last_ts_label = ts_upto.last_valid_index()
    last_base_label = base_upto.last_valid_index()

    numerator = ts_upto.at[last_ts_label]
    denominator = base_upto.at[last_base_label]
    return numerator / denominator

def _bound_position(dt, bound):
    """Return the left insertion index of ``bound`` in ``dt``, or None if unset."""
    # None (and, for backward compatibility, an empty string) means "no bound".
    # Truthiness is deliberately NOT used: a valid bound can be falsy, e.g.
    # np.datetime64('1970-01-01'), whose underlying integer value is zero.
    if bound is None or (isinstance(bound, str) and not bound):
        return None
    return np.searchsorted(dt, np.datetime64(bound))


def trim_ts(v, dt, start=None, end=None):
    """Trim a value array and its date array to the window ``[start, end)``.

    ``dt`` must be sorted ascending, because positions are found with
    ``np.searchsorted``. The window is half-open: an entry dated exactly
    ``start`` is kept, an entry dated exactly ``end`` is dropped.

    Args:
        v: Values aligned position-by-position with ``dt``.
        dt: Sorted array of dates.
        start: Optional lower bound (anything ``np.datetime64`` accepts).
            ``None`` leaves the beginning untrimmed.
        end: Optional upper bound (anything ``np.datetime64`` accepts).
            ``None`` leaves the end untrimmed.

    Returns:
        Tuple ``(v, dt)`` sliced to the requested window.
    """
    # A None slice endpoint means "open", so absent bounds need no search and
    # searchsorted is never called with an empty list of keys.
    window = slice(_bound_position(dt, start), _bound_position(dt, end))
    return v[window], dt[window]

def last_valid_index(v):
    """Return the position of the last non-NaN element of ``v``.

    The sequence is scanned from the end using negative indices. If every
    element is NaN, or ``v`` is empty, the scan runs past the start and
    the resulting ``IndexError`` propagates to the caller.

    Args:
        v: Indexable sequence of floats.

    Returns:
        Non-negative integer position of the last non-NaN element.
    """
    offset = -1
    while True:
        if not np.isnan(v[offset]):
            # Translate the negative offset into a regular position.
            return len(v) + offset
        offset -= 1

def _last_valid_asof(values, dates, date):
    """Return the last non-NaN entry of ``values`` dated on or before ``date``."""
    if date is None:
        stop = len(values)
    else:
        # side="right" places the cut AFTER entries equal to ``date``, so an
        # observation dated exactly ``date`` is included (matching the
        # inclusive end of a pandas ``.loc[:date]`` slice).
        stop = int(np.searchsorted(dates, np.datetime64(date), side="right"))
        stop = min(stop, len(values))

    # Walk backwards over trailing NaNs without copying or slicing the data.
    pos = stop - 1
    while pos >= 0 and np.isnan(values[pos]):
        pos -= 1
    if pos < 0:
        raise IndexError("no valid observation on or before the requested date")
    return values[pos]


def ts_yield_np(ts, dt, base, dt_base, date):
    """Return latest valid ``ts`` over latest valid ``base`` as of ``date``.

    NumPy counterpart of ``ts_yield_pd``. For each series the last non-NaN
    value dated on or before ``date`` is selected, and the ``ts`` value is
    divided by the ``base`` value.

    Args:
        ts: Array of numerator values.
        dt: Sorted ``datetime64`` array aligned with ``ts``.
        base: Array of denominator values.
        dt_base: Sorted ``datetime64`` array aligned with ``base``.
        date: As-of date (anything ``np.datetime64`` accepts); the end point
            is inclusive. ``None`` uses all available data.

    Returns:
        The selected ``ts`` value divided by the selected ``base`` value.

    Raises:
        IndexError: If either series has no non-NaN value on or before
            ``date``.
    """
    numerator = _last_valid_asof(ts, dt, date)
    denominator = _last_valid_asof(base, dt_base, date)
    return numerator / denominator

In [None]:
# Two separately drawn random series sharing one DatetimeIndex.
dates = pd.date_range('1/1/2000', periods=2000)
ts = pd.Series(np.random.randn(len(dates)), index=dates)
base = pd.Series(np.random.randn(len(dates)), index=dates)

# NumPy counterparts of the same data: value arrays plus index arrays.
p_np = ts.values
dt_p_np = ts.index.values
b_np = base.values
dt_b_np = base.index.values

#### Comparison

In [None]:
%%timeit
# Each timeit pass performs 1000 calls of the pandas implementation with the
# same inputs, so the reported time covers 1000 lookups.
for i in range(1000):
    y_pd = ts_yield_pd(ts, base, '2018-01-01')

In [None]:
%%timeit
# Each timeit pass performs 1000 calls of the NumPy implementation with the
# same inputs. np.datetime64(...) is rebuilt inside the loop, so its
# construction cost is part of the measurement.
for i in range(1000):
    y_np = ts_yield_np(p_np, dt_p_np, b_np, dt_b_np, np.datetime64('2018-01-01'))

#### Correctness check

In [None]:
# Evaluate both implementations once on the same data and show the two
# results side by side for comparison.
# NOTE(review): '2018-01-01' appears to lie after the last generated date, so
# this check would not exercise the end-date cut inside the data range —
# confirm, and consider also checking a date that falls within the data.
y_pd = ts_yield_pd(ts, base, '2018-01-01')
y_np = ts_yield_np(p_np, dt_p_np, b_np, dt_b_np, np.datetime64('2018-01-01'))
y_pd, y_np