# Time series date slicing - Pandas vs Numpy

In [None]:
import numpy as np
import pandas as pd
from time import process_time_ns

#### Prepare data

In [None]:
# Fixed seed so that Restart & Run All regenerates exactly the same series.
SEED = 42
N_DAYS = 2000

# Daily DatetimeIndex starting on 2000-01-01 (ISO format avoids any
# day/month ambiguity in the date string).
dates = pd.date_range('2000-01-01', periods=N_DAYS)
# NOTE: despite its name, `df` is a pandas Series, not a DataFrame; the name is
# kept because the benchmark cells refer to it.
df = pd.Series(np.random.default_rng(SEED).standard_normal(N_DAYS), index=dates)
# Slice bounds shared by every benchmark; both dates are present in the index.
start, end = '2004-01-01', '2005-02-20'
# Display the first and last rows to confirm the date range that is covered.
df.iloc[[0, -1]]

#### Process time (time library)

Slice using the pandas `.loc[]` indexer (label-based slicing; both endpoints are included)

In [None]:
# Time 1000 repetitions of the label-based slice on the Series.
# process_time_ns() returns the CPU time of the current process in
# nanoseconds; a1 and a2 bracket only the loop below.
a1 = process_time_ns()
for i in range(1000):
    # The string bounds are resolved against the DatetimeIndex on every
    # iteration; a .loc label slice includes both endpoints.
    x = df.loc[start:end]
a2 = process_time_ns()

Slice using NumPy `searchsorted`. The time needed to extract the NumPy arrays from the pandas Series is not included in the measurement.

In [None]:
# Extract the raw NumPy arrays once, outside the timed region.
v, dt = np.ravel(df.values), df.index.values
a3 = process_time_ns()
for i in range(1000):
    # The bounds are converted inside the loop on purpose: turning the date
    # strings into datetime64 values is part of the per-slice cost.
    # side='left'  -> first position with dt >= start, so start is included.
    lo = np.searchsorted(dt, np.datetime64(start), side='left') if start else 0
    # side='right' -> one past the last position with dt <= end, so the end
    # date is included exactly like in df.loc[start:end]. The default
    # side='left' would drop the end date and return one row fewer than pandas.
    hi = np.searchsorted(dt, np.datetime64(end), side='right') if end else len(dt)
    # A missing bound (None or '') falls back to the corresponding array edge,
    # so _v and _dt are always defined.
    _v = v[lo:hi]
    _dt = dt[lo:hi]
a4 = process_time_ns()

Slice using NumPy `searchsorted`. The time needed to extract the NumPy arrays from the pandas Series is included in the measurement.

In [None]:
a5 = process_time_ns()
for i in range(1000):
    # Array extraction is repeated on every iteration so that its cost is part
    # of the measurement.
    v, dt = np.ravel(df.values), df.index.values
    # side='left'  -> first position with dt >= start, so start is included.
    lo = np.searchsorted(dt, np.datetime64(start), side='left') if start else 0
    # side='right' -> one past the last position with dt <= end, so the end
    # date is included exactly like in df.loc[start:end]. The default
    # side='left' would drop the end date and return one row fewer than pandas.
    hi = np.searchsorted(dt, np.datetime64(end), side='right') if end else len(dt)
    # A missing bound (None or '') falls back to the corresponding array edge,
    # so _v and _dt are always defined.
    _v = v[lo:hi]
    _dt = dt[lo:hi]
a6 = process_time_ns()

Print the total process time for the 1000 iterations of each approach, in milliseconds

In [None]:
# Convert each nanosecond interval to milliseconds before formatting.
NS_PER_MS = 1e6
pandas_ms = (a2 - a1) / NS_PER_MS
numpy_ms = (a4 - a3) / NS_PER_MS
pd_2_np_ms = (a6 - a5) / NS_PER_MS

# One tab-separated line per approach, two decimals each.
report = '(in milliseconds)\npandas\t{:.2f}\nnumpy\t{:.2f}\npd_2_np\t{:.2f}'
print(report.format(pandas_ms, numpy_ms, pd_2_np_ms))

#### Timeit magic call

In [None]:
%%timeit
# pandas .loc slice measured with the %%timeit cell magic. The inner loop is
# kept, so each reported timing covers 1000 slices.
for i in range(1000):
    x = df.loc[start:end]

In [None]:
%%timeit
for i in range(1000):
    search = []
    if start:
        search.append(np.datetime64(start))
    if end:
        search.append(np.datetime64(end))
    idx = np.searchsorted(dt, search)
    if start and end:
        _v = v[idx[0]:idx[1]]
        _dt = dt[idx[0]:idx[1]]
    elif start:
        _v = v[idx[0]:]
        _dt = dt[idx[0]:]
    elif end:
        _v = v[:idx[0]]
        _dt = dt[:idx[0]]

In [None]:
%%timeit
for i in range(1000):
    v, dt = np.ravel(df.values), df.index.values
    search = []
    if start:
        search.append(np.datetime64(start))
    if end:
        search.append(np.datetime64(end))
    idx = np.searchsorted(dt, search)
    if start and end:
        _v = v[idx[0]:idx[1]]
        _dt = dt[idx[0]:idx[1]]
    elif start:
        _v = v[idx[0]:]
        _dt = dt[idx[0]:]
    elif end:
        _v = v[:idx[0]]
        _dt = dt[:idx[0]]