# Time series date slicing - Pandas vs Numpy

In [1]:
import numpy as np
import pandas as pd
from time import process_time_ns

#### Define functions and prepare data

In [2]:
def compound(r: float, t: np.ndarray, n: int = 1) -> np.ndarray:
    """Return the compounded growth ``(1 + r / n) ** t - 1`` for every entry of ``t``.

    Args:
        r: Rate; it is divided by ``n`` before being compounded.
        t: Exponent(s) applied to the per-period factor ``1 + r / n``.
        n: Divisor applied to ``r`` (defaults to 1, i.e. ``r`` is used as is).

    Returns:
        The growth in excess of 1, element-wise over ``t``.
    """
    # Annotations use np.ndarray: np.array is a factory function, not a type.
    # NOTE(review): the exponent is ``t`` and not ``n * t``, so for n != 1 ``t``
    # presumably counts compounding periods rather than years -- confirm with callers.
    return (1. + r / n) ** t - 1.


def dcf_F(cf: np.ndarray, r: float) -> np.ndarray:
    """Discount the values in column 1 of ``cf`` using the times in column 0.

    Args:
        cf: 2-D array indexed as ``cf[:, 0]`` (times) and ``cf[:, 1]`` (values).
        r: Rate handed to ``compound`` with ``n=1``.

    Returns:
        ``cf[:, 1]`` divided element-wise by ``compound(r, cf[:, 0], 1) + 1``.
    """
    times = cf[:, 0]
    amounts = cf[:, 1]
    # compound() returns growth minus one; add the one back to get the divisor.
    divisor = compound(r, times, 1) + 1.
    return amounts / divisor

def dcf_C(cf: np.ndarray, r: float) -> np.ndarray:
    """Discount the values in row 1 of ``cf`` using the times in row 0.

    Args:
        cf: 2-D array indexed as ``cf[0, :]`` (times) and ``cf[1, :]`` (values).
        r: Rate handed to ``compound`` with ``n=1``.

    Returns:
        ``cf[1, :]`` divided element-wise by ``compound(r, cf[0, :], 1) + 1``.
    """
    times = cf[0, :]
    amounts = cf[1, :]
    # compound() returns growth minus one; add the one back to get the divisor.
    divisor = compound(r, times, 1) + 1.
    return amounts / divisor

In [38]:
# Synthetic series used by the benchmark cells below.
# Seeded so that Restart & Run All reproduces the same data and the same sums.
SEED = 42
N_SAMPLES = 10_000_000
rng = np.random.default_rng(SEED)

# dt: non-decreasing values (cumulative sum of non-negative increments).
dt = np.cumsum(np.abs(rng.standard_normal(N_SAMPLES) / 1000.))
# ts: non-negative values, one per entry of dt.
ts = np.abs(rng.standard_normal(N_SAMPLES) * 10)

In [39]:
# Stack the two series as rows: dt in row 0, ts in row 1.
vC = np.vstack([dt, ts])
# Transposed counterpart: dt in column 0, ts in column 1.
# ndarray.T is a view of vC, not a copy of its data.
vF = vC.T
# Identities of the two Python array objects (says nothing about shared memory).
(id(vC), id(vF))

(139681195106752, 139681195082096)

#### Comparison

In [49]:
%%timeit -n 1 -r 1
# One timed call, no repeat loop: dcf_C reads rows 0 and 1 of vC.
pv = dcf_C(vC, .02)

1 loop, best of 1: 768 ms per loop


In [50]:
%%timeit -n 1 -r 1
# One timed call, no repeat loop: dcf_F reads columns 0 and 1 of vF (= vC.T).
pv = dcf_F(vF, .02)

1 loop, best of 1: 755 ms per loop


In [42]:
%%timeit -n 1 -r 1
# NOTE(review): vF is vC.T, so dcf_C's cf[0, :] and cf[1, :] each select only
# 2 elements here. The loop times 1000 two-element calls, not the full series,
# so this figure is not comparable with the single full-length calls.
for i in range(1000):
    pv = dcf_C(vF, .02)

1 loop, best of 1: 10.3 ms per loop


In [43]:
%%timeit -n 1 -r 1
# NOTE(review): vC has two rows, so dcf_F's cf[:, 0] and cf[:, 1] each select only
# 2 elements here. The loop times 1000 two-element calls, not the full series,
# so this figure is not comparable with the single full-length calls.
for i in range(1000):
    pv = dcf_F(vC, .02)

1 loop, best of 1: 10.4 ms per loop


In [44]:
# Matched pairs: each function gets the array orientation it indexes.
pv_CC, pv_FF = dcf_C(vC, .02), dcf_F(vF, .02)
# Crossed pairs: each function gets the transposed orientation.
pv_CF, pv_FC = dcf_C(vF, .02), dcf_F(vC, .02)

In [45]:
# Totals of the four results, in the order CC, FF, CF, FC.
tuple(map(np.sum, (pv_CC, pv_FF, pv_CF, pv_FC)))

(503748.5952144621, 503748.5952144621, 2.600500635336933, 2.600500635336933)

In [46]:
# Shapes of the four results, in the order CC, FF, CF, FC.
tuple(result.shape for result in (pv_CC, pv_FF, pv_CF, pv_FC))

((10000000,), (10000000,), (2,), (2,))

Correctness check