# Last valid index - Pandas vs Numpy

In [80]:
import numpy as np
import pandas as pd
from time import process_time_ns

#### Prepare data

In [81]:
dates = pd.date_range('1/1/2000', periods=2000)
vec = np.random.randn(2000)

### Comparison between numpy solutions

#### Last 10 nans

In [82]:
vec10 = vec.copy()
vec10[-10:] = np.nan

Create a boolean mask with isnan. Slice a range array using the mask.

In [83]:
t1 = process_time_ns()
for i in range(1000):
    mask = ~np.isnan(vec10)
    idv = np.array(range(len(vec10)))
    idv = idv[mask][-1]
t2 = process_time_ns()
print('{:.2f} ms'.format((t2-t1)/1e6))

290.31 ms


Loop from the back of the array, stop when a non-nan is found.

In [84]:
t1 = process_time_ns()
for i in range(1000):
    i = -1
    while np.isnan(vec10[i]):
        i -= 1
    idv = len(vec10)+i
t2 = process_time_ns()
print('{:.2f} ms'.format((t2-t1)/1e6))

30.91 ms


#### Last 100 nans

In [85]:
vec100 = vec.copy()
vec100[-100:] = np.nan

Create a boolean mask with isnan. Slice a range array using the mask.

In [86]:
t1 = process_time_ns()
for i in range(1000):
    mask = ~np.isnan(vec100)
    idv = np.array(range(len(vec100)))
    idv = idv[mask][-1]
t2 = process_time_ns()
print('{:.2f} ms'.format((t2-t1)/1e6))

322.15 ms


Loop from the back of the array, stop when a non-nan is found.

In [87]:
t1 = process_time_ns()
for i in range(1000):
    i = -1
    while np.isnan(vec100[i]):
        i -= 1
    idv = len(vec100)+i
t2 = process_time_ns()
print('{:.2f} ms'.format((t2-t1)/1e6))

232.72 ms


### Comparison between pandas and numpy 

### Last 10 values are nans 

In [88]:
vec[-10:] = np.nan
df = pd.Series(vec, index=dates)

In [89]:
%%timeit
for i in range(1000):
    x = df.last_valid_index()

10 loops, best of 5: 167 ms per loop


In [90]:
%%timeit
for i in range(1000):
    i = -1
    while np.isnan(vec[i]):
        i -= 1
    idv = len(vec)+i

10 loops, best of 5: 23.3 ms per loop


### Last 100 values are nans 

In [91]:
vec[-100:] = np.nan
df = pd.Series(vec, index=dates)

In [92]:
%%timeit
for i in range(1000):
    x = df.last_valid_index()

10 loops, best of 5: 168 ms per loop


In [93]:
%%timeit
for i in range(1000):
    i = -1
    while np.isnan(vec[i]):
        i -= 1
    idv = len(vec)+i

1 loop, best of 5: 209 ms per loop
