In [1]:
import sys
from pathlib import Path

currentWorkingDirectory = Path('')
projectRootDirectory = currentWorkingDirectory.parent
sys.path.append(projectRootDirectory.as_posix())

## Conditional loops through Numpy arrays with NumExpr and Numba
We're going to look at a case where we are looking at a 2D numpy array and we're looking for the first value in each column that meets a condition.

The example we're going to use

In [4]:
from pathlib import Path

import pandas as pd
import numba
import numexpr as ne
import numpy as np

import altair as alt

from IPython.display import Markdown, display
%load_ext line_profiler

In [5]:
def printmd(string):
    display(Markdown(string))

In [8]:
dataPath = Path("../data/raw/")
interimDataPath = Path("../data/interim/")

def loadData(duplicateN:int=1):
    df = pd.read_csv(dataPath / "atlanticInterpolated.csv")
    z = df.z.values
    dropColumns = []
    for col in df.columns:
        if df.loc[30, col] > df.loc[0, col] - 0.1:
            dropColumns.append(col)
    df = df.drop(columns=dropColumns)
    df = pd.concat([df for _ in range(duplicateN)], axis=1)
    temps = df.iloc[:, 1:].values
    tempsC = np.ascontiguousarray(temps)
    surfaceTemps = temps[:2, :].mean(axis=0)
    surfaceTempsC = tempsC[:2, :].mean(axis=0)
    return df, temps, z, tempsC, surfaceTemps, surfaceTempsC

In [31]:
print("Load data")
df, temps, z, tempsC, surfaceTemps, surfaceTempsC = loadData(duplicateN=100)
print("Data loaded")

Load data
Data loaded


In [32]:
thresholdTemperatureDifference = 0.1

In [33]:
surfaceTemps = temps[:2,:].mean(axis=0)
surfaceTemps.shape

(248299,)

In [11]:
# %timeit -n 1 -r 1 (temps < (surfaceTemps - thresholdTemperatureDifference)).nonzero()

1.82 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [13]:
def numpyNonZeroColumnWise(
    temps: np.ndarray,
    surfaceTemps: np.ndarray,
    thresholdTemperatureDifference: float,
    z: np.ndarray,
):
    condition = temps < (surfaceTemps - thresholdTemperatureDifference)
    mldIndex = np.array(
        [condition[:, colIdx].nonzero()[0][0] for colIdx in range(temps.shape[1])]
    )
    mldDepth = np.array([z[idx] for idx in mldIndex])[:, np.newaxis]
    mldDepth = z[mldIndex][:, np.newaxis]
    mldTemp = np.array([temps[idx, colIdx] for colIdx, idx in enumerate(mldIndex)])[
        :, np.newaxis
    ]
    return mldDepth, mldTemp

mldnumpyNonZeroColumnWise, numpymldTempNonZeroColumnWise = numpyNonZeroColumnWise(
    temps=temps, surfaceTemps=surfaceTemps, thresholdTemperatureDifference=0.1, z=z
)

In [15]:
def createVisualisationDataframes(df:pd.DataFrame,mlDepth:np.ndarray,mldTemp:np.ndarray):
    meltDf = pd.melt(
        pd.concat([
            df.iloc[:,[0]],
            df.iloc[:,1:].sample(10,axis=1).drop(columns=['z'],errors='ignore')],axis=1),id_vars=['z'])
    mldDf = pd.DataFrame(np.hstack([mlDepth,mldTemp]),index=df.columns[1:],columns=["mld","mldTemp"]).reset_index()
    return meltDf,mldDf
meltDf,mldDf = createVisualisationDataframes(df=df,mlDepth=mldnumpyNonZeroColumnWise,mldTemp=numpymldTempNonZeroColumnWise)

In [17]:
sampleQueries = meltDf.variable.sample(5)
tempLines = alt.Chart(meltDf.loc[meltDf.variable.isin(sampleQueries)],
                      title="Sample temperature profiles (lines) with predicted mixed layer depth (circles)",
                     height=400).mark_line().encode(
    x=alt.X('value:Q',title="Temperature"),
    y=alt.Y('z:Q',title="Depth (m)"),
    color=alt.Color('variable:N',title="Profile number and date"))
mldSpots = alt.Chart(mldDf.loc[mldDf.loc[:,'index'].isin(sampleQueries)]).mark_circle(size=100).encode(
    x="mldTemp:Q",
    y="mld:Q",
    color="index:N")
alt.layer(*(tempLines,mldSpots))

In [27]:
%timeit -n 1 -r 5 (temps<surfaceTemps-thresholdTemperatureDifference)

7.53 ms ± 1.06 ms per loop (mean ± std. dev. of 5 runs, 1 loop each)


In [28]:
%timeit -n 1 -r 5 ne.evaluate("(temps<surfaceTemps-thresholdTemperatureDifference)")

4.23 ms ± 308 µs per loop (mean ± std. dev. of 5 runs, 1 loop each)


In [20]:
def numExprNonZeroColumnWise(
    temps: np.ndarray,
    surfaceTemps: np.ndarray,
    thresholdTemperatureDifference: float,
    z: np.ndarray,
):
    condition = ne.evaluate("temps<surfaceTemps-thresholdTemperatureDifference")
    mldIndex = np.array(
        [condition[:, colIdx].nonzero()[0][0] for colIdx in range(temps.shape[1])]
    )
    mldDepth = np.array([z[idx] for idx in mldIndex])[:, np.newaxis]
    mldDepth = z[mldIndex][:, np.newaxis]
    mldTemp = np.array([temps[idx, colIdx] for colIdx, idx in enumerate(mldIndex)])[
        :, np.newaxis
    ]
    return mldDepth, mldTemp


mldnumExprNonZeroColumnWise, numExprmldTempNonZeroColumnWise = numExprNonZeroColumnWise(
    temps=temps, surfaceTemps=surfaceTemps, thresholdTemperatureDifference=0.1, z=z
)
mldnumExprNonZeroColumnWise
np.testing.assert_array_almost_equal(
    mldnumpyNonZeroColumnWise, mldnumExprNonZeroColumnWise
)
np.testing.assert_array_almost_equal(
    numpymldTempNonZeroColumnWise, numExprmldTempNonZeroColumnWise
)


In [21]:
@numba.njit()
def numbaConditionLoop(
    temps: np.ndarray,
    surfaceTemps: np.ndarray,
    thresholdTemperatureDifference: float,
    z: np.ndarray,
):
    mlDepth = np.empty_like(surfaceTemps)
    mldTemp = np.empty_like(surfaceTemps)
    for col in range(temps.shape[1]):
        row = 0
        temperature = temps[row, col]
        surfaceTemp = surfaceTemps[col]
        threshold = surfaceTemp - thresholdTemperatureDifference
        while (temperature > threshold) and row < temps.shape[0]:
            row += 1
            temperature = temps[row, col]
        mlDepth[col] = z[int(row)]
        mldTemp[col] = temps[int(row), col]
    return mlDepth, mldTemp


@numba.njit(parallel=True)
def numbaConditionLoopParallel(
    temps: np.ndarray,
    surfaceTemps: np.ndarray,
    thresholdTemperatureDifference: float,
    z: np.ndarray,
):
    mlDepth = np.empty((surfaceTemps.shape[0],1))
    mldTemp = np.empty((surfaceTemps.shape[0],1))
    for col in numba.prange(temps.shape[1]):
        row = 0
        temperature = temps[row, col]
        surfaceTemp = surfaceTemps[col]
        threshold = surfaceTemp - thresholdTemperatureDifference
        while (temperature > threshold) and row < temps.shape[0]:
            row += 1
            temperature = temps[row, col]
        mlDepth[col] = z[int(row)]
        mldTemp[col] = temps[int(row), col]
    return mlDepth, mldTemp

mldnumbaConditionLoop, mldTempnumbaConditionLoop = numbaConditionLoop(
    temps=temps, surfaceTemps=surfaceTemps, thresholdTemperatureDifference=0.1, z=z
)
(
    mldnumbaConditionLoopParallel,
    mldTempnumbaConditionLoopParallel,
) = numbaConditionLoopParallel(
    temps=temps, surfaceTemps=surfaceTemps, thresholdTemperatureDifference=0.1, z=z
)
np.testing.assert_array_almost_equal(
    mldnumpyNonZeroColumnWise, mldnumExprNonZeroColumnWise
)
np.testing.assert_array_almost_equal(
    numpymldTempNonZeroColumnWise, numExprmldTempNonZeroColumnWise
)


OMP: Info #273: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


In [35]:
temps32 = temps.astype(np.float32)

In [36]:
printmd("**Numpy**")
%timeit -n 1 -r 3 numpyNonZeroColumnWise(temps=temps,surfaceTemps=surfaceTemps,thresholdTemperatureDifference=0.1,z=z)
printmd("**Numpy 32-bit**")
%timeit -n 1 -r 3 numpyNonZeroColumnWise(temps=temps32,surfaceTemps=surfaceTemps,thresholdTemperatureDifference=0.1,z=z)
printmd("**numExpr**")
%timeit -n 1 -r 3 numExprNonZeroColumnWise(temps=temps,surfaceTemps=surfaceTemps,thresholdTemperatureDifference=0.1,z=z)
printmd("**numExpr 32-bit**")
%timeit -n 1 -r 3 numExprNonZeroColumnWise(temps=temps32,surfaceTemps=surfaceTemps,thresholdTemperatureDifference=0.1,z=z)
printmd("**Numba loop (serial)**")
%timeit -n 1 -r 3 numbaConditionLoop(temps=temps,surfaceTemps=surfaceTemps,thresholdTemperatureDifference=0.1,z=z)
printmd("**Numba loop (parallel)**")
%timeit -n 1 -r 3 numbaConditionLoopParallel(temps=temps,surfaceTemps=surfaceTemps,thresholdTemperatureDifference=0.1,z=z)

**Numpy**

409 ms ± 13.9 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)


**Numpy 32-bit**

417 ms ± 10.8 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)


**numExpr**

378 ms ± 5.61 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)


**numExpr 32-bit**

395 ms ± 10.8 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)


**Numba loop (serial)**

19.2 ms ± 1.01 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)


**Numba loop (parallel)**

8.89 ms ± 2.07 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)


In [42]:
%lprun -f numExprNonZeroColumnWise numExprNonZeroColumnWise(temps=temps,surfaceTemps=surfaceTemps,thresholdTemperatureDifference=0.1,z=z)

Timer unit: 1e-06 s

Total time: 0.977527 s
File: /tmp/ipykernel_3080/212628625.py
Function: numExprNonZeroColumnWise at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
     1                                           def numExprNonZeroColumnWise(
     2                                               temps: np.ndarray,
     3                                               surfaceTemps: np.ndarray,
     4                                               thresholdTemperatureDifference: float,
     5                                               z: np.ndarray,
     6                                           ):
     7         1      83519.0  83519.0      8.5      condition = ne.evaluate("temps<surfaceTemps-thresholdTemperatureDifference")
     8         2      12966.0   6483.0      1.3      mldIndex = np.array(
     9         1     562702.0 562702.0     57.6          [condition[:, colIdx].nonzero()[0][0] for colIdx in range(temps.shape[1])]
    10                        

In [43]:
sampleQueries = meltDf.variable.sample(5)
tempLines = alt.Chart(meltDf.loc[meltDf.variable.isin(sampleQueries)]).mark_line().encode(
    x='value:Q',
    y='z:Q',
    color='variable:N')
mldSpots = alt.Chart(mldDf.loc[mldDf.loc[:,'index'].isin(sampleQueries)]).mark_circle(size=100).encode(
    x="mldTemp:Q",
    y="mld:Q",
    color="index:N")
alt.layer(*(tempLines,mldSpots))