In [1]:
from pathlib import Path

import pandas as pd
import numpy as np
import pythran

import altair as alt

from IPython.display import Markdown, display

%load_ext pythran.magic


In [2]:
def printmd(string):
    display(Markdown(string))

In [3]:
dataPath = Path("../data/raw/")
interimDataPath = Path("../data/interim/")

def loadData(duplicateN:int=1):
    df = pd.read_csv(dataPath / "atlanticInterpolated.csv")
    z = df.z.values
    dropColumns = []
    for col in df.columns:
        if df.loc[30, col] > df.loc[0, col] - 0.1:
            dropColumns.append(col)
    df = df.drop(columns=dropColumns)
    df = pd.concat([df for _ in range(duplicateN)], axis=1)
    temps = df.iloc[:, 1:].values
    tempsC = np.ascontiguousarray(temps)
    surfaceTemps = temps[:2, :].mean(axis=0)
    surfaceTempsC = tempsC[:2, :].mean(axis=0)
    return df, temps, z, tempsC, surfaceTemps, surfaceTempsC

In [4]:
print("Load data")
df, temps, z, tempsC, surfaceTemps, surfaceTempsC = loadData(duplicateN=100)
print("Data loaded")

Load data
Data loaded


In [5]:
thresholdTemperatureDifference = 0.1
surfaceTemps = temps[:2,:].mean(axis=0)
surfaceTemps.shape

(248299,)

In [6]:
%timeit -n 1 -r 5 (temps < (surfaceTemps - thresholdTemperatureDifference)).nonzero()

1.05 s ± 23.1 ms per loop (mean ± std. dev. of 5 runs, 1 loop each)


In [7]:
%%pythran
#pythran export pythranFunc(float64[:,:] order(C), float64[] order(C), float)
def pythranFunc(temps,surfaceTemps,thresholdTemperatureDifference):
    return (temps < (surfaceTemps - thresholdTemperatureDifference)).nonzero()

In [8]:
%timeit -n 1 -r 5 pythranFunc(tempsC,surfaceTempsC,thresholdTemperatureDifference)

419 ms ± 6.71 ms per loop (mean ± std. dev. of 5 runs, 1 loop each)
