In [1]:
import sys
from pathlib import Path

# Make the project root (the parent of the notebook's working directory)
# importable. The path is resolved against the real working directory because
# Path('') is the relative path ".", whose .parent is again ".", which would
# put the notebook folder itself on sys.path rather than its parent.
currentWorkingDirectory = Path.cwd()
projectRootDirectory = currentWorkingDirectory.parent
projectRootPosix = projectRootDirectory.as_posix()
# Guard against duplicate entries when the cell is re-run.
if projectRootPosix not in sys.path:
    sys.path.append(projectRootPosix)

In [2]:
import pandas as pd
import numpy as np

from IPython.display import Markdown, display

In [3]:
def printmd(string):
    """Show ``string`` in the cell output, rendered as Markdown."""
    rendered = Markdown(string)
    display(rendered)

In [20]:
# Build the cubic benchmark array from a seeded generator so every
# Restart & Run All produces the same data (the seed call used to be
# commented out, leaving the array different on each run).
SEED = 3
N = 300  # edge length of the N x N x N test array
rng = np.random.default_rng(SEED)
arr = rng.standard_normal((N, N, N))

In [21]:
printmd("Run in **C-order** - expect mean over **last column** to be fastest")
%timeit -n 1 arr.mean(axis=0)
%timeit -n 1 arr.mean(axis=1)
%timeit -n 1 arr.mean(axis=2)

Run in **C-order** - expect mean over **last column** to be fastest

21.7 ms ± 3.54 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
16.8 ms ± 1.21 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
16 ms ± 1.46 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [22]:
printmd("Run in **Fortran-order** - expect mean over **first column** to be fastest")
arr = np.asfortranarray(arr)
%timeit -n 1 arr.mean(axis=0)
%timeit -n 1 arr.mean(axis=1)
%timeit -n 1 arr.mean(axis=2)

Run in **Fortran-order** - expect mean over **first column** to be fastest

16.4 ms ± 3.08 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
15.3 ms ± 763 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
19.6 ms ± 677 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [3]:
# Locations of the raw input files and of intermediate results, relative to
# the notebook's working directory.
dataPath = Path("../data/raw/")
interimDataPath = Path("../data/interim/")

def loadData(duplicateN:int=1):
    """Load ``atlanticInterpolated.csv`` and split it into arrays.

    Reads the CSV from ``dataPath``, removes every column whose value at row
    label 30 is greater than its value at row label 0 minus 0.1, and then
    repeats the remaining non-leading columns ``duplicateN`` times side by
    side. Only the non-leading columns are repeated, so the leading column
    (presumably ``z`` -- confirm against the file) appears exactly once and
    never ends up inside ``temps``.

    Parameters
    ----------
    duplicateN : int, default 1
        Number of side-by-side copies of the non-leading columns. Must be
        at least 1.

    Returns
    -------
    df : pandas.DataFrame
        The leading kept column followed by ``duplicateN`` copies of the
        other kept columns.
    temps : numpy.ndarray
        ``df.iloc[:, 1:].values`` -- every column of ``df`` except the
        leading one.
    z : numpy.ndarray
        Values of the ``z`` column as read from the file, before filtering.
    surfaceTemps : numpy.ndarray
        Column-wise mean of the first two rows of ``temps``.

    Raises
    ------
    ValueError
        If ``duplicateN`` is smaller than 1.
    """
    if duplicateN < 1:
        raise ValueError("duplicateN must be at least 1")

    df = pd.read_csv(dataPath / "atlanticInterpolated.csv")
    z = df.z.values

    # Keep a column only if its value at row 30 is NOT greater than its value
    # at row 0 minus 0.1 (same rule as before, expressed as a keep-list).
    # NOTE(review): the rule is also applied to the `z` column itself; if `z`
    # fails it, the first remaining column is treated as the leading column
    # below -- confirm this matches the data.
    keepColumns = [
        col for col in df.columns
        if not df.loc[30, col] > df.loc[0, col] - 0.1
    ]
    df = df[keepColumns]

    # Repeat only the non-leading columns. Duplicating the whole frame would
    # also repeat the leading column, and those extra copies would slip into
    # `temps` because only the first column is stripped off.
    leadingColumn = df.iloc[:, :1]
    dataColumns = df.iloc[:, 1:]
    df = pd.concat([leadingColumn] + [dataColumns] * duplicateN, axis=1)

    temps = df.iloc[:, 1:].values
    surfaceTemps = temps[:2, :].mean(axis=0)
    return df, temps, z, surfaceTemps