In [2]:
import numpy as np
from numba import jit, vectorize, int64, cuda
from decimal import Decimal
import copy

In [39]:
@jit
def abc_model_1(a, b, c, rain):
    """ABC-Model simulation taking the three parameters as separate scalars.
    Args:
        a, b, c: Model parameters as scalars.
        rain: Array of input rain.
    Returns:
        outflow: float64 array of simulated stream flow, one entry per
            element of rain.
    """
    n_steps = rain.size
    outflow = np.zeros(n_steps, dtype=np.float64)

    # Storage carried over from the previous step; the simulation starts at 0.
    storage = 0

    for step in range(n_steps):
        inflow = rain[step]
        # The outflow is computed from the storage *before* it is updated.
        outflow[step] = (1 - a - b) * inflow + c * storage
        storage = (1 - c) * storage + a * inflow
    return outflow

def abc_model_2(params, rain):
    """ABC-Model simulation reading its parameters from a dictionary.
    Args:
        params: Dictionary with the model parameters under the keys
            'a', 'b' and 'c'.
        rain: Array of input rain.
    Returns:
        outflow: float64 array of simulated stream flow, one entry per
            element of rain.
    """
    n_steps = rain.size
    outflow = np.zeros(n_steps, dtype=np.float64)

    # Storage carried over from the previous step; the simulation starts at 0.
    storage = 0

    for step in range(n_steps):
        inflow = rain[step]
        # The parameters are looked up in the dictionary on every iteration.
        # The outflow is computed from the storage *before* it is updated.
        outflow[step] = ((1 - params['a'] - params['b']) * inflow
                         + params['c'] * storage)
        storage = (1 - params['c']) * storage + params['a'] * inflow
    return outflow

@jit
def abc_model_3(params, rain):
    """ABC-Model simulation with dictionary parameters, decorated with @jit.

    The body performs the same computation as abc_model_2; only the
    decorator differs.
    Args:
        params: Dictionary with the model parameters under the keys
            'a', 'b' and 'c'.
        rain: Array of input rain.
    Returns:
        outflow: float64 array of simulated stream flow, one entry per
            element of rain.
    """
    n_steps = rain.size
    outflow = np.zeros(n_steps, dtype=np.float64)

    # Storage carried over from the previous step; the simulation starts at 0.
    storage = 0

    for step in range(n_steps):
        inflow = rain[step]
        # The parameters are looked up in the dictionary on every iteration.
        # The outflow is computed from the storage *before* it is updated.
        outflow[step] = ((1 - params['a'] - params['b']) * inflow
                         + params['c'] * storage)
        storage = (1 - params['c']) * storage + params['a'] * inflow
    return outflow

In [40]:
# One shared input of 10**6 random values so all three models are timed on
# identical data.
rain = np.random.rand((10**6))
# `-o` makes %timeit return its result object so it can be kept in a variable.
time_model_1 = %timeit -o abc_model_1(0.6, 0.1, 0.3, rain)
time_model_2 = %timeit -o abc_model_2({'a': 0.6, 'b': 0.1, 'c': 0.3}, rain)
time_model_3 = %timeit -o abc_model_3({'a': 0.6, 'b': 0.1, 'c': 0.3}, rain)

4.01 ms ± 65 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
725 ms ± 10.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
682 ms ± 4.74 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# Bubble sort example: speeding it up with `@jit`

In [33]:
# Unsorted reference data; the benchmark cells below each sort a copy of it.
X2 = np.random.randn(10000)

In [34]:
def bubblesort(X):
    """Sort X in place into ascending order with bubble sort.
    Args:
        X: Mutable indexable sequence (e.g. a 1-D array or a list).
    Returns:
        None. X itself is reordered.
    """
    # Each pass moves the largest remaining value to index `stop - 1`, so the
    # following pass can end one position earlier.
    for stop in range(len(X), 1, -1):
        for j in range(stop - 1):
            if X[j] > X[j + 1]:
                X[j], X[j + 1] = X[j + 1], X[j]

# Work on a copy so the reference data in X2 stays unsorted.
X = copy.deepcopy(X2)
# NOTE: bubblesort sorts X in place, so only the first timed call sees
# unsorted data; every later %timeit repetition runs on the sorted array.
%timeit bubblesort(X)

10.7 s ± 166 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [36]:
@jit
def bubblesort(X):
    """Bubble sort decorated with plain @jit; sorts X in place, ascending.
    Args:
        X: Mutable indexable sequence (e.g. a 1-D array).
    Returns:
        None. X itself is reordered.
    """
    n_items = len(X)
    # Unused local. NOTE(review): presumably present so that numba cannot
    # compile the whole function in nopython mode -- confirm before removing.
    val = Decimal(100)
    # Each pass moves the largest remaining value to index `stop - 1`.
    for stop in range(n_items, 1, -1):
        for j in range(stop - 1):
            if X[j] > X[j + 1]:
                X[j], X[j + 1] = X[j + 1], X[j]

# Work on a copy so the reference data in X2 stays unsorted.
X = copy.deepcopy(X2)
# NOTE: bubblesort sorts X in place, so only the first timed call sees
# unsorted data; every later %timeit repetition runs on the sorted array.
%timeit bubblesort(X)

44.6 ms ± 1.13 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [71]:
@jit(nopython=True)
def bubblesort(X):
    """Bubble sort compiled with nopython=True; sorts X in place, ascending.
    Args:
        X: Mutable indexable sequence (e.g. a 1-D array).
    Returns:
        None. X itself is reordered.
    """
    # Each pass moves the largest remaining value to index `stop - 1`, so the
    # following pass can end one position earlier.
    for stop in range(len(X), 1, -1):
        for j in range(stop - 1):
            left = X[j]
            right = X[j + 1]
            if left > right:
                X[j] = right
                X[j + 1] = left

# Work on a copy so the reference data in X2 stays unsorted.
X = copy.deepcopy(X2)
# NOTE: bubblesort sorts X in place, so only the first timed call sees
# unsorted data; every later %timeit repetition runs on the sorted array.
%timeit bubblesort(X)

28.9 ms ± 173 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [59]:
# Display X: bubblesort reorders its argument in place, so this is the sorted array.
X

array([-3.39814877, -3.36136802, -3.29646275, ...,  3.64318678,
        3.6510605 ,  4.04434868])

In [107]:
@jit(nopython=True)
def bubblesort(X):
    """Bubble sort compiled with nopython=True; sorts X in place, ascending.
    Args:
        X: Mutable indexable sequence (e.g. a 1-D array).
    Returns:
        None. X itself is reordered.
    """
    n_items = len(X)
    # `stop` is the exclusive upper bound of the still-unsorted prefix.
    for stop in range(n_items, 1, -1):
        for j in range(stop - 1):
            # Swap neighbours that are out of order.
            if X[j] > X[j + 1]:
                X[j], X[j + 1] = X[j + 1], X[j]

@jit(nopython=True)
def do_sort(sortedd):
    """Jitted wrapper that calls the jitted bubblesort on its argument.
    Args:
        sortedd: Sequence handed to bubblesort, which reorders it in place.
    Returns:
        None.
    """
    bubblesort(sortedd)
    return None
    
# Work on a copy so the reference data in X2 stays unsorted.
X = copy.deepcopy(X2)
# NOTE: do_sort sorts X in place (via bubblesort), so only the first timed
# call sees unsorted data; later %timeit repetitions run on the sorted array.
%timeit do_sort(X)

29.2 ms ± 865 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


# Vectorize

In [100]:
@vectorize([int64(int64, int64)])
def vec_add(x, y):
    """Scalar kernel for the vectorized addition: return the sum of x and y.

    Declared with the signature int64(int64, int64).
    """
    total = x + y
    return total

def vec_add2(X, Y, Z):
    """Element-wise addition written into a caller-supplied output.
    Args:
        X, Y: Indexable sequences; elements 0 .. len(X) - 1 are added.
        Z: Mutable sequence that receives X[i] + Y[i] at index i.
    Returns:
        Z, the same object that was passed in.
    """
    for idx in range(len(X)):
        Z[idx] = X[idx] + Y[idx]
    return Z
        

def vec_add3(X, Y):
    """Element-wise addition that collects the sums in a new Python list.
    Args:
        X, Y: Indexable sequences; elements 0 .. len(X) - 1 are added.
    Returns:
        A new list whose i-th entry is X[i] + Y[i].
    """
    return [X[idx] + Y[idx] for idx in range(len(X))]

@jit
def vec_add4(X, Y, Z):
    """@jit variant of the element-wise addition into a caller-supplied output.
    Args:
        X, Y: Indexable sequences; elements 0 .. len(X) - 1 are added.
        Z: Mutable sequence that receives X[i] + Y[i] at index i.
    Returns:
        Z, after it has been filled.
    """
    for idx in range(len(X)):
        Z[idx] = X[idx] + Y[idx]
    return Z
        
@jit
def vec_add5(X, Y):
    """@jit variant of the element-wise addition that builds a list.
    Args:
        X, Y: Indexable sequences; elements 0 .. len(X) - 1 are added.
    Returns:
        A list whose i-th entry is X[i] + Y[i].
    """
    sums = []
    n_items = len(X)
    for idx in range(n_items):
        sums.append(X[idx] + Y[idx])
    return sums
        
    
a = np.arange(100000, dtype=np.int64)

%timeit vec_add(a, a)
%timeit vec_add2(a, a, a)
%timeit vec_add3(a, a)
%timeit vec_add4(a, a, a)
%timeit vec_add5(a, a)

35.8 µs ± 205 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


  


41.8 ms ± 20.9 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
25.7 ms ± 77.1 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
59.4 µs ± 443 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
2.95 ms ± 18.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


# CUDA

In [109]:
# Print numba's view of the available CUDA GPUs.
print(cuda.gpus)

CudaSupportError: Error at driver init: 

CUDA driver library cannot be found.
If you are sure that a CUDA driver is installed,
try setting environment variable NUMBAPRO_CUDA_DRIVER
with the file path of the CUDA driver shared library.
:

In [5]:
@cuda.jit
def my_kernal(io_array):
    """CUDA kernel that doubles io_array in place, one element per thread.
    Args:
        io_array: Array whose elements are multiplied by 2 in place.
    """
    # Index of this thread inside its 1D block.
    thread_idx = cuda.threadIdx.x
    # Index of this block inside the 1D grid.
    block_idx = cuda.blockIdx.x
    # Number of threads per block.
    block_width = cuda.blockDim.x
    # Flattened position of this thread within the whole grid.
    pos = block_idx * block_width + thread_idx
    print(thread_idx, block_idx, block_width, pos)
    # Threads whose position lies past the end of the array do nothing.
    if pos >= io_array.size:
        return
    io_array[pos] *= 2

In [6]:
# 1000 random values used as input for the kernel.
X = np.random.randn(1000)

In [7]:
# A CUDA kernel has to be launched with an explicit [blocks, threads]
# configuration. Round the block count up so there is at least one thread per
# element; the bounds check inside the kernel makes the surplus threads no-ops.
threads_per_block = 32
blocks_per_grid = (X.size + threads_per_block - 1) // threads_per_block
my_kernal[blocks_per_grid, threads_per_block](X)

CudaSupportError: Error at driver init: 

CUDA driver library cannot be found.
If you are sure that a CUDA driver is installed,
try setting environment variable NUMBAPRO_CUDA_DRIVER
with the file path of the CUDA driver shared library.
:

In [6]:
X = np.random.randn(1000)
# zeros_like returns a new zero-filled array with X's shape and dtype; X
# itself is left unchanged. The full 1000-element result is displayed.
np.zeros_like(X)

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0