# Part 1: Array-oriented programming

## What is "array-oriented programming"?

In [None]:
import numpy as np

In [None]:
input_data = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])
output_data = np.zeros_like(input_data)      # zeros with the same shape and dtype as the input

# Imperative style: the loop explicitly specifies an order of execution,
# and its body says what happens to each individual element.
for i, x in enumerate(input_data):
    output_data[i] = x**2

output_data

In [None]:
input_data = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])

# Functional style: no explicit loop or index, but the lambda is
# still focused on the individual element "x".
output_data = np.fromiter(map(lambda x: x**2, input_data), dtype=int)

output_data

In [None]:
input_data = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])

# Array-oriented style: a single expression acts on the whole array at once.
output_data = input_data**2                  # implicit indexes, no individual elements

output_data

## What is array-oriented programming good for?

In [None]:
from hist import Hist  # histogram library

In [None]:
# one MILLION data points, drawn from a normal distribution with mean 0 and width 1
dataset = np.random.normal(loc=0, scale=1, size=1_000_000)

In [None]:
# Build a histogram with 100 regular bins between -5 and 5 and Double storage,
# then fill it with the whole array in a single call (no loop over data points).
Hist.new.Reg(100, -5, 5).Double().fill(dataset)

In [None]:
# square all one million values with one expression
dataset2 = dataset**2

In [None]:
# Same kind of histogram for the squared values: 100 regular bins between -1 and 10.
Hist.new.Reg(100, -1, 10).Double().fill(dataset2)

In [None]:
# elementwise reciprocal of the squared values, then elementwise sine
dataset3 = np.sin(1/dataset2)

In [None]:
# Histogram of the sine values: 100 regular bins between -1 and 1.
Hist.new.Reg(100, -1, 1).Double().fill(dataset3)

## NumPy

In [None]:
# "+" between two arrays adds them element by element: 10+1, 20+2, 30+3
np.array([10, 20, 30]) + np.array([1, 2, 3])

In [None]:
def quadratic_formula(a, b, c):
    """Return the "+" root of a*x**2 + b*x + c = 0.

    The arguments may be plain numbers or NumPy arrays. With arrays, every
    arithmetic step is applied elementwise, giving one root per element.
    """
    discriminant = b**2 - 4*a*c
    numerator = -b + np.sqrt(discriminant)
    return numerator / (2*a)

In [None]:
# plain Python numbers: one set of coefficients in, one root out
a = 5
b = 10
c = -0.1

quadratic_formula(a, b, c)

In [None]:
# one million random coefficients each, drawn uniformly from the given ranges
a = np.random.uniform(low=5, high=10, size=1_000_000)
b = np.random.uniform(low=10, high=20, size=1_000_000)
c = np.random.uniform(low=-0.1, high=0.1, size=1_000_000)

# the very same function, now evaluated on whole arrays in a single call
quadratic_formula(a, b, c)

### A note about performance

In [None]:
def pedantic_quadratic_formula(a, b, c):
    """Evaluate (-b + sqrt(b**2 - 4*a*c)) / (2*a) one NumPy call at a time.

    Every arithmetic step is written as an explicit NumPy function call whose
    result is bound to a named temporary, and each temporary is `del`-ed right
    after its last use. The steps are applied in the same order as in the
    one-line formula.
    """
    tmp1 = np.negative(b)            # -b
    tmp2 = np.square(b)              # b**2
    tmp3 = np.multiply(4, a)         # 4*a
    tmp4 = np.multiply(tmp3, c)      # tmp3*c
    del tmp3
    tmp5 = np.subtract(tmp2, tmp4)   # tmp2 - tmp4
    del tmp2, tmp4
    tmp6 = np.sqrt(tmp5)             # sqrt(tmp5)
    del tmp5
    tmp7 = np.add(tmp1, tmp6)        # tmp1 + tmp6
    del tmp1, tmp6
    tmp8 = np.multiply(2, a)         # 2*a
    return np.divide(tmp7, tmp8)     # tmp7 / tmp8

In [None]:
pedantic_quadratic_formula(a, b, c)

In [None]:
%%timeit

# Element-at-a-time version, for timing: call quadratic_formula once per
# (a, b, c) triple and store each result in a preallocated array.
imperative = np.empty_like(c)        # uninitialized array with c's shape and dtype
for i, (ai, bi, ci) in enumerate(zip(a, b, c)):
    imperative[i] = quadratic_formula(ai, bi, ci)

In [None]:
%%timeit

quadratic_formula(a, b, c)

In [None]:
%%timeit

pedantic_quadratic_formula(a, b, c)

In [None]:
import numexpr as ne

# NumExpr receives the whole formula as a string; the names a, b, c in it
# refer to the arrays defined above. The trailing ";" suppresses the cell output.
ne.evaluate("(-b + sqrt(b**2 - 4*a*c)) / (2*a)");

In [None]:
import numba as nb

# Same formula as quadratic_formula, written as if for a single element;
# the nb.vectorize decorator lets it be called on whole arrays.
@nb.vectorize
def numba_quadratic_formula(a, b, c):
    return (-b + np.sqrt(b**2 - 4*a*c)) / (2*a)

numba_quadratic_formula(a, b, c);

In [None]:
%%timeit
quadratic_formula(a, b, c)         # NumPy (loop over data for each operation... mostly)

In [None]:
%%timeit
ne.re_evaluate()                   # NumExpr (fast virtual machine)

In [None]:
%%timeit
numba_quadratic_formula(a, b, c)   # Numba (compiles with LLVM)

### Expressibility in NumPy

**Example:** Compute the size of the spaces between consecutive elements.

In [None]:
array = np.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9])
array

In [None]:
# all elements except the first, minus all elements except the last:
# each element minus its left neighbor (the same values as np.diff(array))
array[1:] - array[:-1]

**Quizlet:** Compute the length of the curve sampled by arrays `x` and `y`.

In [None]:
# The curve is the set of points (x(t), y(t)); t is the parameter along it.
t = np.linspace(0, 2*np.pi, 10000)   # 10000 evenly spaced values from 0 to 2*pi
x = np.sin(3*t)
y = np.sin(4*t)

<br>

<center>
<img src="../img/length-by-segment.svg" width="55%">
</center>

<br>

In [None]:
import matplotlib.pyplot as plt

In [None]:
plt.plot(x, y);

**Answer:**

In [None]:
# Your code goes here

**Example:** Big-endian to little-endian conversion.

In [None]:
# Store the values as big-endian 64-bit floats, then reinterpret the very same
# bytes as little-endian floats, which produces nonsense values.
# The explicit "<f8" (rather than the native-order np.float64) keeps this
# demonstration the same on any host; on the usual little-endian machine
# (modern machines use little-endian) the two dtypes are identical.
big_endian = np.array([1.1, 2.2, 3.3, 4.4, 5.5], dtype=">f8")
wrong_endianness = big_endian.view("<f8")
wrong_endianness

In [None]:
# to see the bytes as integers (0-255): reinterpret the same memory as
# unsigned 8-bit values, so each 64-bit float becomes 8 consecutive bytes
raw_bytes = wrong_endianness.view(np.uint8)
raw_bytes

In [None]:
# one row per number, one column per byte; -1 lets NumPy work out the row count
grouped = raw_bytes.reshape(-1, 8)
grouped

In [None]:
# keep every row (:) but step through the bytes of each row backward (::-1)
reversed_grouped = grouped[:, ::-1]
reversed_grouped

In [None]:
# collapse the rows-of-8-bytes grid back into a single flat run of bytes
flattened = reversed_grouped.reshape(-1)
flattened

In [None]:
# reinterpret the byte-reversed buffer with the same float dtype as before
flattened.view(wrong_endianness.dtype)

**Quizlet:** Downsample this curve to make it less noisy.

In [None]:
noisy_data = (
    np.sin(np.linspace(0, 2*np.pi, num=1000))          # sine over 0 to 2*pi, 1000 samples
    + np.random.normal(loc=0, scale=0.5, size=1000)    # plus normal noise of width 0.5
)

<br>

That is, replace every 10 consecutive array elements with their average value, reducing the number of array elements from 1000 to 100, but representing the same curve (ranging from ‒1 to 1).

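Hint: the figure below illustrates how the `axis` argument works for reducers (functions such as `np.sum` or `np.mean` that collapse a dimension of an array).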

<center>
<img src="../img/example-reducer-2d.svg" width="40%">
</center>

In [None]:
plt.plot(noisy_data);

**Answer:**

In [None]:
# Your code goes here