# Welcome to the Dark Art of Coding:
## Introduction to Python
numpy module, part II

<img src='../../images/dark_art_logo.600px.png' width='300' style="float:right">

In [1]:
# Numpy is the fundamental package for scientific computing in Python.
# It provides:
#   a multidimensional array object
#   a collection of functions that perform operations including:
#       mathematical,
#       logical, 
#       shape manipulation, 
#       sorting, 
#       selection, 
#       Fourier transforms, 
#       linear algebra, 
#       statistical 
#       etc.

In [2]:
# Several important items to note:
#   NumPy arrays are a fixed size (note: Python's lists are not).
#   Elements in a NumPy array are required to be the same type
#   NumPy arrays execute vector mathematics/transforms without the need 
#        for 'for loops' resulting in a performance and efficiency improvement
#   Most scientific/math libraries use NumPy under the hood.
# 

# Loops versus vectorization...
---

In [3]:
import numpy as np
# Seed NumPy's global random number generator so that random draws later
#     in the notebook (e.g. np.random.randint) repeat on every fresh run.
np.random.seed(42)

In [4]:
# Let's process a series of values by doubling each value via a for loop.
# We start by creating a function that takes a series of values,
#     doubles them, and returns the doubled values.

def generate_doubles(values):
    """Return a new array holding twice each element of ``values``.

    This is the deliberately loop-based version: every element is read,
    doubled and stored one at a time, so that it can be timed against
    NumPy's vectorized ``values * 2``.

    Parameters
    ----------
    values : sequence
        Anything that supports ``len()`` and integer indexing
        (a list, a 1-D NumPy array, ...).

    Returns
    -------
    numpy.ndarray
        A 1-D array of ``len(values)`` elements. It is allocated with
        ``np.empty`` and its default dtype, so the result is floating
        point even when the inputs are integers.
    """
    doubled = np.empty(len(values))
    for position in range(doubled.size):
        doubled[position] = 2 * values[position]
    return doubled

In [30]:
# Build the integers 1 through 9 (np.arange excludes the stop value, 10),
#     then double them with the loop-based function.

values = np.arange(1, 10)

generate_doubles(values)

array([ 2.,  4.,  6.,  8., 10., 12., 14., 16., 18.])

In [31]:
# To compare the processing speed of the doubling function
#     versus using vectorization, we time each statement
#     using %timeit (a line magic available in IPython and Jupyter).

million = np.arange(1_000_000)

%timeit generate_doubles(million)

269 ms ± 2.09 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [32]:
# The vectorized equivalent: a single expression, no Python-level loop.
%timeit million * 2

760 µs ± 30.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [33]:
# At the time this code was first run, these two timings showed
#     vectorization to be roughly 375x (nearly 400x) faster than the for loop:

#     FOR LOOP:      269 ms ± 3.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
#     VECTORIZATION: 717 µs ± 9.66 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

# Universal functions (ufuncs)
---

## Arithmetic

|Operator|Equivalent ufunc|Example|
|---|---|---|
|+|np.add()|a + 1|
|-|np.subtract()|a - 1|
|\*|np.multiply()|a \* 2|
|/|np.divide()|a / 2|
|\*\*|np.power()|a \*\* 2|
|%|np.mod()|a % 2|

In [34]:
# Reminder of the array used in the examples that follow.
values

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [35]:
# The scalar is applied to every element (equivalent to np.add(values, 5)).
values + 5

array([ 6,  7,  8,  9, 10, 11, 12, 13, 14])

In [36]:
# Equivalent to np.multiply(values, 2): each element doubled.
values * 2

array([ 2,  4,  6,  8, 10, 12, 14, 16, 18])

In [37]:
# Equivalent to np.power(values, 3): each element cubed.
values ** 3

array([  1,   8,  27,  64, 125, 216, 343, 512, 729])

In [38]:
# Equivalent to np.mod(values, 2): 1 for odd elements, 0 for even ones.
values % 2

array([1, 0, 1, 0, 1, 0, 1, 0, 1])

## Assorted ufuncs families


|ufunc family|ufuncs||
|---|---|---|
|Trigonometric|np.sin(), .cos(), .tan()||
|Logs and exponents|np.exp(), .exp2(), .power(), .log()||
|Aggregation|np.sum(), .min(), .max(), .median(), .percentile(), .any(), .all()||


In [54]:
# Calling the ufunc directly gives the same result as values ** 2.
np.power(values, 2)

array([ 1,  4,  9, 16, 25, 36, 49, 64, 81])

In [55]:
# Both arguments may be arrays; the operation is then element-wise:
#     1**1, 2**2, 3**3, ... 9**9.
np.power(values, values)

array([        1,         4,        27,       256,      3125,     46656,
          823543,  16777216, 387420489])

In [56]:
# Natural logarithm of each element; the result is a float array.
np.log(values)

array([0.        , 0.69314718, 1.09861229, 1.38629436, 1.60943791,
       1.79175947, 1.94591015, 2.07944154, 2.19722458])

## ufuncs for pros

In [68]:
# Pre-allocate the destination once, sized from `values` rather than a
#     hard-coded 9, so the two shapes always agree. np.empty() leaves the
#     contents uninitialized, which is fine because every slot is overwritten.
prev_calc = np.empty(values.shape)

# out= makes the ufunc write its result directly into prev_calc
#     instead of allocating a new array for it.
np.add(values, 10, out=prev_calc)
print(prev_calc)

[11. 12. 13. 14. 15. 16. 17. 18. 19.]


In [69]:
# Twice as many slots as `values` (instead of a hard-coded 18), all zero.
prev_calc2 = np.zeros(2 * values.size)

# out= also accepts a view: the products are written into every other
#     slot of prev_calc2 and the slots in between keep their zeros.
np.multiply(values, 2, out=prev_calc2[::2])
print(prev_calc2)

[ 2.  0.  4.  0.  6.  0.  8.  0. 10.  0. 12.  0. 14.  0. 16.  0. 18.  0.]


In [71]:
# How is this different from:
#
# prev_calc2[::2] = np.multiply(values, 2)

# np.multiply(values, 2) first creates a temporary array in memory;
# those values are then copied to their final storage destination within prev_calc2.
# With out=, the results are written straight into prev_calc2 and no temporary is needed.

In [82]:
# reduce applies the ufunc repeatedly until a single value remains:
#     1 + 2 + ... + 9.
np.add.reduce(values)

45

In [77]:
# accumulate keeps every intermediate result: here, the running product.
np.multiply.accumulate(values)

array([     1,      2,      6,     24,    120,    720,   5040,  40320,
       362880])

## more ufuncs

In [79]:
# np.sum() gives the same total as np.add.reduce(values).
np.sum(values)

45

In [84]:
# Smallest and largest element of the array.
print(np.min(values))
print(np.max(values))

1
9


In [137]:
# argmin returns the index of the smallest value, not the value itself.
# A plain Python list is accepted as input here.
v = [5, 5, 5, 1, 5]

np.argmin(v)

3

## aggregating

In [90]:
# Twelve consecutive integers (0 through 11) arranged as 3 rows by 4 columns.
numbers = np.arange(12).reshape(3, 4)

In [91]:
# Display the 3 x 4 array that the aggregations below operate on.
numbers

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [92]:
# With no axis given, every element is summed into a single value.
numbers.sum()

66

In [93]:
# axis=0 collapses the rows: one sum per column (0 + 4 + 8 = 12, ...).
numbers.sum(axis=0)

array([12, 15, 18, 21])

In [94]:
# axis=1 collapses the columns: one sum per row (0 + 1 + 2 + 3 = 6, ...).
numbers.sum(axis=1)

array([ 6, 22, 38])

In [108]:
# Start from twelve zeros, then overwrite every other slot (indices 0, 2, 4, ...)
#     with six random integers drawn from 0 up to, but not including, 20.
some_zeros = np.zeros(12)
some_zeros[::2] = np.random.randint(0, 20, 6)
some_zeros

array([17.,  0., 11.,  0.,  1.,  0.,  9.,  0.,  3.,  0., 13.,  0.])

In [120]:
# Rearrange the same twelve values into 3 rows of 4 columns.
some_zeros = some_zeros.reshape(3, 4)
some_zeros

array([[17.,  0., 11.,  0.],
       [ 1.,  0.,  9.,  0.],
       [ 3.,  0., 13.,  0.]])

In [121]:
# any() with no axis: True if at least one element anywhere is non-zero.
print(some_zeros.any())
      

True


In [124]:
# axis=0: one answer per column; the all-zero columns give False.
some_zeros.any(axis=0)

array([ True, False,  True, False])

In [125]:
# axis=1: one answer per row; every row here holds a non-zero value.
some_zeros.any(axis=1)

array([ True,  True,  True])

In [130]:
# Back to the 1-D array of 1 through 9 for the percentile examples.
values

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [131]:
# The 25th percentile (first quartile) of the data.
np.percentile(values, 25)

3.0

In [138]:
# Pass a sequence to compute several percentiles at once. Results that fall
#     between two data points are interpolated (e.g. 1.8 lies between 1 and 2).
np.percentile(values, [10, 50, 90])

array([1.8, 5. , 8.2])