# NumPy

In [1]:
import numpy as np
np.version.full_version

'1.12.1'

In [2]:
# Newline constant: passed as a trailing print() argument to leave a blank line after the printed value.
NL = '\n'

### Creating Arrays

In [3]:
# One float literal in the list is enough to promote every element to float64.
a = np.array([3.14, 1, 2, 3])
print(a, a.dtype, sep="\n")

[ 3.14  1.    2.    3.  ]
float64


In [4]:
# An explicit dtype overrides the type NumPy would infer from the values.
b = np.array([1, 2, 3, 4], dtype=np.int16)
for shown in (b, b.dtype):
    print(shown)

[1 2 3 4]
int16


In [5]:
# A list of equal-length sequences becomes a two-dimensional array (one row per sequence).
c = np.array([list(range(first, first + 5)) for first in (2, 4, 6, 8)])
print(c)

[[ 2  3  4  5  6]
 [ 4  5  6  7  8]
 [ 6  7  8  9 10]
 [ 8  9 10 11 12]]


In [6]:
d = np.zeros(10, dtype=int)       # ten integer zeros
e = np.ones((2, 5), dtype=float)  # 2x5 array of float ones
f = np.full((3, 3), 3.14)         # 3x3 array filled with a single value
g = np.arange(0, 20, 2)           # start, stop (exclusive), step
h = np.linspace(0, 1, 5)          # five evenly spaced points, both ends included
for label, values in (("d:", d), ("e:", e), ("f:", f), ("g:", g)):
    print(label, values, NL)
print("h:", h)

d: [0 0 0 0 0 0 0 0 0 0] 

e: [[ 1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.]] 

f: [[ 3.14  3.14  3.14]
 [ 3.14  3.14  3.14]
 [ 3.14  3.14  3.14]] 

g: [ 0  2  4  6  8 10 12 14 16 18] 

h: [ 0.    0.25  0.5   0.75  1.  ]


In [7]:
import numpy.random as rnd
rnd.seed(0)  # fixed seed so the draws below are repeatable
a = rnd.random((3, 3))                        # uniform samples
b = rnd.normal(loc=0, scale=1, size=(3, 3))   # mean 0, standard deviation 1
c = rnd.randint(low=0, high=10, size=(3, 3))  # integers from 0 up to, but not including, 10
for label, sample in (("a:", a), ("b:", b)):
    print(label, sample, NL)
print("c:", c)

a: [[ 0.5488135   0.71518937  0.60276338]
 [ 0.54488318  0.4236548   0.64589411]
 [ 0.43758721  0.891773    0.96366276]] 

b: [[ 1.26611853 -0.50587654  2.54520078]
 [ 1.08081191  0.48431215  0.57914048]
 [-0.18158257  1.41020463 -0.37447169]] 

c: [[0 1 9]
 [9 0 4]
 [7 3 2]]


In [8]:
# 5x5 identity matrix: ones on the diagonal, zeros elsewhere.
a = np.eye(5)
print(a)
# np.empty only allocates the array: the values shown are whatever already
# exists at that memory location, so they can differ from run to run.
b = np.empty(5)
print(b)

[[ 1.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.]
 [ 0.  0.  0.  1.  0.]
 [ 0.  0.  0.  0.  1.]]
[ 1.  1.  1.  1.  1.]


### Array attributes

In [9]:
a = rnd.randint(10, size=(3, 4, 5))  # 3D Array
print(a, NL)
# Plain attributes first, then the two that are measured in bytes.
for attribute in ("ndim", "shape", "size", "dtype"):
    print(attribute + ":", getattr(a, attribute))
for attribute in ("itemsize", "nbytes"):
    print(attribute + ":", getattr(a, attribute), "bytes")

[[[7 2 0 0 4]
  [5 5 6 8 4]
  [1 4 9 8 1]
  [1 7 9 9 3]]

 [[6 7 2 0 3]
  [5 9 4 4 6]
  [4 4 3 4 4]
  [8 4 3 7 5]]

 [[5 0 1 5 9]
  [3 0 5 0 1]
  [2 4 2 0 3]
  [2 0 7 5 9]]] 

ndim: 3
shape: (3, 4, 5)
size: 60
dtype: int64
itemsize: 8 bytes
nbytes: 480 bytes


### Array indexing

In [10]:
a = np.arange(10)
print(a)
# Positive indices count from the front, negative ones from the back.
# int() keeps the list display as plain integers: NumPy >= 2 shows scalars
# inside a container as np.int64(...), which would not match the recorded output.
print([int(value) for value in (a[0], a[5], a[9], a[-1], a[-4])])

[0 1 2 3 4 5 6 7 8 9]
[0, 5, 9, 9, 6]


In [11]:
# Item assignment modifies the existing array in place.
a[5] = 10
print(a)

[ 0  1  2  3  4 10  6  7  8  9]


In [12]:
b = rnd.randint(10, size=(2, 5))
print(b)
# Two-dimensional indexing is [row, column]; -1 addresses the last column.
top_left, bottom_right = b[0, 0], b[1, -1]
print(top_left, bottom_right)
b[0, 3] = 11
print(b)

[[0 2 7 2 9]
 [2 3 3 2 3]]
0 3
[[ 0  2  7 11  9]
 [ 2  3  3  2  3]]


In [13]:
# b holds integers, so the float 3.5 is truncated to 3 on assignment
# (see the printed result) rather than changing the array's type.
b[0,3] = 3.5
print(b)

[[0 2 7 3 9]
 [2 3 3 2 3]]


In [14]:
c = np.arange(10)
slices = (
    c[:5],     # first five elements
    c[5:],     # everything from index 5 on
    c[4:7],    # indices 4, 5 and 6
    c[::2],    # every second element
    c[1::2],   # every second element, starting at index 1
    c[::-1],   # all elements, reversed
    c[5::-2],  # backwards from index 5 in steps of two
)
for piece in slices:
    print(piece)

[0 1 2 3 4]
[5 6 7 8 9]
[4 5 6]
[0 2 4 6 8]
[1 3 5 7 9]
[9 8 7 6 5 4 3 2 1 0]
[5 3 1]


In [15]:
# Row r (r = 1..5) holds the values 10*r + 1 through 10*r + 5.
d = np.array([[10 * tens + units for units in range(1, 6)] for tens in range(1, 6)])
print(d)

[[11 12 13 14 15]
 [21 22 23 24 25]
 [31 32 33 34 35]
 [41 42 43 44 45]
 [51 52 53 54 55]]


In [16]:
sub_arrays = (
    d[:2, :3],      # first two rows, first three columns
    d[:3, :2],      # first three rows, first two columns
    d[::-1, ::-1],  # both axes reversed
    d[:, 0],        # first column
    d[0, :],        # first row
)
for view in sub_arrays:
    print(view, NL)
print(d[0])  # shorthand for d[0, :]

[[11 12 13]
 [21 22 23]] 

[[11 12]
 [21 22]
 [31 32]] 

[[55 54 53 52 51]
 [45 44 43 42 41]
 [35 34 33 32 31]
 [25 24 23 22 21]
 [15 14 13 12 11]] 

[11 21 31 41 51] 

[11 12 13 14 15] 

[11 12 13 14 15]


### Copy

In [17]:
# A slice is a view onto d's data, not an independent copy.
e = d[:2,:2]
print(e, NL)
# Writing through the view ...
e[0,0] = 77
print(e, NL)
# ... also changes d: d[0, 0] is 77 in the output below.
print(d)

[[11 12]
 [21 22]] 

[[77 12]
 [21 22]] 

[[77 12 13 14 15]
 [21 22 23 24 25]
 [31 32 33 34 35]
 [41 42 43 44 45]
 [51 52 53 54 55]]


In [18]:
print(d, NL)
# .copy() gives e its own data, detached from d.
e = d[:2,:2].copy()
print(e, NL)
# Modifying the copy ...
e[0,0] = 11
print(e, NL)
# ... leaves d untouched: d[0, 0] is still 77 in the output below.
print(d)

[[77 12 13 14 15]
 [21 22 23 24 25]
 [31 32 33 34 35]
 [41 42 43 44 45]
 [51 52 53 54 55]] 

[[77 12]
 [21 22]] 

[[11 12]
 [21 22]] 

[[77 12 13 14 15]
 [21 22 23 24 25]
 [31 32 33 34 35]
 [41 42 43 44 45]
 [51 52 53 54 55]]


### Reshape

In [19]:
# Lay the numbers 1..9 out as a 3x3 grid.
f = np.arange(1, 10).reshape((3, 3))
print(f)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [20]:
g = np.array([1, 2, 3])
as_row = (g.reshape(1, 3), g[np.newaxis, :])     # two spellings of a 1x3 row
as_column = (g.reshape(3, 1), g[:, np.newaxis])  # two spellings of a 3x1 column
for shaped in (as_row[0], as_row[1], as_column[0]):
    print(shaped, NL)
print(as_column[1])


[[1 2 3]] 

[[1 2 3]] 

[[1]
 [2]
 [3]] 

[[1]
 [2]
 [3]]


### Concatenate

In [21]:
# One-dimensional inputs.
a, b, c = (np.array(values) for values in ([1, 2, 3], [10, 11, 12], [77, 66, 55]))
# Two-dimensional inputs, both with two rows and three columns.
d = np.array([[4, 5, 6], [7, 8, 9]])
f = np.array([[13, 14, 15], [16, 17, 18]])

In [22]:
# Join two 1-D arrays end to end.
g = np.concatenate((a, b))
print(g)

[ 1  2  3 10 11 12]


In [23]:
g = np.concatenate((a, b, c))       # more than two arrays can be joined at once
print(g, NL)
g = np.concatenate((d, f), axis=0)  # axis=0 is the default: rows of f go below d
print(g, NL)
g = np.concatenate((d, f), axis=1)  # axis=1: columns of f go to the right of d
print(g)

[ 1  2  3 10 11 12 77 66 55] 

[[ 4  5  6]
 [ 7  8  9]
 [13 14 15]
 [16 17 18]] 

[[ 4  5  6 13 14 15]
 [ 7  8  9 16 17 18]]


In [24]:
# Stack the 1-D array a on top of the 2-D array d.
h = np.vstack((a, d))
print(h)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [25]:
# Append a column of 33s to the right-hand side of d.
g = np.array([[33], [33]])
h = np.hstack((d, g))
print(h)

[[ 4  5  6 33]
 [ 7  8  9 33]]


### Splitting

In [26]:
a = np.array([1, 2, 3, 5, 5, 3, 2, 1])
# Two split points give three pieces: a[:3], a[3:5] and a[5:].
cut_points = [3, 5]
b, c, d = np.split(a, cut_points)
print(b, c, d)

[1 2 3] [5 5] [3 2 1]


In [27]:
a = np.arange(1, 17).reshape((4, 4))
b, c = np.hsplit(a, [2])  # left and right parts, cut before column index 2
d, e = np.vsplit(a, [2])  # top and bottom parts, cut before row index 2
for part in (a, b, c, d):
    print(part, NL)
print(e)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]
 [13 14 15 16]] 

[[ 1  2]
 [ 5  6]
 [ 9 10]
 [13 14]] 

[[ 3  4]
 [ 7  8]
 [11 12]
 [15 16]] 

[[1 2 3 4]
 [5 6 7 8]] 

[[ 9 10 11 12]
 [13 14 15 16]]


### Array Arithmetic

In [28]:
a = np.arange(5)
# Arithmetic operators act element by element.
results = (
    a + 5,
    a - 3,
    a / 2,   # true division: float result
    a * 2,
    a // 2,  # floor division: integer result
)
for outcome in results:
    print(outcome)

[5 6 7 8 9]
[-3 -2 -1  0  1]
[ 0.   0.5  1.   1.5  2. ]
[0 2 4 6 8]
[0 0 1 1 2]


In [29]:
for outcome in (
    -a,                   # negation
    a ** 2,               # exponentiation
    a % 2,                # modulus
    -(0.5 * a + 1) ** 2,  # operators combine with the usual precedence
    a - 3.5,              # an integer array and a float give a float array
):
    print(outcome)

[ 0 -1 -2 -3 -4]
[ 0  1  4  9 16]
[0 1 0 1 0]
[-1.   -2.25 -4.   -6.25 -9.  ]
[-3.5 -2.5 -1.5 -0.5  0.5]


In [30]:
# The same operations written as explicit ufunc calls.
for outcome in (
    np.add(a, 5),           # a + 5
    np.subtract(a, 3),      # a - 3
    np.negative(a),         # -a
    np.multiply(a, 2),      # a * 2
    np.divide(a, 2),        # a / 2
    np.floor_divide(a, 2),  # a // 2
    np.power(a, 2),         # a ** 2
    np.mod(a, 2),           # a % 2
):
    print(outcome)

[5 6 7 8 9]
[-3 -2 -1  0  1]
[ 0 -1 -2 -3 -4]
[0 2 4 6 8]
[ 0.   0.5  1.   1.5  2. ]
[0 0 1 1 2]
[ 0  1  4  9 16]
[0 1 0 1 0]


### Absolute

In [31]:
a = np.arange(-5, 5)  # the integers -5 .. 4
# np.abs and np.absolute give the same result, as the last two printed lines show.
print(a, np.abs(a), np.absolute(a), sep=" " + NL + " ")

[-5 -4 -3 -2 -1  0  1  2  3  4] 
 [5 4 3 2 1 0 1 2 3 4] 
 [5 4 3 2 1 0 1 2 3 4]


In [32]:
a = np.array([3 + 4j, 4 - 3j, 2 + 0j, 1 - 0j])
# For complex input the absolute value is the magnitude.
magnitude = np.abs(a)
print(magnitude)

[ 5.  5.  2.  1.]


### Trigonometric Functions

In [33]:
theta = np.linspace(0, np.pi, 3)  # three angles from 0 to pi
a = [-1, 0, 1]
# Print options are global: they also affect every later print in the session.
np.set_printoptions(precision=5, suppress=True)
for trig in (np.sin, np.cos, np.tan):
    print(trig(theta))
for inverse_trig in (np.arcsin, np.arccos, np.arctan):
    print(inverse_trig(a))

[ 0.  1.  0.]
[ 1.  0. -1.]
[  0.00000e+00   1.63312e+16  -1.22465e-16]
[-1.5708  0.      1.5708]
[ 3.14159  1.5708   0.     ]
[-0.7854  0.      0.7854]


### Exponents and logarithms

In [34]:
x = np.array([1, 2, 3])
for label, powers in (
    ("e^x=", np.exp(x)),
    ("2^x=", np.exp2(x)),
    ("3^x=", np.power(3, x)),
):
    print(label, powers)

e^x= [  2.71828   7.38906  20.08554]
2^x= [ 2.  4.  8.]
3^x= [ 3  9 27]


In [35]:
x = np.array([1, 2, 4, 10])
for label, logs in (
    ("ln(x)   =", np.log(x)),
    ("log2(x) =", np.log2(x)),
    ("log10(x)=", np.log10(x)),
):
    print(label, logs)

ln(x)   = [ 0.       0.69315  1.38629  2.30259]
log2(x) = [ 0.       1.       2.       3.32193]
log10(x)= [ 0.       0.30103  0.60206  1.     ]


In [36]:
# For very small x, expm1 and log1p are more precise than computing
# np.exp(x) - 1 or np.log(1 + x) directly.
x = np.array([0, 0.001, 0.01, 0.1])
for label, small_x_result in (
    ("exp(x) - 1 =", np.expm1(x)),
    ("log(1 + x) =", np.log1p(x)),
):
    print(label, small_x_result)

exp(x) - 1 = [ 0.       0.001    0.01005  0.10517]
log(1 + x) = [ 0.       0.001    0.00995  0.09531]


### Specifying Output

For large calculations, it is sometimes useful to be able to specify the array where the result of the calculation will be stored. Rather than creating a temporary array, this can be used to write computation results directly to the memory location where you'd like them to be. For all ufuncs, this can be done using the out argument of the function. 

In [37]:
x = np.arange(5)

# Allocate the result buffer first, then let the ufunc write straight into it.
y = np.empty(x.shape)
np.multiply(x, 10, out=y)
print(y)

[  0.  10.  20.  30.  40.]


In [38]:
y = np.zeros(10)
# The slice is a view, so results written to it land in y[0], y[2], y[4], ...
every_other = y[::2]
np.power(2, x, out=every_other)
print(y)

[  1.   0.   2.   0.   4.   0.   8.   0.  16.   0.]


If we had instead written y[::2] = 2 \*\* x, this would have resulted in the creation of a temporary array to hold the results of 2 \*\* x, followed by a second operation copying those values into the y array. This doesn't make much of a difference for such a small computation, but for very large arrays the memory savings from careful use of the out argument can be significant.

### Aggregates & Outer Products


In [39]:
x = np.arange(1, 6)
for aggregate in (
    np.add.reduce,           # sum of all elements
    np.multiply.reduce,      # product of all elements
    np.add.accumulate,       # running sum
    np.multiply.accumulate,  # running product
):
    print(aggregate(x))
print(np.multiply.outer(x, x))  # every pairwise product: a multiplication table

15
120
[ 1  3  6 10 15]
[  1   2   6  24 120]
[[ 1  2  3  4  5]
 [ 2  4  6  8 10]
 [ 3  6  9 12 15]
 [ 4  8 12 16 20]
 [ 5 10 15 20 25]]


In [40]:
# One million random floats to time aggregations on.
big_array = rnd.rand(1000000)
# Python's built-in sum versus NumPy's np.sum on the same array.
%timeit sum(big_array)
%timeit np.sum(big_array)

10 loops, best of 3: 87.9 ms per loop
1000 loops, best of 3: 500 µs per loop


### Minimum and Maximum

In [41]:
# Python's built-in min versus NumPy's np.min on the same array.
%timeit min(big_array)
%timeit np.min(big_array)

10 loops, best of 3: 66.7 ms per loop
1000 loops, best of 3: 447 µs per loop


In [42]:
# Python's built-in max versus NumPy's np.max on the same array.
%timeit max(big_array)
%timeit np.max(big_array)

10 loops, best of 3: 65.6 ms per loop
1000 loops, best of 3: 416 µs per loop


In [43]:
# Shorter syntax: the same aggregates are available as array methods.
lowest, highest, total = big_array.min(), big_array.max(), big_array.sum()
print(lowest, highest, total)

1.4057692298e-06 0.999999439272 500205.927597


### Multi Dimensional Aggregate

In [44]:
a = rnd.random((3, 4))
print(a)
print(a.sum())  # no axis given: aggregate over the whole array
for per_axis in (
    a.min(axis=0),  # one minimum per column
    a.max(axis=1),  # one maximum per row
):
    print(per_axis)

[[ 0.1229   0.30288  0.36088  0.22887]
 [ 0.09894  0.03707  0.30653  0.7625 ]
 [ 0.73891  0.55222  0.65618  0.96183]]
5.12971598969
[ 0.09894  0.03707  0.30653  0.22887]
[ 0.36088  0.7625   0.96183]


In [45]:
# Mean, standard deviation, median and product over the whole array.
summary = (a.mean(), a.std(), np.median(a), a.prod())
print(*summary)

0.427476332474 0.287197677989 0.333706473393 6.78692284695e-07


### Broadcasting

Broadcasting allows binary operations to be performed on arrays of different sizes.
Broadcasting in NumPy follows a strict set of rules to determine the interaction between the two arrays:

   - Rule 1: If the two arrays differ in their number of dimensions, the shape of the one with fewer dimensions is padded with ones on its leading (left) side.
   - Rule 2: If the shape of the two arrays does not match in any dimension, the array with shape equal to 1 in that dimension is stretched to match the other shape.
   - Rule 3: If in any dimension the sizes disagree and neither is equal to 1, an error is raised.


In [46]:
M = np.ones(shape=(2, 3))  # two rows, three columns
a = np.arange(3)           # one dimension, three elements

Let's consider an operation on these two arrays. The shapes of the arrays are:

- ``M.shape = (2, 3)``
- ``a.shape = (3,)``

We see by rule 1 that the array ``a`` has fewer dimensions, so we pad it on the left with ones:

- ``M.shape -> (2, 3)``
- ``a.shape -> (1, 3)``

By rule 2, we now see that the first dimension disagrees, so we stretch this dimension to match:

- ``M.shape -> (2, 3)``
- ``a.shape -> (2, 3)``

The shapes match, and we see that the final shape will be ``(2, 3)``:

In [47]:
# a is added to every row of M.
broadcast_sum = M + a
print(broadcast_sum)

[[ 1.  2.  3.]
 [ 1.  2.  3.]]


In [48]:
a = np.arange(3)[:, np.newaxis]  # column vector, shape (3, 1)
b = np.arange(3)                 # one-dimensional, shape (3,)

Again, we'll start by writing out the shape of the arrays:

- ``a.shape = (3, 1)``
- ``b.shape = (3,)``

Rule 1 says we must pad the shape of ``b`` with ones:

- ``a.shape -> (3, 1)``
- ``b.shape -> (1, 3)``

And rule 2 tells us that we upgrade each of these ones to match the corresponding size of the other array:

- ``a.shape -> (3, 3)``
- ``b.shape -> (3, 3)``

Because the result matches, these shapes are compatible. We can see this here:

In [49]:
# Both operands are stretched, giving a 3x3 result.
stretched_sum = a + b
print(stretched_sum)

[[0 1 2]
 [1 2 3]
 [2 3 4]]


In [50]:
M = np.ones(shape=(3, 2))  # three rows, two columns
a = np.arange(3)           # one dimension, three elements

This is just a slightly different situation than in the first example: the matrix ``M`` is transposed.
How does this affect the calculation? The shapes of the arrays are:

- ``M.shape = (3, 2)``
- ``a.shape = (3,)``

Again, rule 1 tells us that we must pad the shape of ``a`` with ones:

- ``M.shape -> (3, 2)``
- ``a.shape -> (1, 3)``

By rule 2, the first dimension of ``a`` is stretched to match that of ``M``:

- ``M.shape -> (3, 2)``
- ``a.shape -> (3, 3)``

Now we hit rule 3–the final shapes do not match, so these two arrays are incompatible, as we can observe by attempting this operation:

In [51]:
# Shapes (3, 2) and (3,) cannot be broadcast together, so M + a raises a
# ValueError. Catch it and display the message so the notebook still runs
# from top to bottom while showing the failure.
try:
    print(M + a)
except ValueError as err:
    print("ValueError:", err)

### Comparison

In [52]:
a = np.arange(1, 6)
# Each comparison is element-wise and yields a boolean array.
for comparison in (a < 3, a > 3, a <= 3, a >= 3, a != 3, a == 3):
    print(comparison)

[ True  True False False False]
[False False False  True  True]
[ True  True  True False False]
[False False  True  True  True]
[ True  True False  True  True]
[False False  True False False]


In [53]:
# Element-wise comparison of two expressions built from the same array a:
# 2*a equals a**2 only where a == 2.
print((2 * a) == (a ** 2))

[False  True False False False]


### Counting Entries

In [54]:
rng = rnd.RandomState(0)
x = rng.randint(10, size=(3, 4))
print(x, NL)
print(x < 6, NL)
checks = (
    np.count_nonzero(x < 6),  # how many entries are below 6
    np.sum(x < 6),            # same count: each True adds 1
    np.sum(x < 6, axis=1),    # count per row
    np.any(x > 8),            # is any entry above 8?
    np.any(x < 0),            # is any entry negative?
    np.all(x < 10),           # are all entries below 10?
    np.all(x == 3),           # are all entries equal to 3?
    np.all(x < 8, axis=1),    # per row: are all entries below 8?
)
for answer in checks:
    print(answer)


[[5 0 3 3]
 [7 9 3 5]
 [2 4 7 6]] 

[[ True  True  True  True]
 [False False  True  True]
 [ True  True False False]] 

8
8
[4 2 2]
True
False
True
False
[ True False  True]


### Boolean operators

In [55]:
# & combines the two boolean masks element by element.
between_5_and_8 = (x > 5) & (x < 8)
print(np.sum(between_5_and_8))

3


In [56]:
# The same count, written as NOT (x <= 5 OR x >= 8).
outside_range = (x <= 5) | (x >= 8)
print(np.sum(~outside_range))

3


### Boolean Array as mask

In [57]:
# Show x again before using a boolean array as a mask on it.
print(x)

[[5 0 3 3]
 [7 9 3 5]
 [2 4 7 6]]


In [58]:
# Indexing with a boolean array returns a 1-D array of the entries where the mask is True.
below_five = x < 5
print(x[below_five])

[0 3 3 3 2 4]


### Fancy Indexing

In [59]:
# Ten random integers from 0 up to, but not including, 100.
x = rng.randint(low=0, high=100, size=10)
print(x)

[88 88 12 58 65 39 87 46 88 81]


In [60]:
index = [1, 3, 5]
# A list of positions selects several elements in one indexing operation.
picked = x[index]
print(picked)

[88 58 39]


In [61]:
x = np.arange(12).reshape((3, 4))
row = np.array([0, 1, 2])
col = np.array([2, 1, 3])
print(x)
# The index arrays are paired up: x[0, 2], x[1, 1], x[2, 3].
paired = x[row, col]
print(paired)


[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[ 2  5 11]


The pairing of indices in fancy indexing follows all the broadcasting rules. So if we combine a column vector and a row vector within the indices, we get a two-dimensional result:

In [62]:
# A (3, 1) column of row indices against a (3,) vector of column indices.
row_as_column = row[:, np.newaxis]
print(x[row_as_column, col])

[[ 2  1  3]
 [ 6  5  7]
 [10  9 11]]


In [63]:
# A plain index combined with a fancy one: row 2, columns 2, 0 and 1.
column_order = [2, 0, 1]
print(x[2, column_order])

[10  8  9]


In [64]:
# A slice combined with a fancy index: rows 1 onward, columns 2, 0 and 1.
column_order = [2, 0, 1]
print(x[1:, column_order])

[[ 6  4  5]
 [10  8  9]]


In [65]:
# Boolean mask over the columns: keep columns 0 and 2.
mask = np.array([True, False, True, False])
print(x[row[:, np.newaxis], mask])

[[ 0  2]
 [ 4  6]
 [ 8 10]]


### Sorting

In [71]:
a = np.array([2, 1, 4, 3, 5])
i = np.argsort(a)  # the indices that would put a in sorted order
# np.sort returns a sorted copy and leaves a unchanged.
print(a, i, np.sort(a), sep=" " + NL + " ")

[2 1 4 3 5] 
 [1 0 3 2 4] 
 [1 2 3 4 5]


In [69]:
# ndarray.sort() sorts in place and returns None. It is applied to a copy so
# that a keeps its original order: the argsort indices in i only describe the
# unsorted array, and later cells index a with them.
a_sorted = a.copy()
a_sorted.sort()  # in-place sort
print(a_sorted)

[1 2 3 4 5]


In [72]:
# Indexing with the argsort result i gives the sorted values only while a
# still holds the unsorted data that i was computed from.
a[i]

array([1, 2, 3, 4, 5])

In [75]:
a = rnd.randint(0, 10, (4, 6))
print(a)
# axis=0 sorts within each column, axis=1 within each row.
for axis in (0, 1):
    print(np.sort(a, axis=axis))

[[9 2 0 1 9 7]
 [3 8 2 5 1 5]
 [8 5 1 4 2 3]
 [3 8 4 2 9 8]]
[[3 2 0 1 1 3]
 [3 5 1 2 2 5]
 [8 8 2 4 9 7]
 [9 8 4 5 9 8]]
[[0 1 2 7 9 9]
 [1 2 3 5 5 8]
 [1 2 3 4 5 8]
 [2 3 4 8 8 9]]


### Partial Sort


In [76]:
b = np.array([7, 2, 4, 6, 3, 1, 8, 9, 5])
# Partial sort: values no larger than the one at index kth come before it,
# the rest after it; neither side is otherwise ordered.
kth = 4
print(np.partition(b, kth))

[2 3 4 1 5 6 8 9 7]


In [78]:
# Partition every row on its own: the three smallest values of each row end up in its first three positions.
np.partition(a, kth=2, axis=1)

array([[0, 1, 2, 9, 9, 7],
       [1, 2, 3, 5, 8, 5],
       [1, 2, 3, 4, 5, 8],
       [2, 3, 4, 8, 9, 8]])