# NumPy basics
This is mostly a refresher, but I want to make sure I'm not missing some basic syntax.

In [1]:
import numpy as np

## Testing basic commands

In [2]:
a = np.arange(15).reshape(3, 5)
a

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

### The number of dimensions in this array

In [3]:
a.ndim

2

### The data type of the elements

In [4]:
a.dtype.name

'int64'

### The size in bytes of each element

In [5]:
a.itemsize

8

### The number of elements in the array

In [6]:
a.size

15

## Creating arrays

### We can make an array with different types

In [7]:
a = np.array([1,2,3])
a.dtype.name

'int64'

In [8]:
b = np.array([1.2,3.4,5.6])
b.dtype.name

'float64'

### We can even specify the type

In [9]:
c = np.array([[1,2], [3,4]], dtype='complex')
c

array([[ 1.+0.j,  2.+0.j],
       [ 3.+0.j,  4.+0.j]])

### Creating placeholder arrays with set size, but default values
This is useful when the size of the array is known, but not the values.
It is possible to construct the array one row at a time, but that is
much more expensive than creating the array with the final size,
filled with default values, and then filling it in afterward.

In [10]:
np.zeros((2,3))

array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.]])

In [11]:
np.ones((2,3,4))

array([[[ 1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.]],

       [[ 1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.]]])

In [12]:
np.empty((2,3)) # entries are uninitialized: they hold whatever was already in memory, so do not rely on them being zero

array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.]])

### Create a sequence of numbers

In [13]:
np.arange(10, 30, 5)

array([10, 15, 20, 25])

In [14]:
np.arange(0.1, 3.4, 0.4)

array([ 0.1,  0.5,  0.9,  1.3,  1.7,  2.1,  2.5,  2.9,  3.3])

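When the step is a floating point number, it is usually better to use `linspace`, because the number of values is specified rather than the step size. Rounding error in the step makes the length of an `arange` result hard to predict, whereas `linspace` always returns exactly the requested number of values.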

In [15]:
np.linspace(0, 5, 11)

array([ 0. ,  0.5,  1. ,  1.5,  2. ,  2.5,  3. ,  3.5,  4. ,  4.5,  5. ])

In [16]:
np.linspace(0, 2*np.pi, 100)

array([ 0.        ,  0.06346652,  0.12693304,  0.19039955,  0.25386607,
        0.31733259,  0.38079911,  0.44426563,  0.50773215,  0.57119866,
        0.63466518,  0.6981317 ,  0.76159822,  0.82506474,  0.88853126,
        0.95199777,  1.01546429,  1.07893081,  1.14239733,  1.20586385,
        1.26933037,  1.33279688,  1.3962634 ,  1.45972992,  1.52319644,
        1.58666296,  1.65012947,  1.71359599,  1.77706251,  1.84052903,
        1.90399555,  1.96746207,  2.03092858,  2.0943951 ,  2.15786162,
        2.22132814,  2.28479466,  2.34826118,  2.41172769,  2.47519421,
        2.53866073,  2.60212725,  2.66559377,  2.72906028,  2.7925268 ,
        2.85599332,  2.91945984,  2.98292636,  3.04639288,  3.10985939,
        3.17332591,  3.23679243,  3.30025895,  3.36372547,  3.42719199,
        3.4906585 ,  3.55412502,  3.61759154,  3.68105806,  3.74452458,
        3.8079911 ,  3.87145761,  3.93492413,  3.99839065,  4.06185717,
        4.12532369,  4.1887902 ,  4.25225672,  4.31572324,  4.37

## Printing arrays
When printing, in general:
- Last index: printed horizontally
- Second to last index: printed vertically
- All other indices: printed as separate grids separated by an empty line

In [17]:
np.arange(5)

array([0, 1, 2, 3, 4])

In [18]:
np.arange(9).reshape(3,3)

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [19]:
np.arange(18).reshape(2,3,3)

array([[[ 0,  1,  2],
        [ 3,  4,  5],
        [ 6,  7,  8]],

       [[ 9, 10, 11],
        [12, 13, 14],
        [15, 16, 17]]])

## Basic operations
We can perform arithmetic operations, which are applied element-wise. A new array is created with the result.

In [20]:
a = np.linspace(1, 9, 9)
b = np.linspace(11, 19, 9)

In [21]:
a - b

array([-10., -10., -10., -10., -10., -10., -10., -10., -10.])

In [22]:
a/b

array([ 0.09090909,  0.16666667,  0.23076923,  0.28571429,  0.33333333,
        0.375     ,  0.41176471,  0.44444444,  0.47368421])

In [23]:
np.sin(a)

array([ 0.84147098,  0.90929743,  0.14112001, -0.7568025 , -0.95892427,
       -0.2794155 ,  0.6569866 ,  0.98935825,  0.41211849])

In [24]:
a > 5

array([False, False, False, False, False,  True,  True,  True,  True], dtype=bool)

In [25]:
a*b

array([  11.,   24.,   39.,   56.,   75.,   96.,  119.,  144.,  171.])

Notice how the product is also applied element-wise. Matrix multiplication can be achieved using the `numpy.dot` function or the matrix class

In [26]:
np.dot(a,b)

735.0

In [27]:
np.dot(a.reshape(3,3), b.reshape(3,3))

array([[  90.,   96.,  102.],
       [ 216.,  231.,  246.],
       [ 342.,  366.,  390.]])

We can use the `+=`, `*=`, etc. operators as well

In [28]:
a = np.ones((5,5))

In [29]:
a

array([[ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.]])

In [30]:
a += np.ones((5,5))
a

array([[ 2.,  2.,  2.,  2.,  2.],
       [ 2.,  2.,  2.,  2.,  2.],
       [ 2.,  2.,  2.,  2.,  2.],
       [ 2.,  2.,  2.,  2.,  2.],
       [ 2.,  2.,  2.,  2.,  2.]])

In [31]:
a *= np.ones((5,5))*3
a

array([[ 6.,  6.,  6.,  6.,  6.],
       [ 6.,  6.,  6.,  6.,  6.],
       [ 6.,  6.,  6.,  6.,  6.],
       [ 6.,  6.,  6.,  6.,  6.],
       [ 6.,  6.,  6.,  6.,  6.]])

## Unary operators performed on arrays
These include things like `sum`, `min`, `max`

In [32]:
a = np.random.random((3,2))
a

array([[ 0.30050529,  0.78642735],
       [ 0.98630357,  0.20639702],
       [ 0.45195205,  0.73952578]])

In [33]:
a.sum()

3.4711110655308173

In [34]:
a.min()

0.20639702303234342

In [35]:
a.max()

0.98630357240644584

By default these operations are applied over all the elements of the array, as if it were flat. Passing the `axis` argument applies them along a single dimension instead.

In [36]:
a = np.arange(12).reshape(3,4)
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [37]:
a.sum(axis=0) # add down the rows (axis 0): one total per column

array([12, 15, 18, 21])

In [38]:
a.sum(axis=1) # add along the columns (axis 1): one total per row

array([ 6, 22, 38])

In [39]:
a.cumsum(axis=1) # running total along each row, from left to right

array([[ 0,  1,  3,  6],
       [ 4,  9, 15, 22],
       [ 8, 17, 27, 38]])

## Indexing, slicing, and looping
One-dimensional arrays can be indexed, sliced, and looped over much like a Python list

In [40]:
a = np.arange(10)**3
a

array([  0,   1,   8,  27,  64, 125, 216, 343, 512, 729])

In [41]:
a[8]

512

In [42]:
a[2:5]

array([ 8, 27, 64])

In [43]:
a[:6:2] = -1000 # overwrite every second element from index 0 up to (but excluding) index 6, i.e. positions 0, 2 and 4
a

array([-1000,     1, -1000,    27, -1000,   125,   216,   343,   512,   729])

In [44]:
a[::-1] # reverse array

array([  729,   512,   343,   216,   125, -1000,    27, -1000,     1, -1000])

In [45]:
# Print the elements on one space-separated line. A single parenthesised
# argument keeps these print calls valid on both Python 2 and Python 3.
print(' '.join(str(i) for i in a))
# A negative base raised to a fractional power has no real-valued result, so
# the -1000 entries come out as nan (np.cbrt would give the real cube root).
print(' '.join(str(i**(1/3.)) for i in a))

-1000 1 -1000 27 -1000 125 216 343 512 729
nan 1.0 nan 3.0 nan 5.0 6.0 7.0 8.0 9.0




Multidimensional arrays can have one index per dimension

In [46]:
a = np.fromfunction(lambda x,y: 10*x+y, (5,4), dtype=int)
a

array([[ 0,  1,  2,  3],
       [10, 11, 12, 13],
       [20, 21, 22, 23],
       [30, 31, 32, 33],
       [40, 41, 42, 43]])

In [47]:
a[2,3] # get a single element

23

In [48]:
a[0:5, 1] # get the complete second column

array([ 1, 11, 21, 31, 41])

In [49]:
a[:, 1] # same as above with less typing

array([ 1, 11, 21, 31, 41])

In [50]:
a[1:3, :] # get the complete second and third rows

array([[10, 11, 12, 13],
       [20, 21, 22, 23]])

If fewer indices are given than the number of dimensions, the missing indices are taken to be complete slices

In [51]:
a[-1]

array([40, 41, 42, 43])

In [52]:
a[0]

array([0, 1, 2, 3])

We can also use `...` to indicate "as many `:`'s as are needed to cover the remaining dimensions." This is useful when the first or middle indices are complete slices, but not the indices at the end

In [53]:
a[..., 1]

array([ 1, 11, 21, 31, 41])

### Iterating over a multidimensional array

In [54]:
# Iterating over a 2-D array yields its rows; iterating over a row yields its
# elements. Each output line shows the row itself, then its elements one by one.
# A single parenthesised argument keeps print valid on Python 2 and Python 3.
for row in a:
    print('%s  -  %s' % (row, ' '.join(str(col) for col in row)))

[0 1 2 3]  -  0 1 2 3
[10 11 12 13]  -  10 11 12 13
[20 21 22 23]  -  20 21 22 23
[30 31 32 33]  -  30 31 32 33
[40 41 42 43]  -  40 41 42 43


It's also possible to iterate over all the elements in the array using the `flat` attribute

In [55]:
# a.flat iterates over every element of the array in turn.
# A single parenthesised argument keeps print valid on Python 2 and Python 3.
print(' '.join(str(element) for element in a.flat))

0 1 2 3 10 11 12 13 20 21 22 23 30 31 32 33 40 41 42 43


## Shape manipulation

In [86]:
a = np.floor(10*np.random.random((3,4)))
a

array([[ 2.,  8.,  4.,  0.],
       [ 1.,  7.,  5.,  3.],
       [ 0.,  2.,  8.,  1.]])

In [87]:
a.shape

(3, 4)

We can change the shape of the array using any of these commands

In [88]:
a.ravel() # flatten

array([ 2.,  8.,  4.,  0.,  1.,  7.,  5.,  3.,  0.,  2.,  8.,  1.])

Notice that when `ravel()` is called, the array is flattened as a "C-style" array. That is the right-most number changes the fastest, and the rows are appended to one another.

In [89]:
a.shape = (6,2)
a

array([[ 2.,  8.],
       [ 4.,  0.],
       [ 1.,  7.],
       [ 5.,  3.],
       [ 0.,  2.],
       [ 8.,  1.]])

Notice again, the order is treated as C-style when the dimension is changed

In [90]:
a.transpose()

array([[ 2.,  4.,  1.,  5.,  0.,  8.],
       [ 8.,  0.,  7.,  3.,  2.,  1.]])

The `reshape` method returns a new array object with the requested dimensions and leaves the original unchanged, while the `resize` method modifies the array in place

In [92]:
a.resize((4,3))
a

array([[ 2.,  8.,  4.],
       [ 0.,  1.,  7.],
       [ 5.,  3.,  0.],
       [ 2.,  8.,  1.]])

In [95]:
a.reshape(6,-1) # a dimension with -1 will be calculated automatically

array([[ 2.,  8.],
       [ 4.,  0.],
       [ 1.,  7.],
       [ 5.,  3.],
       [ 0.,  2.],
       [ 8.,  1.]])

In [97]:
a # unmodified

array([[ 2.,  8.,  4.],
       [ 0.,  1.,  7.],
       [ 5.,  3.,  0.],
       [ 2.,  8.,  1.]])

### Arrays can be stacked as well

In [121]:
a = np.floor(10*np.random.random((2,2)))
b = np.floor(10*np.random.random((2,2)))

In [122]:
a

array([[ 2.,  9.],
       [ 6.,  3.]])

In [123]:
b

array([[ 9.,  8.],
       [ 4.,  1.]])

`vstack()` can be used to stack arrays vertically

In [124]:
np.vstack((a,b))

array([[ 2.,  9.],
       [ 6.,  3.],
       [ 9.,  8.],
       [ 4.,  1.]])

In [125]:
np.vstack((a,b,a))

array([[ 2.,  9.],
       [ 6.,  3.],
       [ 9.,  8.],
       [ 4.,  1.],
       [ 2.,  9.],
       [ 6.,  3.]])

`hstack()` can be used to stack arrays horizontally

In [127]:
np.hstack((a,b))

array([[ 2.,  9.,  9.,  8.],
       [ 6.,  3.,  4.,  1.]])

`column_stack()` stacks 1-D arrays as the columns of a 2-D array, so for 1-D inputs its result is the transpose of what `vstack()` returns (compare the three results below). For 2-D inputs it behaves like `hstack()`.

In [135]:
np.column_stack(([1,2,3],[4,5,6]))

array([[1, 4],
       [2, 5],
       [3, 6]])

In [136]:
np.vstack(([1,2,3],[4,5,6]))

array([[1, 2, 3],
       [4, 5, 6]])

In [137]:
np.hstack(([1,2,3],[4,5,6]))

array([1, 2, 3, 4, 5, 6])

### Splitting an array
An array can be split into n equal sub-arrays, or split at particular indices along an axis

In [139]:
a = np.floor(10*np.random.random((2,12)))
a

array([[ 2.,  4.,  4.,  4.,  2.,  6.,  2.,  6.,  7.,  6.,  5.,  0.],
       [ 6.,  6.,  8.,  7.,  3.,  2.,  3.,  4.,  8.,  3.,  9.,  3.]])

In [142]:
np.hsplit(a, 3) # split into 3 equal sub-arrays

[array([[ 2.,  4.,  4.,  4.],
        [ 6.,  6.,  8.,  7.]]), array([[ 2.,  6.,  2.,  6.],
        [ 3.,  2.,  3.,  4.]]), array([[ 7.,  6.,  5.,  0.],
        [ 8.,  3.,  9.,  3.]])]

In [147]:
np.hsplit(a,(4,6,8)) # cut before column indices 4, 6, and 8, giving four pieces

[array([[ 2.,  4.,  4.,  4.],
        [ 6.,  6.,  8.,  7.]]), array([[ 2.,  6.],
        [ 3.,  2.]]), array([[ 2.,  6.],
        [ 3.,  4.]]), array([[ 7.,  6.,  5.,  0.],
        [ 8.,  3.,  9.,  3.]])]

`vsplit` can be used to split on the rows rather than columns

In [148]:
b = np.floor(10*np.random.random((12,2)))
b

array([[ 2.,  5.],
       [ 4.,  9.],
       [ 7.,  5.],
       [ 9.,  7.],
       [ 3.,  3.],
       [ 4.,  8.],
       [ 9.,  4.],
       [ 4.,  4.],
       [ 7.,  8.],
       [ 1.,  6.],
       [ 0.,  9.],
       [ 2.,  7.]])

In [152]:
np.vsplit(b, 4)

[array([[ 2.,  5.],
        [ 4.,  9.],
        [ 7.,  5.]]), array([[ 9.,  7.],
        [ 3.,  3.],
        [ 4.,  8.]]), array([[ 9.,  4.],
        [ 4.,  4.],
        [ 7.,  8.]]), array([[ 1.,  6.],
        [ 0.,  9.],
        [ 2.,  7.]])]

In [153]:
np.vsplit(b, (5,6))

[array([[ 2.,  5.],
        [ 4.,  9.],
        [ 7.,  5.],
        [ 9.,  7.],
        [ 3.,  3.]]), array([[ 4.,  8.]]), array([[ 9.,  4.],
        [ 4.,  4.],
        [ 7.,  8.],
        [ 1.,  6.],
        [ 0.,  9.],
        [ 2.,  7.]])]

## Copies and views

In [157]:
a = np.arange(12)
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

Simple assignment does not create a copy.

In [169]:
b = a
b is a

True

In [170]:
b.resize((3,4))
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

This is because Python names and function arguments are references to objects: neither assigning an array to a new name nor passing it to a function makes a copy

In [171]:
def f(x):
    """Print the identity (``id``) of the object received as ``x``."""
    # print(...) with one argument behaves the same on Python 2 and Python 3.
    print(id(x))

# Both lines print the same id: the function receives the very same object.
# print(...) with one argument behaves the same on Python 2 and Python 3.
print(id(a))
f(a)

4379735664
4379735664


### Shallow copy of an array
This is done using the `view` function

In [172]:
c = a.view()
c is a

False

However, the data is still owned by the original object (because it's a shallow copy)

In [177]:
c.base is a

True

The shape of the view can be changed independent of the shape of the parent

In [178]:
c.shape = 2,6
a.shape

(3, 4)

Modifying the data in the view modifies the data in the parent

In [179]:
c[0,4] = 1234
a

array([[   0,    1,    2,    3],
       [1234,    5,    6,    7],
       [   8,    9,   10,   11]])

Slicing an array returns a view

In [181]:
s = a[:, 1:3]
s

array([[ 1,  2],
       [ 5,  6],
       [ 9, 10]])

In [183]:
s[:] = 10
s

array([[10, 10],
       [10, 10],
       [10, 10]])

In [184]:
a

array([[   0,   10,   10,    3],
       [1234,   10,   10,    7],
       [   8,   10,   10,   11]])

### Deep copy
A deep copy actually copies the data as well

In [186]:
d = a.copy()
d is a

False

In [187]:
d.base is a

False

In [188]:
d[0,0] = 9999
a

array([[   0,   10,   10,    3],
       [1234,   10,   10,    7],
       [   8,   10,   10,   11]])

# Less basic NumPy features

## Broadcasting rules
The broadcasting rules define how arrays that do not have the same shape are combined.
If the arrays being operated on do not have the same number of dimensions, a 1 is repeatedly prepended to the shape of the smaller array until both arrays have the same number of dimensions.
An array with size 1 along a dimension then behaves as if its data were copied along that dimension to match the other array.

In [196]:
a = np.arange(12).reshape(4,3) # larger array. shape = (4, 3)
b = np.arange(3) + 1 # smaller array. shape = (3,), broadcast as if it were (1, 3)
# print(...) with one argument behaves the same on Python 2 and Python 3.
print(a)
print(b)
a*b

[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]
[1 2 3]


array([[ 0,  2,  6],
       [ 3,  8, 15],
       [ 6, 14, 24],
       [ 9, 20, 33]])

`b` was repeated four times (once per row of `a`) when performing the multiplication

## Fancy indexing

In [198]:
# construct an array to work with
a = np.arange(12)**2
# indices we would like to access
i = np.array([1,1,3,8,5])

In [199]:
a

array([  0,   1,   4,   9,  16,  25,  36,  49,  64,  81, 100, 121])

In [201]:
a[i]

array([ 1,  1,  9, 64, 25])

In [202]:
j = np.array([[3,4], [5,6]])
a[j]

array([[ 9, 16],
       [25, 36]])

In [203]:
a = np.arange(12).reshape(3,4)
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

An array can be sliced in multiple dimensions as well

In [206]:
i = np.array([[0,1], [1,2]])
j = np.array([[2,1], [3,3]])
a[i,j] # should reproduce [ [a[0,2], a[1,1]], [a[1,3], a[2,3]]]

array([[ 2,  5],
       [ 7, 11]])

In [208]:
a[:,j]

array([[[ 2,  1],
        [ 3,  3]],

       [[ 6,  5],
        [ 7,  7]],

       [[10,  9],
        [11, 11]]])

We can put `i`,`j` into a tuple, and index using the new object.

In [209]:
l = i,j
a[l]

array([[ 2,  5],
       [ 7, 11]])

We can slice on a time-dependent series to pick out maxima

In [226]:
time = np.linspace(20, 14, 5)
data = np.sin(np.arange(20)).reshape(5,4)

In [227]:
time

array([ 20. ,  18.5,  17. ,  15.5,  14. ])

In [228]:
data

array([[ 0.        ,  0.84147098,  0.90929743,  0.14112001],
       [-0.7568025 , -0.95892427, -0.2794155 ,  0.6569866 ],
       [ 0.98935825,  0.41211849, -0.54402111, -0.99999021],
       [-0.53657292,  0.42016704,  0.99060736,  0.65028784],
       [-0.28790332, -0.96139749, -0.75098725,  0.14987721]])

Find the indices corresponding to the maximum values in the data

In [240]:
# find the indices of the maximum values in each column
ind = data.argmax(axis=0)
ind

array([2, 0, 3, 1])

In [241]:
time_max = time[ind]
time_max

array([ 17. ,  20. ,  15.5,  18.5])

In [243]:
# get peak value for each column: pair the winning row index of every
# column with that column's own index
# np.arange replaces the Python-2-only xrange, so this also runs on Python 3
data_max = data[ind, np.arange(data.shape[1])]
data_max

array([ 0.98935825,  0.84147098,  0.99060736,  0.6569866 ])

In [248]:
data_max == data.max(axis=0)

array([ True,  True,  True,  True], dtype=bool)

In [249]:
all(data_max == data.max(axis=0))

True

### Indexing with booleans

In [253]:
a = np.arange(12).reshape(3,4)
b = a > 4
b

array([[False, False, False, False],
       [False,  True,  True,  True],
       [ True,  True,  True,  True]], dtype=bool)

In [254]:
a[b]

array([ 5,  6,  7,  8,  9, 10, 11])

This returned a 1D array with the values passing the selection array `b`

In [255]:
a[b] = 0
a

array([[0, 1, 2, 3],
       [4, 0, 0, 0],
       [0, 0, 0, 0]])

## Linear algebra
There are many useful linear algebra functions in the `numpy.linalg` package

In [257]:
a = np.array([[1,2], [3,4]])
a

array([[1, 2],
       [3, 4]])

The transpose of an array/vector

In [260]:
a.transpose()

array([[1, 3],
       [2, 4]])

The inverse of a matrix

In [261]:
np.linalg.inv(a)

array([[-2. ,  1. ],
       [ 1.5, -0.5]])

The identity matrix is created using `numpy.eye(n)`, where `eye` stands for "I," and `n` is the dimension of the identity matrix

In [265]:
u = np.eye(2)
u

array([[ 1.,  0.],
       [ 0.,  1.]])

In [269]:
j = np.array([[0, -1], [1, 0]])
j

array([[ 0, -1],
       [ 1,  0]])

The dot product can be taken using `numpy.dot()`

In [272]:
np.dot(j,j)

array([[-1,  0],
       [ 0, -1]])

The trace

In [274]:
np.trace(u)

2.0

In [281]:
y = np.array([[5.], [7.]])
y

array([[ 5.],
       [ 7.]])

To solve the linear equations:

$$a_{0,0}\,x_0 + a_{0,1}\,x_1 = y_0$$

$$a_{1,0}\,x_0 + a_{1,1}\,x_1 = y_1$$

This solves for the vector `x`

In [280]:
np.linalg.solve(a, y)

array([[-3.],
       [ 4.]])

To find the eigenvalues/vectors

In [282]:
np.linalg.eig(j)

(array([ 0.+1.j,  0.-1.j]),
 array([[ 0.70710678+0.j        ,  0.70710678-0.j        ],
        [ 0.00000000-0.70710678j,  0.00000000+0.70710678j]]))