## Numpy

The building block of most scientific packages in Python

* fast arrays
* math functions
* linear algebra
* randomization
* ...

In [3]:
import pickle

import numpy as np

### Arrays

In [5]:
arr = np.array([1,2,3])

In [6]:
len(arr)

3

In [7]:
arr[2]

3

In [8]:
type(arr[1])

numpy.int64

In [9]:
arr.dtype

dtype('int64')

In [10]:
arr32 = np.array([1,2,3], dtype=np.int32)

In [11]:
arr32

array([1, 2, 3], dtype=int32)

In [12]:
arr * arr

array([1, 4, 9])

In [15]:
v1 = np.random.rand(10000000)
v2 = np.random.rand(10000000)


In [16]:
%time v1 * v2

CPU times: user 22.7 ms, sys: 18.2 ms, total: 40.9 ms
Wall time: 39.4 ms


array([ 0.0310576 ,  0.26571423,  0.42693973, ...,  0.64823823,
        0.09038711,  0.1311474 ])

In [23]:
# Get the dot product

np.dot(arr,arr)

14

In [24]:
# Another way to get the dot product

arr @ arr

14

In [25]:
mat = np.array([[1,2,3],[4,5,6],[7,8,9]])
mat


array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

#### arange()

In [28]:
# Numpy uses "a"-range to build arrays from ranges
v = np.arange(12)
v


array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

#### shape/reshape

In [32]:
# reshape the flat 12-element array to a 4x3 array
v.reshape(4,3)

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [34]:
mat = np.arange(12).reshape(4,3)
mat

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [35]:
mat.shape

(4, 3)

#### Memory frugal
Numpy doesn't want to copy data structures, so operations such as `reshape` hand back a view onto the same data whenever they can

* Beware of pass by reference!

In [37]:
# Reshape an existing matrix and assign to new variable
mat2 = mat.reshape((3,4))
mat2


array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [38]:
# Change the original matrix
mat[1,2] = 17


In [41]:
# Note that mat2 is a view onto the same data as mat (reshape did not copy), so the change made to mat shows up here too!
mat2


array([[ 0,  1,  2,  3],
       [ 4, 17,  6,  7],
       [ 8,  9, 10, 11]])

In [43]:
# Transpose an array

mat.T


array([[ 0,  3,  6,  9],
       [ 1,  4,  7, 10],
       [ 2, 17,  8, 11]])

In [45]:
mat

array([[ 0,  1,  2],
       [ 3,  4, 17],
       [ 6,  7,  8],
       [ 9, 10, 11]])

### Slicing


In [4]:
v = np.arange(1,6)  # Integers 1 through 5 - the stop value 6 is excluded
v


array([1, 2, 3, 4, 5])

In [5]:
v[2:4]  # Get the 3rd through 4th elements (indices 2 and 3; the stop index 4 is excluded)

array([3, 4])

In [58]:
# Two-index access needs a 2-D array; the arr defined earlier in the notebook is 1-D,
# so build the 3x4 matrix used throughout this section here.
arr = np.arange(12).reshape(3,4)

# Second row, first column
arr[1,0]

4

In [64]:
# Every row, second element (i.e. the column at index 1)
arr[:,1]

array([1, 5, 9])

In [65]:
arr[:,1].reshape((3,1))

array([[1],
       [5],
       [9]])

In [67]:
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [66]:
arr[1:,2:]

array([[ 6,  7],
       [10, 11]])

In [69]:
# broadcasting - the scalar 7 is assigned to every cell of the lower-right 2x2 block
arr[1:,2:] = 7
arr


array([[0, 1, 2, 3],
       [4, 5, 7, 7],
       [8, 9, 7, 7]])

In [74]:
x = np.arange(15).reshape(5,3)
x

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [85]:
# From the first row down (i.e. slice out the 0th row)
x[1:]

array([[ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [84]:
# From row index 2 (the third row) down
x[2:]

array([[ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [86]:
x[2:, 1:]

array([[ 7,  8],
       [10, 11],
       [13, 14]])

In [87]:
x[2:,:1]

array([[ 6],
       [ 9],
       [12]])

#### Slicing continued - from O'Reilly Intermediate Python

In [6]:
a_list = [0,1,10,11,20,21]
vector = np.array(a_list, float) # force type cast
vector

array([  0.,   1.,  10.,  11.,  20.,  21.])

In [7]:
array_2 = np.array(a_list, float).reshape(3,2)
array_2

array([[  0.,   1.],
       [ 10.,  11.],
       [ 20.,  21.]])

In [9]:
vector[0]

0.0

In [10]:
vector[-1]

21.0

In [11]:
vector[0:3]

array([  0.,   1.,  10.])

In [12]:
vector[::-1]  # Reversing (cool range trick)

array([ 21.,  20.,  11.,  10.,   1.,   0.])

In [15]:
array_3_list = np.array([0,1,2,10,11,12,20,21,22], float)
array_3_list

array([  0.,   1.,   2.,  10.,  11.,  12.,  20.,  21.,  22.])

In [16]:
array_3 = array_3_list.reshape(3,3)
array_3

array([[  0.,   1.,   2.],
       [ 10.,  11.,  12.],
       [ 20.,  21.,  22.]])

In [17]:
array_3[0] # The first element of the array is an array

array([ 0.,  1.,  2.])

In [19]:
array_3[0:2]

array([[  0.,   1.,   2.],
       [ 10.,  11.,  12.]])

In [20]:
array_3[::-1] # Reverse the 2d array - last becomes first, first becomes last

array([[ 20.,  21.,  22.],
       [ 10.,  11.,  12.],
       [  0.,   1.,   2.]])

In [21]:
array_3[2,1]  # The second element of the third row

21.0

In [22]:
array_3[:,1] # The second column

array([  1.,  11.,  21.])

In [23]:
...

Ellipsis

In [24]:
array_3[...,1] # Any dimension without specific instructions should include everything

array([  1.,  11.,  21.])

In [33]:
# Build a 4-D array holding the integers 0..119, laid out with shape (2, 3, 4, 5)
array_4 = np.arange(120, dtype=int).reshape((2, 3, 4, 5))
array_4

array([[[[  0,   1,   2,   3,   4],
         [  5,   6,   7,   8,   9],
         [ 10,  11,  12,  13,  14],
         [ 15,  16,  17,  18,  19]],

        [[ 20,  21,  22,  23,  24],
         [ 25,  26,  27,  28,  29],
         [ 30,  31,  32,  33,  34],
         [ 35,  36,  37,  38,  39]],

        [[ 40,  41,  42,  43,  44],
         [ 45,  46,  47,  48,  49],
         [ 50,  51,  52,  53,  54],
         [ 55,  56,  57,  58,  59]]],


       [[[ 60,  61,  62,  63,  64],
         [ 65,  66,  67,  68,  69],
         [ 70,  71,  72,  73,  74],
         [ 75,  76,  77,  78,  79]],

        [[ 80,  81,  82,  83,  84],
         [ 85,  86,  87,  88,  89],
         [ 90,  91,  92,  93,  94],
         [ 95,  96,  97,  98,  99]],

        [[100, 101, 102, 103, 104],
         [105, 106, 107, 108, 109],
         [110, 111, 112, 113, 114],
         [115, 116, 117, 118, 119]]]])

In [34]:
array_4[0,1] # only the first two axes are indexed; the 3rd and 4th dimensions are returned whole

array([[20, 21, 22, 23, 24],
       [25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34],
       [35, 36, 37, 38, 39]])

In [35]:
array_4[0,...,1] # fix the first axis at 0 and the last axis at 1; the ellipsis keeps every axis in between whole

array([[ 1,  6, 11, 16],
       [21, 26, 31, 36],
       [41, 46, 51, 56]])

In [36]:
np.array([array_4[0,i,j,1] for i in range(3) for j in range(4)]).reshape(3,4)  # same result as above

array([[ 1,  6, 11, 16],
       [21, 26, 31, 36],
       [41, 46, 51, 56]])

In [38]:
arr = np.array([0,1,10,11,20,21]).reshape(3,2)
arr

array([[ 0,  1],
       [10, 11],
       [20, 21]])

# Boolean Indexing

Select values in an array not by location but by some criteria

In [88]:
arr = np.arange(3)
arr

array([0, 1, 2])

In [90]:
# Pass in a boolean mask, all locations holding true will select those underlying values for the return
arr[np.array([True, False, True])]

array([0, 2])

In [93]:
# We can generate such a mask by doing boolean logic on the array, it'll return the map we want
arr >= 1

array([False,  True,  True], dtype=bool)

In [95]:
arr[arr >= 1]

array([1, 2])

In [98]:
arr = np.arange(10)

In [101]:
# Get everything where the value is greater than 2 and less than 7
arr[(arr>2)&(arr<7)]

array([3, 4, 5, 6])

In [96]:
mat = np.random.rand(5,5)
mat

array([[ 0.3202589 ,  0.93268913,  0.53259904,  0.14289764,  0.85696577],
       [ 0.9750687 ,  0.74822399,  0.4047018 ,  0.75702542,  0.25957011],
       [ 0.74427431,  0.85894505,  0.16298737,  0.72893348,  0.00535561],
       [ 0.03726729,  0.34261869,  0.25143283,  0.72770761,  0.27628391],
       [ 0.43027794,  0.44254818,  0.88083761,  0.16786464,  0.82645958]])

In [107]:
# Let's find all the values that are more than 1.5 std from the mean
mat[np.abs(mat - mat.mean()) > 1.5*mat.std()]

array([ 0.9750687 ,  0.00535561,  0.03726729])

In [109]:
# compute the difference from the mean for each cell
np.abs(mat - mat.mean())


array([[ 0.19229289,  0.42013735,  0.02004726,  0.36965414,  0.34441398],
       [ 0.46251692,  0.23567221,  0.10784998,  0.24447363,  0.25298167],
       [ 0.23172253,  0.34639327,  0.34956442,  0.21638169,  0.50719617],
       [ 0.47528449,  0.1699331 ,  0.26111895,  0.21515583,  0.23626787],
       [ 0.08227385,  0.07000361,  0.36828582,  0.34468714,  0.31390779]])

In [112]:
# All cells where the value is greater than 1.5 stds of the mean
np.abs(mat - mat.mean()) > 1.5*mat.std()

array([[False, False, False, False, False],
       [ True, False, False, False, False],
       [False, False, False, False,  True],
       [ True, False, False, False, False],
       [False, False, False, False, False]], dtype=bool)

In [113]:
# Apply the map
mat[np.abs(mat - mat.mean()) > 1.5*mat.std()]

array([ 0.9750687 ,  0.00535561,  0.03726729])

In [122]:
# Add scalar to every element
arr = np.arange(3)
arr+4

array([4, 5, 6])

In [123]:
# divide everything by 7
arr/7

array([ 0.        ,  0.14285714,  0.28571429])

In [127]:
mat = np.arange(9).reshape(3,3)
mat

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [137]:
# Vector from 2 up to (but not including) 8, in steps of 2
vec = np.arange(2,8,2)
vec

array([2, 4, 6])

In [135]:
# Add a vector to a matrix - the vector's length must match the number of columns; it is added to every row
mat+vec

array([[ 2,  5,  8],
       [ 5,  8, 11],
       [ 8, 11, 14]])

In [138]:
# More tricky example - let numpy clone rows/cols of a,b to have reasonable add
# See image below, #3
v1 = np.arange(3)
v2 = np.arange(3).reshape(3,1)

In [140]:
print(v1)
print(v2)

[0 1 2]
[[0]
 [1]
 [2]]


In [141]:
v1 + v2

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

![IMG_4523.JPG](attachment:IMG_4523.JPG)

In [147]:
v = np.arange(12).reshape((4,3))
v

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

### Various functions: any, all, min, max, etc...

In [149]:
# 0 counts as False, any non-zero value counts as True - any() asks whether at least one element is true
v.any()

True

In [150]:
v.all() # Are all of the elements true?  No, because of 0,0

False

In [151]:
v[0,0] = 1
v

array([[ 1,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [152]:
v.all()


True

In [153]:
v.min()

1

In [154]:
v.max()

11

In [162]:
v.prod()

39916800

In [163]:
v

array([[ 1,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [165]:
# Sum the cols
v.sum(axis=0)

array([19, 22, 26])

In [166]:
# sum the rows
v.sum(axis=1)

array([ 4, 12, 21, 30])

## Copying 

In [159]:
# copy() gives v1 its own data, so writing to v1 leaves v unchanged
v1 = v.copy()
v1[0,0] = 1000
v

array([[ 1,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

## Serialization

There are many ways to serialize an array; this one uses pickle.  We turn the array into a bytes object that can be written out easily.

In [167]:
data = v.dumps()
data

b'\x80\x02cnumpy.core.multiarray\n_reconstruct\nq\x00cnumpy\nndarray\nq\x01K\x00\x85q\x02c_codecs\nencode\nq\x03X\x01\x00\x00\x00bq\x04X\x06\x00\x00\x00latin1q\x05\x86q\x06Rq\x07\x87q\x08Rq\t(K\x01K\x04K\x03\x86q\ncnumpy\ndtype\nq\x0bX\x02\x00\x00\x00i8q\x0cK\x00K\x01\x87q\rRq\x0e(K\x03X\x01\x00\x00\x00<q\x0fNNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00tq\x10b\x89h\x03X`\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\t\x00\x00\x00\x00\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x00\x00\x00\x00\x00\x00q\x11h\x05\x86q\x12Rq\x13tq\x14b.'

In [168]:
# Restore the array from the pickled bytes with the standard library;
# np.loads was deprecated in NumPy 1.15 (and is gone from later releases) in favour of pickle.loads.
# NOTE: unpickling can run arbitrary code - only load bytes you created yourself or otherwise trust.
v2 = pickle.loads(data)
v2

array([[ 1,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [169]:
np.sin(np.pi/2)

1.0

In [170]:
v = np.arange(-3,3)
np.sin(v)

array([-0.14112001, -0.90929743, -0.84147098,  0.        ,  0.84147098,
        0.90929743])

__ufunc__ - Universal Functions that run against numpy arrays or standard Python values

## Decorate functions to be used with numpy arrays or standard Python values

In [177]:
@np.vectorize        # Our decorator
def noneg(n):
    """Replace a negative value with zero; return any other value unchanged.

    Thanks to np.vectorize the function accepts either a single number or an
    array, in which case it is applied element by element and an array is
    returned (a 0-d array for a scalar argument).
    """
    if n < 0:
        # Return a zero of the same type as the input rather than the int
        # literal 0: with no explicit otypes, np.vectorize picks the output
        # dtype from the result for the first element, so an int zero would
        # turn a float array that starts with a negative value into integers
        # (truncating fractions and failing on NaN).
        return type(n)(0)
    return n

In [178]:
noneg(3)

array(3)

In [179]:
noneg(3).shape

()

In [180]:
noneg(v)

array([0, 0, 0, 0, 1, 2])

In [181]:
nv = np.array([-1, np.nan, 1])

In [182]:
nv

array([ -1.,  nan,   1.])