In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

## Making an array in NumPy

Make a simple array of zeros. This returns an object of class "ndarray".

In [3]:
# Allocate 100 zeros stored as 32-bit integers; the result is an ndarray.
a = np.zeros(shape=(100,), dtype=np.int32)
print(a)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]


In [7]:
# arange(start, stop, step): the stop value is exclusive, so stepping by 2
# from 10 towards 15 produces 10, 12 and 14.
print(np.arange(10, 15, 2, dtype=np.float32))

[10. 12. 14.]


In [8]:
# Build two float32 arrays and join them end to end; np.append returns a
# new array rather than growing either input.
b = np.array([5.0, 4.0, 3.0], dtype=np.float32)
c = np.array([2.0, 1.0], dtype=np.float32)
d = np.append(arr=b, values=c)
print(d)

[5. 4. 3. 2. 1.]


In [10]:
def func():
    """Allocate a throwaway 100000-element array of zeros and return None.

    The array is bound only to a local name, so it is never handed back to
    the caller.
    """
    scratch = np.zeros(100000)
    return None

Check size (number of elements) and number of bytes per element.

In [11]:
# .size is the total number of elements; .itemsize is the storage used by
# one element, in bytes.
print(a.size)
print(a.itemsize)

100
4


64-bit quantities of course take more memory per element.

In [12]:
# Same length as before, but each element is now a 64-bit float (8 bytes).
a = np.zeros(shape=(100,), dtype=np.float64)
print(a.itemsize)

8


Make an array of ones, this time with a floating point data type.

In [13]:
# 100 ones stored as 32-bit floats.
a = np.ones(shape=(100,), dtype=np.float32)
print(a)

[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1.]


Make an array with an increasing sequence of numbers.

In [14]:
# The integers 0 through 99 (the stop value 100 is excluded), as int32.
a = np.arange(0, 100, 1, dtype=np.int32)
print(a)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
 96 97 98 99]


Reference different elements of the array.

In [15]:
# Indexing is zero-based: a[0] is the first element.
print(a[0])

0


In [16]:
# Any single element can be read by its integer index.
print(a[50])

50


Reference a range of elements. Note the oddity that the end index is exclusive: to get elements `first` through `last` you write a[first:last+1].

In [17]:
# The slice 2:5 covers indices 2, 3 and 4; index 5 itself is excluded.
print(a[2:5])

[2 3 4]


We can omit the end index as a shortcut to reference everything through the end of the array.

In [19]:
# Leaving out the end index runs the slice through the last element.
print(a[90:])

[90 91 92 93 94 95 96 97 98 99]


In [20]:
# A third slice field sets the step: every second element starting at
# index 10 and stopping before index 30.
print(a[10:30:2])

[10 12 14 16 18 20 22 24 26 28]


We can add (or multiply, divide, etc.) two arrays of the same size:

In [21]:
# Two float32 operands of equal length: ten 2s and ten 1s.
a = np.full(shape=10, fill_value=2.0, dtype=np.float32)
b = np.ones(shape=10, dtype=np.float32)

In [23]:
# First operand: ten 2s.
print(a)

[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]


In [24]:
# Second operand: ten 1s.
print(b)

[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]


In [25]:
# '*' multiplies element by element (this is not a matrix product).
print(a * b)

[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]


In [26]:
# '+' adds element by element.
print(a + b)

[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]


In [27]:
# '/' divides element by element.
print(b / a)

[0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5]


## Multidimensional arrays

We can define a multidimensional array as well. We pass NumPy a "tuple" containing the array dimensions that we want.

In [29]:
# A 2-D int32 array with 12 rows and 4 columns; the shape is given as a tuple.
a = np.ones(shape=(12, 4), dtype=np.int32)

Note that if I print the array, the first index tells me which "row" I am looking at, and the second which "column." This is a convention choice in NumPy.

In [30]:
# Printed as 12 rows of 4 columns, matching the (12, 4) shape.
print(a)

[[1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]]


I can convert the array to a 1D array by "flattening" it:

In [31]:
# flatten() returns a one-dimensional copy holding all 48 elements.
aflat = a.flatten()
print(aflat)

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1]


We can see below how the array is stored in memory by looking at a flattened array:

In [32]:
# Mark a single element (row 2, column 3) with a distinct value so that its
# position can be tracked once the array is flattened. This modifies a in place.
a[2, 3] = 2
print(a)

[[1 1 1 1]
 [1 1 1 1]
 [1 1 1 2]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]]


This test suggests that the array is stored row by row (row-major order): the first row, then the second row, then the third, etc.

In [33]:
# Flatten again after the change: the marked element from row 2, column 3
# appears at flat index 2*4 + 3 = 11.
aflat = a.flatten()
print(aflat)

[1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1]


In [34]:
# View the same 48 values as 6 rows of 8: flat index 11 now falls at
# row 1, column 3 (1*8 + 3 = 11).
print(aflat.reshape((6,8)))

[[1 1 1 1 1 1 1 1]
 [1 1 1 2 1 1 1 1]
 [1 1 1 1 1 1 1 1]
 [1 1 1 1 1 1 1 1]
 [1 1 1 1 1 1 1 1]
 [1 1 1 1 1 1 1 1]]


A more reliable way to determine this is to ask the object about the length (in bytes) of the "stride" it takes in memory in each dimension. In this case, each row increment is 4 4-byte numbers, so 16 bytes, and each column increment is 1 4-byte number, so 4 bytes.

In [35]:
# strides gives the number of bytes to step to reach the next row and the
# next column, respectively.
print(a.strides)

(16, 4)


Another example:

In [26]:
# Use a dedicated name for this example instead of rebinding `b`: a later cell
# puts `b` into a list and expects the 10-element float32 array defined
# earlier, which would not survive a top-to-bottom run if it were overwritten here.
# With the default float64 dtype each element is 8 bytes, so one row of
# 20 elements spans 160 bytes.
grid = np.zeros((10, 20))
print(grid.strides)

(160, 8)


In [27]:
# IPython-only syntax: a trailing "?" displays help/introspection for the
# object. It is not valid plain Python and is meant for interactive use.
b?

Difference between NumPy arrays and Python lists:

In [36]:
# A Python list can mix unrelated types: here a string, an ndarray, a float,
# an int and a function object.
mylist = ['a', b, 78., 78, func]

In [37]:
# Printing a list shows the repr of every element, including the array and
# the function object.
print(mylist)

['a', array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], dtype=float32), 78.0, 78, <function func at 0x7f9a377acc10>]


In [38]:
# Hold the same four values twice: once in a plain Python list and once in
# a NumPy array (built from ones, then overwriting the last entry).
alist = [1.0] * 3 + [3.0]
aarray = np.ones(4)
aarray[-1] = 3.0
print(alist, aarray, sep="\n")

[1.0, 1.0, 1.0, 3.0]
[1. 1. 1. 3.]


In [39]:
# Reading one element of a list by index...
print(alist[2])

1.0


In [40]:
# ...looks the same as reading one element of an array.
print(aarray[2])

1.0


In [41]:
# For arrays, '+' is arithmetic: corresponding elements are added.
print(aarray + aarray)

[2. 2. 2. 6.]


In [42]:
# For lists, '+' is concatenation: the result has eight elements.
print(alist + alist)

[1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 1.0, 3.0]


In [45]:
# A list of numbers can be converted to an array with np.array.
print(np.array(alist))

[1. 1. 1. 3.]


In [46]:
# A list is free to mix floats with a string.
blist = [1.0] * 3 + ["hello"]
print(blist)

[1.0, 1.0, 1.0, 'hello']


In [47]:
# Adding a list that contains a string to a float array cannot work: NumPy
# has no 'add' loop for a (string, float64) operand pair and raises a
# UFuncTypeError, which is a subclass of TypeError. The error is the point
# of this cell, so catch it and display the message instead of letting it
# halt a Restart & Run All.
try:
    print(blist + aarray)
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

UFuncTypeError: ufunc 'add' did not contain a loop with signature matching types (dtype('<U32'), dtype('float64')) -> None

## ufuncs

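NumPy provides functions and ndarray methods that act on an array as a whole. (Strictly speaking, a "ufunc" — universal function — is an element-wise function such as np.add or np.sqrt, whereas methods like max() and mean() used below are reductions over the array.) To show how these work, I will generate a set of random numbers (uniform between 0 and 1).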

In [48]:
# Draw from a seeded generator so that every run of the notebook produces
# the same numbers and the outputs below can be reproduced.
rng = np.random.default_rng(seed=42)
r = rng.random(100)  # 100 samples, uniform on [0, 1)
print(r)

[0.38755951 0.26882708 0.4100732  0.07695825 0.87383881 0.67433494
 0.19584667 0.97221409 0.94096695 0.40556932 0.54275131 0.63471005
 0.25901389 0.63654889 0.8067426  0.57495465 0.94539921 0.71349272
 0.68191428 0.93652998 0.06606264 0.03331043 0.50758852 0.57074307
 0.15853815 0.39107969 0.94299298 0.63114838 0.18896133 0.98152171
 0.2942467  0.39864365 0.44378105 0.52034231 0.25157048 0.88865059
 0.93681706 0.23394425 0.97406512 0.31987737 0.81354839 0.31102635
 0.45975713 0.53069495 0.22712557 0.59983536 0.12823221 0.1804894
 0.69831503 0.77257248 0.99446363 0.52829745 0.81257318 0.61022343
 0.84814556 0.34587946 0.1038842  0.35869025 0.48639219 0.90536078
 0.2602415  0.10274975 0.08135628 0.82108007 0.51505129 0.4268225
 0.38197485 0.11107338 0.92865004 0.53262672 0.33449309 0.96427729
 0.08721945 0.33754772 0.27235138 0.74648662 0.06145765 0.80595858
 0.18376809 0.63781178 0.87507015 0.52306938 0.33737367 0.27303097
 0.40848377 0.86460889 0.33921882 0.05676996 0.84825289 0.252233

In [49]:
# Largest value in r.
print(r.max())

0.9944636263005462


In [50]:
# Smallest value in r.
print(r.min())

0.03331042706742682


In [51]:
# Arithmetic mean of r.
print(r.mean())

0.5047995339345547


In [52]:
# Standard deviation of r.
print(r.std())

0.2823265832765444


In [53]:
# Comparing an array with a scalar gives a boolean array with one
# True/False entry per element.
print(r > 0.9)

[False False False False False False False  True  True False False False
 False False False False  True False False  True False False False False
 False False  True False False  True False False False False False False
  True False  True False False False False False False False False False
 False False  True False False False False False False False False  True
 False False False False False False False False  True False False  True
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False]


In [55]:
# Indexing with the boolean mask keeps only the elements where it is True;
# summing the mask counts those elements (True counts as 1).
print(r[r > 0.9])
print((r > 0.9).sum())

[0.97221409 0.94096695 0.94539921 0.93652998 0.94299298 0.98152171
 0.93681706 0.97406512 0.99446363 0.90536078 0.92865004 0.96427729]
12


In [56]:
# np.sort returns a sorted copy in ascending order; r itself keeps its
# original ordering.
print(np.sort(r))

[0.03331043 0.05676996 0.06145765 0.06606264 0.07695825 0.08135628
 0.08721945 0.10274975 0.1038842  0.11107338 0.12823221 0.15853815
 0.1804894  0.18376809 0.18896133 0.19561722 0.19584667 0.22712557
 0.23394425 0.23760066 0.25157048 0.25223338 0.25901389 0.2602415
 0.26882708 0.27235138 0.27303097 0.28116425 0.2942467  0.30991355
 0.31102635 0.31987737 0.32853514 0.33449309 0.33737367 0.33754772
 0.33921882 0.34587946 0.35869025 0.38197485 0.38755951 0.39107969
 0.39864365 0.40556932 0.40848377 0.4100732  0.4268225  0.44378105
 0.45975713 0.48639219 0.50080737 0.50758852 0.51505129 0.52034231
 0.52306938 0.52829745 0.53069495 0.53262672 0.54275131 0.57074307
 0.57495465 0.59083845 0.59983536 0.61022343 0.63114838 0.63471005
 0.63539307 0.63654889 0.63781178 0.67433494 0.68191428 0.69831503
 0.71349272 0.74648662 0.77257248 0.78845462 0.80595858 0.8067426
 0.81257318 0.81354839 0.82108007 0.84814556 0.84825289 0.85288025
 0.86460889 0.87383881 0.87507015 0.88865059 0.90536078 0.928650

In [57]:
# On the sorted values the entries above 0.9 are grouped at the end of the mask.
print(np.sort(r) > 0.9)

[False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False  True  True  True  True  True  True  True  True
  True  True  True  True]


In [58]:
# argsort returns the indices that would put r in ascending order: the
# first entry is the position of the smallest value, the last entry the
# position of the largest.
isort = np.argsort(r)
print(isort)

[21 87 76 20  3 62 72 61 56 67 46 24 47 78 28 94  6 44 37 99 34 89 12 60
  1 74 83 95 30 96 41 39 98 70 82 73 86 55 57 66  0 25 31  9 84  2 65 32
 42 58 92 22 64 33 81 51 43 69 10 23 15 97 45 53 27 11 91 13 79  5 18 48
 17 75 49 93 77 14 52 40 63 54 88 90 85  4 80 35 59 68 19 36  8 26 16 71
  7 38 29 50]


In [59]:
# Indexing r with the argsort indices yields the values in sorted order,
# the same sequence that np.sort(r) printed.
print(r[isort])

[0.03331043 0.05676996 0.06145765 0.06606264 0.07695825 0.08135628
 0.08721945 0.10274975 0.1038842  0.11107338 0.12823221 0.15853815
 0.1804894  0.18376809 0.18896133 0.19561722 0.19584667 0.22712557
 0.23394425 0.23760066 0.25157048 0.25223338 0.25901389 0.2602415
 0.26882708 0.27235138 0.27303097 0.28116425 0.2942467  0.30991355
 0.31102635 0.31987737 0.32853514 0.33449309 0.33737367 0.33754772
 0.33921882 0.34587946 0.35869025 0.38197485 0.38755951 0.39107969
 0.39864365 0.40556932 0.40848377 0.4100732  0.4268225  0.44378105
 0.45975713 0.48639219 0.50080737 0.50758852 0.51505129 0.52034231
 0.52306938 0.52829745 0.53069495 0.53262672 0.54275131 0.57074307
 0.57495465 0.59083845 0.59983536 0.61022343 0.63114838 0.63471005
 0.63539307 0.63654889 0.63781178 0.67433494 0.68191428 0.69831503
 0.71349272 0.74648662 0.77257248 0.78845462 0.80595858 0.8067426
 0.81257318 0.81354839 0.82108007 0.84814556 0.84825289 0.85288025
 0.86460889 0.87383881 0.87507015 0.88865059 0.90536078 0.928650