In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

## Making an array in Numpy

Make a simple array of zeros. This returns an object of class "ndarray".

In [2]:
# One-dimensional array holding 100 zeros stored as 32-bit integers.
a = np.zeros(shape=100, dtype="int32")
print(a)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]


Check size (number of elements) and number of bytes per element.

In [3]:
# First line: total number of elements; second line: bytes used by one element.
print(a.size, a.itemsize, sep="\n")

100
4


64-bit quantities of course take more memory per element.

In [4]:
# Same length as before, but each element is now a 64-bit float.
a = np.zeros(shape=100, dtype="float64")
# The per-element size in bytes is a property of the array's dtype.
print(a.dtype.itemsize)

8


Make an array of ones, this time with a floating point data type.

In [5]:
# 100 ones stored as 32-bit (single-precision) floats.
a = np.ones(shape=100, dtype="float32")
print(a)

[ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]


Make an array with an increasing sequence of numbers.

In [6]:
# Integers from 0 up to (but not including) 100, in steps of 1.
a = np.arange(0, 100, 1, dtype=np.int32)
print(a)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99]


Reference different elements of the array.

In [7]:
# Indexing is zero-based, so index 0 selects the first element.
print(a[0])

0


In [8]:
# Index 50 selects the 51st element.
print(a[50])

50


Reference a range of elements. Note the oddity that the slice is written a[first:last+1]: the end index itself is not included, so a[2:5] returns elements 2, 3, and 4.

In [9]:
# A slice start:stop includes index `start` and excludes index `stop`,
# so this selects indices 2, 3 and 4.
print(a[2:5])

[2 3 4]


You can omit the end index as a shortcut to slice through to the end of the array.

In [10]:
# With the stop index omitted, the slice runs through the last element.
print(a[90:])

[90 91 92 93 94 95 96 97 98 99]


In [14]:
# a[10:30:2] takes every 2nd element from index 10 up to (not including) 30;
# the trailing [::2] then keeps every 2nd element of that result.
print(a[10:30:2][::2])

[10 14 18 22 26]


You can add (or multiply, divide, etc.) two arrays of the same size:

In [15]:
# b: ten single-precision ones.
b = np.ones(10, dtype=np.float32)
# a: ten single-precision twos, built as a new array by scaling the ones.
a = 2 * b

In [16]:
# Show the contents of a.
print(a)

[ 2.  2.  2.  2.  2.  2.  2.  2.  2.  2.]


In [17]:
# Show the contents of b.
print(b)

[ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]


In [18]:
# Arithmetic operators act element by element: entry i is a[i] * b[i].
print(a * b)

[ 2.  2.  2.  2.  2.  2.  2.  2.  2.  2.]


In [19]:
# Element-by-element sum: entry i is a[i] + b[i].
print(a + b)

[ 3.  3.  3.  3.  3.  3.  3.  3.  3.  3.]


In [20]:
# Element-by-element quotient: entry i is b[i] / a[i].
print(b / a)

[ 0.5  0.5  0.5  0.5  0.5  0.5  0.5  0.5  0.5  0.5]


In [21]:
# The same division as b / a, spelled out as an explicit Python loop that
# fills a pre-allocated result array one element at a time.
c = np.zeros(10)
for i in range(len(c)):
    c[i] = b[i] / a[i]
print(c)

[ 0.5  0.5  0.5  0.5  0.5  0.5  0.5  0.5  0.5  0.5]


## Multidimensional arrays

We can define a multidimensional array as well. We pass NumPy a "tuple" containing the array dimensions that we want.

In [24]:
# Two-dimensional array of 32-bit integer ones: 12 along the first axis, 4 along the second.
a = np.ones(shape=(12, 4), dtype="int32")

Note that if I print the array, the first index tells me which "row" I am looking at, and the second which "column." This is a convention choice in NumPy.

In [25]:
# Each inner bracket pair in the printout is one value of the first index;
# the entries inside it run along the second index.
print(a)

[[1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]]


I can convert the array to a 1D array by "flattening" it:

In [26]:
# flatten() returns a one-dimensional copy of the array; order="C" (the
# default) reads the elements row by row.
aflat = a.flatten(order="C")
print(aflat)

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1]


We can see how the array is stored in memory by changing a single element and then looking at the flattened array:

In [27]:
# Mark one element (first index 2, second index 3) with a distinct value so
# its position can be tracked.
a[2, 3] = 2
print(a)

[[1 1 1 1]
 [1 1 1 1]
 [1 1 1 2]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]]


This test reveals that the array is stored as the first row, then the second row, then the third, etc.

In [28]:
# Flatten again after the modification; order="C" (the default) reads the
# elements row by row, so the position of the 2 shows the ordering.
aflat = a.flatten(order="C")
print(aflat)

[1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1]


In [30]:
# Rearrange the 48 flattened values into 6 rows of 8; the element order is
# unchanged, only the shape differs.
print(aflat.reshape((6,8)))

[[1 1 1 1 1 1 1 1]
 [1 1 1 2 1 1 1 1]
 [1 1 1 1 1 1 1 1]
 [1 1 1 1 1 1 1 1]
 [1 1 1 1 1 1 1 1]
 [1 1 1 1 1 1 1 1]]


A more reliable way to determine this is to ask the object for its "strides": the number of bytes it steps through memory to move by one element in each dimension. In this case, moving to the next row skips four 4-byte numbers (16 bytes), and moving to the next column skips one 4-byte number (4 bytes).

In [31]:
# strides holds, for each dimension, the number of bytes to step in memory to
# reach the next element along that dimension.
print(a.strides)

(16, 4)


Another example:

In [36]:
# 10 x 20 array of zeros; float64 (8 bytes per element) is np.zeros' default
# dtype and is spelled out here because the strides depend on it.
b = np.zeros(shape=(10, 20), dtype=np.float64)
# One step along the first axis skips 20 elements * 8 bytes = 160 bytes.
print(b.strides)

(160, 8)


In [37]:
# IPython help syntax: a trailing ? asks IPython to display summary/help
# information about the object b.
b?

## ufuncs

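ufuncs ("universal functions") are NumPy functions that operate element by element on whole arrays; ndarray objects also provide methods, such as max, min, mean, and std, that act on the array as a whole. To show how these work, I will generate a set of random numbers (uniform between 0 and 1).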

In [38]:
# Seed NumPy's global random generator so that the same 100 samples are
# produced on every run of this cell.
np.random.seed(42)
# Draw 100 samples from the uniform distribution on [0, 1).
r = np.random.random(100)
print(r)

[ 0.58887457  0.34281741  0.67328143  0.2657617   0.52464245  0.37362533
  0.09252202  0.66417391  0.41080351  0.03477305  0.58882325  0.05447467
  0.68944888  0.30579951  0.35325489  0.50880865  0.45613009  0.71201636
  0.66016013  0.07713858  0.94858165  0.59703917  0.95691452  0.61988906
  0.12357546  0.80490687  0.33394956  0.40750456  0.07197123  0.46497809
  0.88415676  0.68258832  0.68664935  0.93873704  0.33415363  0.16904445
  0.93844258  0.08749115  0.74515156  0.15857018  0.8016851   0.53580889
  0.83593624  0.12233229  0.55851279  0.49557688  0.29740509  0.63188308
  0.98683538  0.83206089  0.10043695  0.95389908  0.86448828  0.21995406
  0.52364735  0.79547482  0.78819919  0.88430485  0.2531807   0.29562375
  0.3295182   0.01846576  0.34488627  0.70878065  0.12602367  0.13801124
  0.0296297   0.58353251  0.52979219  0.94914398  0.19455004  0.24014242
  0.20588997  0.84081668  0.14583231  0.6082501   0.65614152  0.79678715
  0.2134285   0.78351705  0.77385766  0.22177832  0

In [39]:
# Largest value in r.
print(r.max())

0.986835381904


In [40]:
# Smallest value in r.
print(r.min())

0.0126106156418


In [41]:
# Arithmetic mean of all elements of r.
print(r.mean())

0.489918959627


In [42]:
# Standard deviation of the elements of r.
print(r.std())

0.290166148298


In [43]:
# Comparing an array with a scalar is done element by element and yields a
# boolean array of the same shape.
print(r > 0.9)

[False False False False False False False False False False False False
 False False False False False False False False  True False  True False
 False False False False False False False False False  True False False
  True False False False False False False False False False False False
  True False False  True False False False False False False False False
 False False False False False False False False False  True False False
 False False False False False False False False False False False False
 False False False False False  True False  True False False False False
 False False False False]


In [44]:
# np.sort returns a sorted copy in ascending order; r itself is left unchanged.
print(np.sort(r))

[ 0.01261062  0.01846576  0.02075155  0.0296297   0.03477305  0.05447467
  0.07197123  0.07713858  0.08749115  0.09252202  0.10043695  0.12233229
  0.12357546  0.12602367  0.13045699  0.13801124  0.14583231  0.15857018
  0.16904445  0.17144214  0.19455004  0.20175344  0.20588997  0.2134285
  0.21995406  0.22177832  0.22378918  0.24014242  0.24540418  0.2531807
  0.2657617   0.29562375  0.29740509  0.30579951  0.3295182   0.33394956
  0.33415363  0.34281741  0.34488627  0.35325489  0.37362533  0.38475692
  0.40750456  0.41080351  0.45613009  0.46497809  0.48455675  0.49557688
  0.4981517   0.50880865  0.52364735  0.52464245  0.52979219  0.53580889
  0.55851279  0.56169997  0.57291932  0.58353251  0.58882325  0.58887457
  0.59703917  0.6082501   0.61988906  0.63188308  0.64186697  0.65614152
  0.66016013  0.66417391  0.67328143  0.68258832  0.68664935  0.68944888
  0.70878065  0.71201636  0.74515156  0.77385766  0.78351705  0.78819919
  0.79547482  0.79678715  0.8016851   0.80490687  0.8

In [45]:
# The same comparison applied to the sorted copy: because the values are
# ascending, all the True entries are grouped at the end.
print(np.sort(r) > 0.9)

[False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False  True  True  True  True  True
  True  True  True  True]
