# Notes on Notation

In [1]:
import numpy as np

In [2]:
# 4x4 example array for the indexing demos; the value at [row, col] is row + col + 1
nd = np.array([
    [1, 2, 3, 4],
    [2, 3, 4, 5],
    [3, 4, 5, 6],
    [4, 5, 6, 7],
])

In [3]:
nd[0,0] # Row 0, Col 0

1

In [4]:
nd[3,2] # Row 3, Col 2

6

In [5]:
nd[0:3,1:3] # Rows 0-2 (first through third; the stop index 3 is excluded), columns 1-2 (second and third)

array([[2, 3],
       [3, 4],
       [4, 5]])

In [6]:
nd[:, 3] # All rows, fourth column

array([4, 5, 6, 7])

In [7]:
nd[-1, 1:3] # Last row, second and third columns

array([5, 6])

# Creating Numpy Arrays

List to 1D array:

In [8]:
np.array([2,3,4])

array([2, 3, 4])

List of tuples to 2D array:

In [9]:
np.array([(2,3,4), (5,6,7)])

array([[2, 3, 4],
       [5, 6, 7]])

# Arrays with Initial Values

Create an empty array:

In [10]:
np.empty(5)

array([  1.28822975e-231,  -1.29074226e-231,   2.26342764e-314,
         2.26344561e-314,   2.26344564e-314])

You can also specify the rows and columns:

In [11]:
np.empty((5,4))

array([[  1.28822975e-231,   1.29074226e-231,   2.26342764e-314,
          2.26344561e-314],
       [  2.26344564e-314,   2.26344567e-314,   2.26344570e-314,
          2.26344573e-314],
       [  2.26344577e-314,   2.26344580e-314,   2.24995959e-314,
          2.25779650e-314],
       [  1.08857507e+296,   3.01091341e+160,   4.10957136e-280,
          1.39076453e-308],
       [  2.32035850e+077,  -2.68679158e+154,   1.28822975e-231,
          1.39067116e-308]])

`np.empty` allocates the array without initializing it, so the values shown are simply whatever happened to be in that memory location. Do not rely on them.

Create an array full of `1` values:

In [12]:
np.ones((5,4))

array([[ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.]])

# Specify the datatype

In [13]:
# Use the builtin `int`: the `np.int` alias was deprecated in NumPy 1.20 and removed in 1.24.
np.ones((5,4), dtype=int)

array([[1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1]])

The default data type is float, but you can specify otherwise.

# Generating random numbers

In [14]:
np.random.random((5,4))

array([[ 0.99857682,  0.17940637,  0.45215181,  0.65287742],
       [ 0.12650037,  0.29054806,  0.50778198,  0.3524189 ],
       [ 0.66895811,  0.08964705,  0.16717973,  0.69736636],
       [ 0.54158125,  0.92452891,  0.76086528,  0.60416841],
       [ 0.02127448,  0.40593614,  0.57739653,  0.89671024]])

You can also use `numpy.random.rand`, `numpy.random.normal` (for a normal distribution), and `numpy.random.randint` for a uniform distribution of integers.

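`numpy.random.rand` takes the dimensions as separate arguments instead of a shape tuple, which is slightly less typing but inconsistent with most other NumPy functions: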

In [15]:
np.random.rand(5,4)

array([[ 0.65952058,  0.91834662,  0.20939698,  0.72617621],
       [ 0.74886541,  0.42923731,  0.47609648,  0.2643534 ],
       [ 0.53285196,  0.58383228,  0.65338245,  0.37240268],
       [ 0.10775107,  0.24550998,  0.74152413,  0.94053143],
       [ 0.51448556,  0.43391174,  0.89436016,  0.12849079]])

By default, `np.random.normal` draws samples from a Gaussian (normal) distribution with a mean of 0 and a standard deviation of 1:

In [16]:
np.random.normal(size=(2,3))

array([[ 0.45616462,  0.25414094, -1.58868612],
       [-2.27483168,  0.26477947, -0.76101257]])

You can also specify a mean and standard deviation:

In [17]:
np.random.normal(50, 25, size=(2,3))

array([[  21.91022228,   57.89123249,   30.50920529],
       [  39.5342393 ,   13.87150304,  118.78401568]])

You can use `np.random.randint` to generate random integers within a range (the upper bound is exclusive):

In [18]:
# print(...) with a single argument behaves the same on Python 2 and Python 3.
# In every call below the upper bound (10) is exclusive.
print(np.random.randint(10)) # a single argument is the upper bound; low defaults to 0
print(np.random.randint(0, 10))
print(np.random.randint(0, 10, size=5)) # 1D array of 5 integers in [0, 10)
print(np.random.randint(0, 10, size=(2,3))) # a 2x3 array of random integers in [0, 10)

2
0
[2 8 4 5 6]
[[7 7 1]
 [8 1 7]]


# Array attributes

`shape` will give you the "shape" (number of rows and columns) of an array:

In [19]:
a = np.ones((3,5))

In [20]:
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(a.shape) # tuple with one entry per dimension
print(a.shape[0]) # number of rows
print(a.shape[1]) # number of columns
print(len(a.shape)) # number of dimensions

(3, 5)
3
5
2


`size` will give you the total number of elements:

In [21]:
print(a.size) # total element count (rows * columns for a 2D array)

15


`dtype` will tell you the data type of the elements in the array:

In [22]:
print(a.dtype) # element type of the array

float64


# Operations on ndarrays

In [23]:
np.random.seed(693) # fixed seed so the example array is reproducible
a = np.random.randint(0, 10, size=(5,4))
# Concatenate instead of passing two arguments so the output is the same on
# Python 2 and Python 3 (no extra space after the newline).
print('Array: \n' + str(a))

Array: 
[[2 0 5 1]
 [1 3 4 4]
 [9 2 9 1]
 [9 3 7 5]
 [4 7 0 3]]


Sum all the elements of an array:

In [24]:
a.sum()

79

Summing columns and rows:

In [25]:
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(a.sum(axis=0)) # sum of each column (collapses the row axis)
print(a.sum(axis=1)) # sum of each row (collapses the column axis)

[25 15 25 14]
[ 8 12 21 24 14]


Min, max, and mean:

In [26]:
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(a.min(axis=0)) # min for each column
print(a.max(axis=1)) # max for each row
print(a.mean()) # mean of all elements

[1 0 0 1]
[5 4 9 9 7]
3.95


# Locate Maximum Value

Write a `get_max_index` function that returns the index of the maximum value in a 1D array.

In [27]:
def get_max_index(a):
    """Return the index of the maximum value in the given 1D array.

    Parameters
    ----------
    a : array_like
        One-dimensional sequence of comparable values (ndarray, list, tuple).

    Returns
    -------
    Index of the first occurrence of the maximum value.

    Raises
    ------
    ValueError
        If `a` is empty.
    """
    # np.argmax accepts any array-like, whereas the .argmax() method exists only on ndarrays.
    return np.argmax(a)

def test_run():
    """Print a sample array, its maximum value, and the index of that maximum."""
    a = np.array([9, 6, 2, 3, 12, 14, 7, 10], dtype=np.int32)  # 32-bit integer array
    # Format into a single string so the output is identical on Python 2 and Python 3.
    print("Array: {}".format(a))

    # Find the maximum and its index in array
    print("Maximum value: {}".format(a.max()))
    print("Index of max.: {}".format(get_max_index(a)))

test_run()

Array: [ 9  6  2  3 12 14  7 10]
Maximum value: 14
Index of max.: 5


# Accessing array elements

In [31]:
a = np.random.rand(5, 4) # 5x4 array of uniform samples in [0, 1)
print(a)

[[ 0.56136226  0.80309824  0.02816901  0.34656847]
 [ 0.52476857  0.7780763   0.2852825   0.09296586]
 [ 0.18428266  0.27587949  0.01893441  0.96264274]
 [ 0.44672924  0.05898629  0.11829386  0.45481583]
 [ 0.41412521  0.214972    0.72249968  0.61072672]]


Access element in the fourth row, third column:

In [34]:
print(a[3,2]) # row index 3, column index 2 (zero-based)

0.118293862338


Slicing an array:

In [36]:
print(a[0, 1:3]) # In the zeroth (first) row, get elements in the second and third column

[ 0.80309824  0.02816901]


Get the top-left corner of the array:

In [37]:
print(a[0:2, 0:2]) # rows 0-1 and columns 0-1 (the stop index is exclusive)

[[ 0.56136226  0.80309824]
 [ 0.52476857  0.7780763 ]]


Specifying the steps when slicing:

In [39]:
print(a[:, 0:3:2]) # Selects columns 0, 2 for each row

[[ 0.56136226  0.02816901]
 [ 0.52476857  0.2852825 ]
 [ 0.18428266  0.01893441]
 [ 0.44672924  0.11829386]
 [ 0.41412521  0.72249968]]


# Modifying array elements

In [44]:
np.random.seed(693) # fixed seed so the example array is reproducible
a = np.random.rand(5, 4)
print(a)

[[ 0.86356288  0.73081098  0.7148832   0.30453533]
 [ 0.30360608  0.950046    0.13393385  0.75487025]
 [ 0.01771487  0.63701907  0.77232948  0.54253067]
 [ 0.00875843  0.36469785  0.45897155  0.88777042]
 [ 0.89703738  0.06210626  0.20591271  0.75672645]]


Assign a single value:

In [46]:
print(a[0,0])
a[0,0] = 1 # stored as 1.0 because the array holds floats
print(a[0,0])

0.863562875791
1.0


Assign a single value to an entire row:

In [48]:
print(a[0,:])
a[0,:] = 2 # the scalar is written to every element of row 0
print(a[0,:])

[ 1.          0.73081098  0.7148832   0.30453533]
[ 2.  2.  2.  2.]


Assign a single value to an entire column:

In [51]:
print(a[:,3])
a[:,3] = 3 # the scalar is written to every element of column 3
print(a)

[ 2.          0.75487025  0.54253067  0.88777042  0.75672645]
[[ 2.          2.          2.          3.        ]
 [ 0.30360608  0.950046    0.13393385  3.        ]
 [ 0.01771487  0.63701907  0.77232948  3.        ]
 [ 0.00875843  0.36469785  0.45897155  3.        ]
 [ 0.89703738  0.06210626  0.20591271  3.        ]]


# Indexing an array with another array

In [52]:
np.random.seed(693) # fixed seed so the example array is reproducible
a = np.random.rand(5)
print(a)

[ 0.86356288  0.73081098  0.7148832   0.30453533  0.30360608]


In [53]:
indicies = np.array([1,1,2,3])

In [54]:
print(a[indicies]) # one output element per entry of the index array; repeated indices are allowed

[ 0.73081098  0.73081098  0.7148832   0.30453533]


# Boolean or "mask" index arrays

In [55]:
a = np.array([(10,20,30,40), (40,30,20,10)])

In [57]:
print(a[a<25]) # a<25 is a boolean mask; the selected elements come back as a 1D array

[10 20 20 10]


Replacing values with masking:

In [59]:
a[a<25] = 99 # in-place assignment to every element where the mask is True
print(a)

[[99 99 30 40]
 [40 30 99 99]]


# Arithmetic operations

In [60]:
a = np.array([(1,2,3,4,5), (10,20,30,40,50)]) # 2x5 integer array for the arithmetic examples
print(a)

[[ 1  2  3  4  5]
 [10 20 30 40 50]]


Multiply each element by 2:

In [61]:
a * 2

array([[  2,   4,   6,   8,  10],
       [ 20,  40,  60,  80, 100]])

Divide each element by 2:

In [63]:
a / 2.0

array([[  0.5,   1. ,   1.5,   2. ,   2.5],
       [  5. ,  10. ,  15. ,  20. ,  25. ]])

Adding arrays:

In [64]:
b = np.array([(100,200,300,400,500), (1,2,3,4,5)])

In [65]:
a + b

array([[101, 202, 303, 404, 505],
       [ 11,  22,  33,  44,  55]])

Multiplying two arrays:

In [66]:
np.array([1,2,3]) * np.array([10,20,30])

array([10, 40, 90])

Note that this is element-wise multiplication, not matrix multiplication!

Use `np.dot()` (or the equivalent `.dot()` method) for matrix multiplication; for two 1D arrays, as here, it returns their inner product:

In [67]:
a = np.array([1,2,3])
b = np.array([10,20,30])
np.dot(a,b)

140