In [2]:
# Numpy provides:
## ndarray - fast and space-efficient multidimensional array providing vectorized arithmetic operations
##    and sophisticated broadcasting capabilities
## standard mathematical functions for fast operations on entire arrays of data without having to write loops
## tools for reading / writing array data to disk and working with memory-mapped files
## linear algebra, random number generation, and Fourier transform capabilities
## tools for integrating code written in C, C++, and Fortran

In [22]:
# ndarray - n-dimensional arrays
import random
import numpy as np
# draw a 2 x 3 array (2 rows of 3 elements) of uniform random floats between 0 and 1
# no seed is set here, so the values differ on every run
data = np.random.uniform(low=0, high=1, size=(2, 3))
data

array([[ 0.36266433,  0.57161027,  0.31522562],
       [ 0.2206973 ,  0.52578877,  0.96649052]])

In [27]:
# multiplying by a scalar multiplies every element of the array
data * 10

array([[ 3.62664326,  5.71610275,  3.15225624],
       [ 2.20697302,  5.25788767,  9.66490518]])

In [28]:
# adding two arrays of the same shape adds them element by element
data + data

array([[ 0.72532865,  1.14322055,  0.63045125],
       [ 0.4413946 ,  1.05157753,  1.93298104]])

In [29]:
# an ndarray is a generic multidimensional container for homogeneous data
# every array has a
## shape - a tuple indicating the size of each dimension
## dtype - an object describing the data type of the array
# the expression below was previously fused onto the end of the comment above,
# so the cell evaluated nothing; it belongs on its own line
data.shape

(2, 3)

In [31]:
# shape is a tuple with the size of each dimension: here 2 rows of 3 elements
data.shape

(2, 3)

In [32]:
# dtype describes the type shared by every element of the array
data.dtype

dtype('float64')

In [33]:
# create arrays using the np.array function
# it takes any sequence-like object (including other arrays)
# and produces a new NumPy array containing the passed data
## try it on a list that mixes ints and a float
data1 = [6, 7.5, 8, 0, 1]

In [34]:
# convert the Python list into a one-dimensional ndarray
arr1 = np.array(data1)

In [35]:
# every element is stored as a float because the source list contains 7.5
arr1

array([ 6. ,  7.5,  8. ,  0. ,  1. ])

In [36]:
# a list of equal-length lists is converted into a multidimensional array by np.array
data2 = [
    [1, 2, 3, 4],
    [5, 6, 7, 8],
]

In [37]:
# each inner list becomes one row of a two-dimensional array
arr2 = np.array(data2)

In [38]:
# display the resulting 2 x 4 array
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [39]:
# ndim is the number of dimensions (axes) of the array
arr2.ndim

2

In [40]:
# shape gives the size along each dimension: 2 rows, 4 columns
arr2.shape

(2, 4)

In [41]:
# no dtype was given, so np.array inferred an integer type from the data
arr2.dtype

dtype('int64')

In [42]:
# other functions also create arrays
## zeros
## ones
## empty
## pass an integer length, or a tuple giving the shape
np.zeros(10)

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

In [44]:
# a shape tuple produces a multidimensional array: 3 rows of 6 zeros
np.zeros((3, 6))

array([[ 0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.]])

In [48]:
# np.empty allocates the array without initializing its contents:
# the values are whatever was in memory (they may happen to be zeros), so never rely on them
np.empty((2, 3, 2))

array([[[ 0.,  0.],
        [ 0.,  0.],
        [ 0.,  0.]],

       [[ 0.,  0.],
        [ 0.,  0.],
        [ 0.,  0.]]])

In [50]:
# arange is an array-valued version of the built-in Python range function
# arange(15) yields the integers 0 through 14
np.arange(15)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [51]:
# standard array creation functions include:
## array             - convert input data to an ndarray
## asarray           - convert input to ndarray, but don't copy if the input is already an ndarray
## arange            - like built-in range but returns an array instead of a list
## ones, ones_like   - ones creates an array of 1's with the given shape and dtype; ones_like takes another array and produces a ones array of the same shape and dtype
## zeros, zeros_like - like ones and ones_like, but producing arrays of 0's instead
## empty, empty_like - create new arrays by allocating new memory, but don't populate with values
## eye, identity     - create a square N x N identity matrix (1's on the diagonal, 0's elsewhere)

In [53]:
# the dtype argument fixes the element type explicitly instead of letting NumPy infer it
arr1 = np.array([1, 2, 3], dtype='float64')
arr2 = np.array([1, 2, 3], dtype='int32')
arr1.dtype

dtype('float64')

In [54]:
# arr2 was created with an explicit 32-bit integer dtype
arr2.dtype

dtype('int32')

In [55]:
# dtypes have a type name, and a number indicating the number of bits per element
## standard double-precision floating point values take up 8 bytes / 64 bits

In [None]:
# general data types:
## float   - floating point
## complex - complex
## int     - integer
## bool    - boolean
## bytes_  - fixed-length byte string (formerly also spelled string_, an alias removed in NumPy 2.0)
## object  - object

In [56]:
# start from an integer array; its dtype is inferred from the integer values
arr = np.array(range(1, 6))
arr.dtype

dtype('int64')

In [58]:
# cast arrays using astype: here the integers are converted to 64-bit floats
# the result is bound to a new name; arr itself is not reassigned
float_arr = arr.astype(np.float64)
float_arr

array([ 1.,  2.,  3.,  4.,  5.])

In [59]:
# when casting floating point to integer, the fractional part is truncated (dropped, not rounded)
# build a float array with positive and negative values to demonstrate this
arr = np.array([3.7, -1.2, -2.6, 0.5, 12.9, 10.1])
arr

array([  3.7,  -1.2,  -2.6,   0.5,  12.9,  10.1])

In [60]:
# truncation goes toward zero: 3.7 -> 3 and -2.6 -> -2
arr.astype(np.int32)

array([ 3, -1, -2,  0, 12, 10], dtype=int32)

In [63]:
# can do this with strings too
numeric_strings = np.array(['1.25', '-9.6', '42'], dtype = np.string_)
numeric_strings.astype(float)

array([  1.25,  -9.6 ,  42.  ])

In [64]:
# 'S4' is a byte-string dtype with room for 4 bytes per element (the longest entries have 4 characters)
numeric_strings.dtype

dtype('S4')

In [65]:
# a one-dimensional array of 3 elements has a one-item shape tuple
numeric_strings.shape

(3,)

In [67]:
# astype also accepts the dtype attribute of an existing array as the target type
int_array = np.arange(10)
calibers = np.array([0.22, 0.270, 0.357, 0.380, 0.44, 0.50], dtype='float64')
int_array.astype(calibers.dtype)

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.])

In [None]:
# by default, astype always creates a new array (a copy of the data), even if the new dtype is the same as the old dtype

In [68]:
# operations between arrays and scalars
# NumPy arithmetic is vectorized: it is written on whole arrays, with no explicit loops
# arithmetic between equal-size arrays applies the operation element-wise
arr = np.array([[1.0, 2.0, 3.0],
                [4.0, 5.0, 6.0]])
arr

array([[ 1.,  2.,  3.],
       [ 4.,  5.,  6.]])

In [69]:
# element-wise product (each element squared), not a matrix multiplication
arr * arr

array([[  1.,   4.,   9.],
       [ 16.,  25.,  36.]])

In [70]:
# element-wise difference: every element minus itself gives zeros
arr - arr

array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.]])

In [71]:
# arithmetic with a scalar applies the scalar to every element
# here: the reciprocal of each element
1 / arr

array([[ 1.        ,  0.5       ,  0.33333333],
       [ 0.25      ,  0.2       ,  0.16666667]])

In [72]:
# raising to the power 0.5 takes the square root of each element
arr ** 0.5

array([[ 1.        ,  1.41421356,  1.73205081],
       [ 2.        ,  2.23606798,  2.44948974]])

In [78]:
# operations between differently-sized arrays are called broadcasting - discussed in Chapter 12.
# indexing a one-dimensional array works similarly to indexing a Python list
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [79]:
# a single integer index returns one element (indices start at 0)
arr[5]

5

In [80]:
# a slice selects indices 5, 6 and 7 (the end index is excluded)
arr[5:8]

array([5, 6, 7])

In [81]:
# assigning a scalar to a slice writes that value into every selected element, in place
arr[5:8] = 12

In [82]:
# positions 5 through 7 now hold 12; the rest of the array is unchanged
arr

array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])

In [83]:
# array slices are views on the original array, not copies,
# so if you change the slice, you change the array
arr_slice = arr[5:8]
arr_slice[1] = 12345   # element 1 of the slice is element 6 of arr
arr

array([    0,     1,     2,     3,     4,    12, 12345,    12,     8,     9])

In [85]:
# [:] selects the whole slice, so the assignment overwrites all three elements of the view
arr_slice[:] = 64
arr   # the change shows up in the original array

array([ 0,  1,  2,  3,  4, 64, 64, 64,  8,  9])

In [86]:
# slicing alone never copies; call .copy() on the slice
# to get an independent array that does not share data with arr
arr[5:8].copy()

array([64, 64, 64])

In [89]:
# with higher-dimensional arrays there are many more indexing options
# start with a 3 x 3 array of the integers 1 through 9
arr2d = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])

In [90]:
# a single index on a 2-D array selects a whole row as a 1-D array
arr2d[2]

array([7, 8, 9])

In [91]:
# index 0 is the first row
arr2d[0]

array([1, 2, 3])

In [92]:
# individual elements can be accessed in more than one way
# chained indexing: take row 0 first, then element 2 of that row
arr2d[0][2]

3

In [93]:
# comma-separated indices select the same element (row 0, column 2) in one step
arr2d[0, 2]

3

In [95]:
# in multidimensional arrays, omitting the later indices returns a lower-dimensional
## ndarray holding all of the data along the remaining dimensions.
## arr3d below is a 2 x 2 x 3 array used to demonstrate this
arr3d = np.array([
    [[1, 2, 3], [4, 5, 6]],
    [[7, 8, 9], [10, 11, 12]],
])

In [96]:
# display the full 2 x 2 x 3 array: two blocks, each with 2 rows of 3 elements
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [98]:
# indexing only the first axis returns a 2 x 3 array (the first block)
arr3d[0]

array([[1, 2, 3],
       [4, 5, 6]])

In [99]:
# the second 2 x 3 block
arr3d[1]

array([[ 7,  8,  9],
       [10, 11, 12]])

In [100]:
# both scalars and arrays can be assigned to arr3d[0]
# keep an independent copy first so the original values can be restored later
old_values = arr3d[0].copy()
arr3d[0] = 42   # the scalar is written into every element of the first block

In [101]:
# the first block is now all 42; the second block is untouched
arr3d

array([[[42, 42, 42],
        [42, 42, 42]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [102]:
# assigning an array of matching shape writes it element by element,
# which restores the values saved in old_values
arr3d[0] = old_values
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])