In [1]:
import numpy as np

# 4.1 NumPy ndarray

In [6]:
# Generate some random data: samples from the standard normal distribution.
# No seed is set here, so the values differ on every run.
data = np.random.randn(2,3) # the arguments (2, 3) give two rows and three columns
data

array([[ 0.28331822,  0.48402203, -0.06393786],
       [-0.77634853,  0.07630232,  1.01655952]])

In [7]:
# Multiplying by a scalar scales every element; the result is a new array
data *10

array([[ 2.83318223,  4.84022034, -0.63937863],
       [-7.76348532,  0.76302317, 10.1655952 ]])

In [8]:
# Adding two arrays of the same shape adds them element by element
data + data

array([[ 0.56663645,  0.96804407, -0.12787573],
       [-1.55269706,  0.15260463,  2.03311904]])

In [9]:
# shape: a tuple giving the size of each dimension
data.shape

(2, 3)

In [10]:
# dtype: the single data type shared by all elements of the array
data.dtype

dtype('float64')

## Creating ndarrays

In [11]:
# Use the array function: np.array converts a sequence into an ndarray.
# The list mixes ints with one float, so the resulting array holds floats.
data1 = [3,5,6.8,5,3]
arr1 = np.array(data1)
arr1

array([3. , 5. , 6.8, 5. , 3. ])

In [12]:
# ndim: the number of dimensions (a flat list produces a 1-D array)
arr1.ndim

1

In [13]:
# Nested, equal-length lists are converted into a multidimensional array
data2 = [[1,2,3,4],[5,6,7,8]] # a list of lists: each inner list becomes one row
arr2 = np.array(data2)
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [14]:
# One level of nesting in data2 gives two dimensions
arr2.ndim

2

In [15]:
# 2 inner lists (rows) of 4 elements each (columns)
arr2.shape

(2, 4)

In [16]:
# np.array infers the dtype from the data; all-integer input gives an integer dtype.
# NOTE: the default integer width (int32 vs int64) depends on the platform and NumPy version.
arr2.dtype

dtype('int32')

In [17]:
# Array filled with zeros; the shape is passed as a single tuple (3 rows, 5 columns)
np.zeros((3,5))

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [18]:
# np.empty allocates the array without initializing it, so the displayed
# values are whatever happened to be in memory, not meaningful data
np.empty((3,4,2))

array([[[1.07624902e-311, 7.75683064e-322],
        [0.00000000e+000, 0.00000000e+000],
        [1.89146896e-307, 1.15998412e-028],
        [2.44171989e+232, 8.00801729e+159]],

       [[8.16124052e-153, 5.05462580e-038],
        [2.40461215e-057, 8.98332509e-096],
        [3.70003357e-033, 4.07356338e+223],
        [8.94213159e+130, 3.06061314e-057]],

       [[3.24245676e-086, 7.12020992e-067],
        [1.17606470e-047, 1.95360835e-109],
        [2.86752281e+161, 2.78225500e+296],
        [9.80058441e+252, 1.23971686e+224]]])

### arange: an array-valued version of the range function

In [19]:
np.arange(16) # integers 0 through 15; the stop value 16 itself is excluded

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [20]:
# Convert an array of numeric strings to numbers with astype().
# np.bytes_ is the fixed-width byte-string dtype. The older alias np.string_
# was removed in NumPy 2.0, whereas np.bytes_ works on both NumPy 1.x and 2.x.
numeric_strings = np.array(['1.25', '-9.6', '42'], dtype=np.bytes_)
# astype returns a new array; numeric_strings itself keeps its string dtype
numeric_strings.astype(float)

array([ 1.25, -9.6 , 42.  ])

## Arithmetic with NumPy Arrays


In [21]:
# +, -, *, /, **, >, <, == and so on are applied element-wise to equal-size arrays.
# Here each element of arr1 is raised to the power of itself.
arr1 ** arr1

array([2.70000000e+01, 3.12500000e+03, 4.58205826e+05, 3.12500000e+03,
       2.70000000e+01])

## Basic Indexing and Slicing

In [22]:
# 1-D array of the integers 0..11, used for the indexing examples that follow
arr = np.arange(12)
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [23]:
# Select a single element with an integer index (indices start at 0,
# so index 11 is the last of the 12 elements)
arr[11]

11

In [24]:
arr[4:6] # arr[start:end] selects indices start through end-1 (end is excluded)

array([4, 5])

In [25]:
# Assigning a scalar to a slice writes that value into every selected
# position; arr is modified in place
arr[4:6] = 100
arr

array([  0,   1,   2,   3, 100, 100,   6,   7,   8,   9,  10,  11])

In [26]:
arr_slice = arr[4:6] 
# A slice is a view on arr, not a copy: changing values through arr_slice also changes arr
arr_slice[:] = 100000 
# [:] selects every element of arr_slice, so the assignment writes through to arr
arr

array([     0,      1,      2,      3, 100000, 100000,      6,      7,
            8,      9,     10,     11])

In [27]:
# 3x3 two-dimensional array used for the 2-D indexing and slicing examples
arr2d = np.array([[1,2,3],[4,5,6],[7,8,9]])
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [28]:
# Comma-separated indices: row 1, column 2 (both zero-based)
arr2d[1, 2]

6

In [29]:
# Three levels of nesting: 2 blocks, each with 2 rows of 3 values
arr3d = np.array([[[1,2,3],[4,5,6]],[[7,8,9],[100,11,12]]])
arr3d

array([[[  1,   2,   3],
        [  4,   5,   6]],

       [[  7,   8,   9],
        [100,  11,  12]]])

In [30]:
# (blocks, rows, columns)
arr3d.shape

(2, 2, 3)

In [31]:
# A single integer index on a 3-D array returns the 2-D sub-array at that position
arr3d[1]

array([[  7,   8,   9],
       [100,  11,  12]])

In [32]:
# copy() produces an independent array, so these values are preserved
# even if arr3d[1] is later overwritten
old_values = arr3d[1].copy()

In [33]:
# Assigning a scalar to arr3d[1] sets every element of that 2x3 sub-array
arr3d[1] = 999
arr3d

array([[[  1,   2,   3],
        [  4,   5,   6]],

       [[999, 999, 999],
        [999, 999, 999]]])

In [34]:
# The copy still holds the values arr3d[1] had when the copy was taken
old_values

array([[  7,   8,   9],
       [100,  11,  12]])

## Indexing with slices


In [35]:
# Show the 3x3 array again as the reference for the slices below
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [36]:
# A single slice applies to the first axis: the first two rows
arr2d[:2]

array([[1, 2, 3],
       [4, 5, 6]])

In [37]:
# One slice per axis: first two rows, columns from index 1 onward
arr2d[:2,1:]

array([[2, 3],
       [5, 6]])

In [38]:
# A bare colon takes the whole axis: all rows, and column 0 selected as a
# slice (:1), so the result keeps two dimensions (3x1)
arr2d[:,:1]

array([[1],
       [4],
       [7]])

In [39]:
# Mixing a slice with an integer index drops that axis: column 2 of the
# first two rows comes back as a 1-D array
arr2d[:2,2]

array([3, 6])

## Boolean Indexing


In [40]:
# Comparing an array with a string is element-wise and yields a boolean array
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
names == 'Bob'

array([ True, False, False,  True, False, False, False])

In [41]:
# names == 'Bob' is True at the first and fourth positions (indices 0 and 3),
# so that boolean array can be used to select the matching rows of a data
# matrix that has one row per name.

# 7 rows (one for each entry in names) by 4 columns of standard-normal samples
data = np.random.randn(7,4)
data

array([[-1.02290261, -0.56803928,  1.24805792, -1.09479222],
       [ 0.58717653,  0.09159135, -0.94209083, -0.0810571 ],
       [-1.51590373, -0.46554619,  0.05884864, -0.01479947],
       [-0.65984055,  0.65806088, -0.73306268,  0.91861661],
       [-0.95565758,  0.3226248 , -0.73923581, -0.62165972],
       [-0.703043  , -1.06886204,  0.49173963, -0.81384139],
       [-1.60463055,  1.31669146, -0.4675409 ,  0.62784613]])

In [42]:
# Choose the 1st and 4th rows of data: a boolean array used as an index
# keeps the rows where the mask is True
data[names == 'Bob']

array([[-1.02290261, -0.56803928,  1.24805792, -1.09479222],
       [-0.65984055,  0.65806088, -0.73306268,  0.91861661]])

In [43]:
# Select the complementary rows: every row whose name is not 'Bob'
data[~(names == 'Bob')]
# ~ inverts a boolean array element-wise, so it can negate any condition

array([[ 0.58717653,  0.09159135, -0.94209083, -0.0810571 ],
       [-1.51590373, -0.46554619,  0.05884864, -0.01479947],
       [-0.95565758,  0.3226248 , -0.73923581, -0.62165972],
       [-0.703043  , -1.06886204,  0.49173963, -0.81384139],
       [-1.60463055,  1.31669146, -0.4675409 ,  0.62784613]])

In [44]:
# Boolean mask built from the data itself: set every negative entry to 0
# (this modifies data in place)
data[data < 0] = 0
data

array([[0.        , 0.        , 1.24805792, 0.        ],
       [0.58717653, 0.09159135, 0.        , 0.        ],
       [0.        , 0.        , 0.05884864, 0.        ],
       [0.        , 0.65806088, 0.        , 0.91861661],
       [0.        , 0.3226248 , 0.        , 0.        ],
       [0.        , 0.        , 0.49173963, 0.        ],
       [0.        , 1.31669146, 0.        , 0.62784613]])

In [45]:
# A 1-D boolean mask selects whole rows: every row whose name is not 'Joe'
# is overwritten with 7 (in place)
data[names != 'Joe'] = 7
data

array([[7.        , 7.        , 7.        , 7.        ],
       [0.58717653, 0.09159135, 0.        , 0.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [0.        , 0.        , 0.49173963, 0.        ],
       [0.        , 1.31669146, 0.        , 0.62784613]])

### Fancy Indexing


In [50]:
# np.empty only allocates the 8x4 array; its contents are uninitialized,
# so whatever values are displayed here are incidental, not guaranteed
arr = np.empty((8, 4))
arr

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [51]:
# Fill row i with the value i so that each row is easy to identify
for i in range(8):
    # a scalar assigned to a row is written into all 4 columns of that row
    arr[i] = i
arr

array([[0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [2., 2., 2., 2.],
       [3., 3., 3., 3.],
       [4., 4., 4., 4.],
       [5., 5., 5., 5.],
       [6., 6., 6., 6.],
       [7., 7., 7., 7.]])

In [52]:
# Fancy indexing: pass a list (or ndarray) of integers to select rows,
# returned in exactly the order the indices are given
arr[[4,3,0,6]]

array([[4., 4., 4., 4.],
       [3., 3., 3., 3.],
       [0., 0., 0., 0.],
       [6., 6., 6., 6.]])

In [53]:
# Negative indices count from the end: -1 is the last row (there is no -0),
# so on this 8-row array -3, -5 and -7 select rows 5, 3 and 1
arr[[-3, -5, -7]]

array([[5., 5., 5., 5.],
       [3., 3., 3., 3.],
       [1., 1., 1., 1.]])