#### Numpy, short for numerical python, is one of the most important foundational packages for numerical computing

# 4.1 NumPy ndarray

In [95]:
import numpy as np

In [96]:
# Generate some random data
data = np.random.randn(2,3) # (2,3) means two rows and three columns
data

TypeError: 'tuple' object is not callable

In [4]:
data *10

array([[  1.27961081,  11.58513172,   5.21049437],
       [  4.43710786,   7.37597762, -12.2270696 ]])

In [5]:
data + data

array([[ 0.25592216,  2.31702634,  1.04209887],
       [ 0.88742157,  1.47519552, -2.44541392]])

In [6]:
data.shape

(2, 3)

In [7]:
data.dtype

dtype('float64')

## Creating ndarrays

In [8]:
# Use array function
data1 = [3,5,6.8,5,3]
arr1 = np.array(data1)
arr1

array([3. , 5. , 6.8, 5. , 3. ])

In [12]:
arr1.ndim

1

In [10]:
data2 = [[1,2,3,4],[5,6,7,8]] # It is a list of lists
arr2 = np.array(data2)
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [13]:
arr2.ndim

2

In [14]:
arr2.shape

(2, 4)

In [15]:
arr2.dtype

dtype('int32')

In [16]:
np.zeros((3,5))

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [94]:
np.empty((3,4,2))

TypeError: 'tuple' object is not callable

### arange: an array-valued version of the range function

In [23]:
np.arange(16) # from 0 to 15 (including 15)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [24]:
np.eye((3))

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

### Data Types for ndarrays

In [26]:
arr1 = np.array([1,2,3], dtype = np.float64)
arr1

array([1., 2., 3.])

In [27]:
# array of strings to numbers
# use the astype() function to convert
numeric_strings = np.array(['1.25', '-9.6', '42'], dtype=np.string_)
numeric_strings.astype(float)

array([ 1.25, -9.6 , 42.  ])

## Arithmetic with NumPy Arrays

In [29]:
# +, - , *, /, **, >, <, == and so on can be applied to equal-size arrays element-wise
arr1 ** arr1

array([ 1.,  4., 27.])

## Basic Indexing and Slicing

In [32]:
arr = np.arange(12)
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [36]:
# Choose one number arr[]
arr[11]

11

In [37]:
arr[4:6] # arr[start:end] from start to end-1

array([4, 5])

In [38]:
arr[4:6] = 100
arr

array([  0,   1,   2,   3, 100, 100,   6,   7,   8,   9,  10,  11])

In [40]:
arr_slice = arr[4:6] 
# This is part of arr, if you change the value here, the values in arr wiil also change
arr_slice[:] = 100000 
# [:] choose all the values
arr

array([     0,      1,      2,      3, 100000, 100000,      6,      7,
            8,      9,     10,     11])

In [52]:
arr2d = np.array([[1,2,3],[4,5,6],[7,8,9]])
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [55]:
arr2d[1, 2]

6

In [56]:
arr3d = np.array([[[1,2,3],[4,5,6]],[[7,8,9],[100,11,12]]])
arr3d

array([[[  1,   2,   3],
        [  4,   5,   6]],

       [[  7,   8,   9],
        [100,  11,  12]]])

In [58]:
arr3d.shape

(2, 2, 3)

In [59]:
arr3d[1]

array([[  7,   8,   9],
       [100,  11,  12]])

In [63]:
old_values = arr3d[1].copy()

In [64]:
arr3d[1] = 999
arr3d

array([[[  1,   2,   3],
        [  4,   5,   6]],

       [[999, 999, 999],
        [999, 999, 999]]])

In [66]:
old_values

array([[  7,   8,   9],
       [100,  11,  12]])

## Indexing with slicess

In [68]:
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [67]:
arr2d[:2]

array([[1, 2, 3],
       [4, 5, 6]])

In [69]:
arr2d[:2,1:]

array([[2, 3],
       [5, 6]])

In [70]:
arr2d[:,:1]

array([[1],
       [4],
       [7]])

In [71]:
arr2d[:2,2]

array([3, 6])

## Boolean Indexing

In [72]:
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
names == 'Bob'

array([ True, False, False,  True, False, False, False])

In [78]:
"""
The names =='Bob' equals to True for the first and fourth rows
It can be used to index a data metric
"""

data = np.random.randn(7,4)
data

array([[-0.52641381,  0.20299011, -0.82362558,  0.94668144],
       [-1.33877867,  1.26786851,  1.08057015, -1.0703389 ],
       [ 0.68202425, -0.27730925,  1.77073835, -0.30618402],
       [-0.32302046,  1.52869117,  0.15907913,  1.22329694],
       [-0.38498727,  0.75927823,  0.53697911, -0.6192008 ],
       [ 0.48950262, -0.32951976, -1.01037181, -2.1152899 ],
       [-0.0754128 ,  2.21130913, -0.59682723,  0.78546624]])

In [79]:
# Choose the 1st and 4th row of data
data[names == 'Bob']

array([[-0.52641381,  0.20299011, -0.82362558,  0.94668144],
       [-0.32302046,  1.52869117,  0.15907913,  1.22329694]])

In [80]:
# Choose the opposite cases: ~
data[~(names == 'Bob')]
# ~ is used to inverse a general condition

array([[-1.33877867,  1.26786851,  1.08057015, -1.0703389 ],
       [ 0.68202425, -0.27730925,  1.77073835, -0.30618402],
       [-0.38498727,  0.75927823,  0.53697911, -0.6192008 ],
       [ 0.48950262, -0.32951976, -1.01037181, -2.1152899 ],
       [-0.0754128 ,  2.21130913, -0.59682723,  0.78546624]])

In [83]:
data[data < 0] = 0
data

array([[0.        , 0.20299011, 0.        , 0.94668144],
       [0.        , 1.26786851, 1.08057015, 0.        ],
       [0.68202425, 0.        , 1.77073835, 0.        ],
       [0.        , 1.52869117, 0.15907913, 1.22329694],
       [0.        , 0.75927823, 0.53697911, 0.        ],
       [0.48950262, 0.        , 0.        , 0.        ],
       [0.        , 2.21130913, 0.        , 0.78546624]])

In [84]:
data[names != 'Joe'] = 7
data

array([[7.        , 7.        , 7.        , 7.        ],
       [0.        , 1.26786851, 1.08057015, 0.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [0.48950262, 0.        , 0.        , 0.        ],
       [0.        , 2.21130913, 0.        , 0.78546624]])

### Fancy Indexing

In [93]:
arr = np.empty((7, 4))
arr

TypeError: 'tuple' object is not callable