Learning from:


1. Python Data Science Handbook, 2016. Ch. 2




In [2]:
import numpy as np
from IPython.display import display
np.random.seed(0) # seed NumPy's global RNG so random draws are reproducible


# Array attributes


* ndarray is a generic multidimensional container for homogeneous data.

```
.dtype
.shape   
.ndim
.size 
.nbytes
.itemsize
```
Generally: `nbytes = size * itemsize`.




# Create new arrays

* np.zeros() 
* np.ones((2,3))
* np.empty()
* np.random.randint(10, size=5)
* np.random.randint(10, size=(3,4))

# 1D Array Indexing and Slicing

```
x[0]          # First element
x[-1]         # Last element
x[-2]         # The next to last (counting from the right)
x[:3]         # Grab the first N(3) elements from LEFT 
x[:-2]        # Drop the last N(2) elements from RIGHT (keep the rest)
x[3:-2]       # Drop first N(3) from LEFT and 
                 drop last M(2) elements from RIGHT 
x[::2]        # Every other element (step of 2)
```

# 2D Array Indexing and Slicing

```
x[:, 1]      # ask for all rows, and column 1 
x[:2, :3]    # first two rows, first three columns
x[:3, ::2]   # first three rows, every other column
x[:, 0]      # first column (all rows)
x[0, :]      # first row (all columns), which is the same as x[0]

```

# Array Reshape

* array can be reshaped.
* A 1-D array `x = np.array([1, 2, 3])` (shape `(3,)`) can be reshaped into a 2-D row or column vector.

    * `x.reshape((1,3))`
    * `x[np.newaxis, :]`
    * both will give you a row vector of shape `(1, 3)`
    
* To reshape to column vector:

    * `x.reshape((3,1))`
    * `x[:, np.newaxis]`
    

   

# Sub-array is a view

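This is one difference from native Python lists: slicing a list returns a copy, but slicing an ndarray returns a view, not a copy. That means, if you modify what is returned, you are changing the original array. Use `.copy()` when you need an independent sub-array.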

# Array data type and casting

NumPy defines a number of data types, such as 'int8', 'int16', 'int32', 'int64', 'float32', 'float64', etc.
You can also cast an array from one type to another with `astype`.

In [12]:
# arange(10) builds the integers 0..9; this array is reused by the slicing cells below
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [14]:
# Assigning a scalar to a slice broadcasts it to every selected element
# (indices 5, 6 and 7 here) and modifies arr in place.
arr[5:8] = 12
arr

array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])

In [15]:
# A bare slice [:] selects every element, so this overwrites the whole array
# in place (unlike `arr = 20`, which would rebind the name to an int).
arr[:] = 20
arr

array([20, 20, 20, 20, 20, 20, 20, 20, 20, 20])

In [16]:
# Basic slicing returns a view that shares memory with arr, not a copy.
arr_slice = arr[1:3]
arr_slice

array([20, 20])

In [17]:
# Writing through the view changes the original array: arr[1] and arr[2]
# become 10 as well.
arr_slice[:] = 10
arr


array([20, 10, 10, 20, 20, 20, 20, 20, 20, 20])

# Array Concatenation and Split

* see hstack, vstack

In [15]:
import numpy as np
# x is 1-D with shape (3,); y is 2-D with shape (2, 3).
x = np.array([1,2,3])
y = np.array([[4,5,6],
            [7,8,9]])
y.shape

(2, 3)

In [16]:
# vstack stacks along the first axis; the 1-D x is treated as a single row
# of shape (1, 3) and placed on top of y.
np.vstack([x,y])

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [20]:
# z is a (2, 1) column, so hstack can append it to the (2, 3) array y
# as an extra column.
z = np.array([[99],
              [99]])

np.hstack([y,z])

array([[ 4,  5,  6, 99],
       [ 7,  8,  9, 99]])

In [18]:
# The six values 1..6 laid out row by row in a 2x3 array.
# NOTE: this rebinds x, which was the 1-D array [1, 2, 3] in the vstack example.
x = np.arange(1,7).reshape(2,3)
x

array([[1, 2, 3],
       [4, 5, 6]])

In [10]:
# Default axis=0: the arrays are joined along rows, giving shape (4, 3).
np.concatenate([x,x])

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [11]:
# axis=1 joins along columns instead, giving shape (2, 6).
np.concatenate([x,x], axis=1)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

# Array of Boolean and Mask

In [21]:
import numpy as np
# Offset of each day index (0..364) from day 172; presumably day 172 marks
# the start of summer (around June 21) -- TODO confirm.
days_since_start = np.arange(365) - 172
# Boolean mask: True for days strictly between 0 and 90 days after day 172.
summer = (days_since_start > 0) & (days_since_start < 90)

In [48]:
# Ten integer coordinates from -5 to 4 (the stop value 5 is excluded).
points = np.arange(-5, 5)
points

array([-5, -4, -3, -2, -1,  0,  1,  2,  3,  4])

In [49]:
# meshgrid expands the 1-D coordinates into two 10x10 grids:
# xs[i, j] == points[j] (varies along columns), ys[i, j] == points[i].
xs, ys = np.meshgrid(points, points)
xs

array([[-5, -4, -3, -2, -1,  0,  1,  2,  3,  4],
       [-5, -4, -3, -2, -1,  0,  1,  2,  3,  4],
       [-5, -4, -3, -2, -1,  0,  1,  2,  3,  4],
       [-5, -4, -3, -2, -1,  0,  1,  2,  3,  4],
       [-5, -4, -3, -2, -1,  0,  1,  2,  3,  4],
       [-5, -4, -3, -2, -1,  0,  1,  2,  3,  4],
       [-5, -4, -3, -2, -1,  0,  1,  2,  3,  4],
       [-5, -4, -3, -2, -1,  0,  1,  2,  3,  4],
       [-5, -4, -3, -2, -1,  0,  1,  2,  3,  4],
       [-5, -4, -3, -2, -1,  0,  1,  2,  3,  4]])

In [50]:
# Each row of ys is constant: row i is filled with points[i].
ys

array([[-5, -5, -5, -5, -5, -5, -5, -5, -5, -5],
       [-4, -4, -4, -4, -4, -4, -4, -4, -4, -4],
       [-3, -3, -3, -3, -3, -3, -3, -3, -3, -3],
       [-2, -2, -2, -2, -2, -2, -2, -2, -2, -2],
       [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 1,  1,  1,  1,  1,  1,  1,  1,  1,  1],
       [ 2,  2,  2,  2,  2,  2,  2,  2,  2,  2],
       [ 3,  3,  3,  3,  3,  3,  3,  3,  3,  3],
       [ 4,  4,  4,  4,  4,  4,  4,  4,  4,  4]])

In [51]:
# ** is applied element-wise, so every entry of xs is squared.
xs**2

array([[25, 16,  9,  4,  1,  0,  1,  4,  9, 16],
       [25, 16,  9,  4,  1,  0,  1,  4,  9, 16],
       [25, 16,  9,  4,  1,  0,  1,  4,  9, 16],
       [25, 16,  9,  4,  1,  0,  1,  4,  9, 16],
       [25, 16,  9,  4,  1,  0,  1,  4,  9, 16],
       [25, 16,  9,  4,  1,  0,  1,  4,  9, 16],
       [25, 16,  9,  4,  1,  0,  1,  4,  9, 16],
       [25, 16,  9,  4,  1,  0,  1,  4,  9, 16],
       [25, 16,  9,  4,  1,  0,  1,  4,  9, 16],
       [25, 16,  9,  4,  1,  0,  1,  4,  9, 16]])

# Math and Stat Methods

In [53]:
# 5x4 array of samples drawn from the standard normal distribution.
arr = np.random.randn(5,4)
arr

array([[-0.68526153, -1.35694705, -1.71300452, -0.1088929 ],
       [-0.18976062, -0.5529742 ,  1.06027773,  0.28738873],
       [-1.98615195, -1.42883851, -2.01439224, -0.36615657],
       [ 1.05188858,  0.14833062,  0.35421616,  0.09991926],
       [ 0.95672598,  1.43709354, -0.6566138 ,  0.68523887]])

In [54]:
# axis=1 averages across the columns, giving one mean per row (5 values)
arr.mean(axis=1)

array([-0.9660265 ,  0.15123291, -1.44888482,  0.41358865,  0.60561115])

In [55]:
# axis=0 averages down the rows, giving one mean per column (4 values)
arr.mean(axis=0)

array([-0.17051191, -0.35066712, -0.59390333,  0.11949948])

# Rules of Broadcasting


* Rule 1: If two arrays differ in their number of dimensions, the shape of the one with fewer dimensions is **padded** with ones on its leading (left) side.

Note: padding is for dimension tuples.
```
x.shape = (2, 3)
y.shape = (3, )

y's dimension needs padding --> after padding on the left with 1, we have
y.newshape = (1, 3)
```

* Rule 2: If the shapes of the two arrays do not match in some dimension, the array whose size is 1 in that dimension is **stretched** to match the other shape.

Using example above:

```
By rule #2, the first dimension disagrees (2 vs. 1), so y is stretched along it:
y.newshape = (2, 3)
```

* Rule 3: If in any dimension the sizes disagree and neither is equal to 1, an error is raised.



In [24]:
# x has shape (2, 3) and y has shape (3,): the pair of shapes used in the
# broadcasting rules above.
x = np.ones((2,3))
y = np.arange(3)
print(x)
print(y)

[[1. 1. 1.]
 [1. 1. 1.]]
[0 1 2]


# pairwise diff

In [7]:
import numpy as np
# 5 rows x 3 columns of uniform random samples from [0, 1).
X = np.random.random((5,3))

In [8]:
# Display X; each of its 5 rows is one operand of the pairwise difference below.
X

array([[0.65257242, 0.11152295, 0.62112462],
       [0.7828069 , 0.3480797 , 0.7832907 ],
       [0.40614279, 0.91045563, 0.58669911],
       [0.85975637, 0.58531391, 0.04023556],
       [0.98824861, 0.78657306, 0.16168186]])

In [12]:
# 2-D shape: (number of rows, number of columns).
X.shape

(5, 3)

In [11]:
# Insert a length-1 middle axis: (5, 3) -> (5, 1, 3), same data.
X.reshape(5,1,3)

array([[[0.65257242, 0.11152295, 0.62112462]],

       [[0.7828069 , 0.3480797 , 0.7832907 ]],

       [[0.40614279, 0.91045563, 0.58669911]],

       [[0.85975637, 0.58531391, 0.04023556]],

       [[0.98824861, 0.78657306, 0.16168186]]])

In [14]:
# Broadcasting (5, 1, 3) against (5, 3) -> (5, 5, 3): diff[i, j] = X[i] - X[j]
# for every pair of rows.
diff = X.reshape(5,1,3) - X
diff.shape

(5, 5, 3)

In [15]:
# The blocks diff[i, i] are all zeros and diff[j, i] == -diff[i, j].
diff

array([[[ 0.        ,  0.        ,  0.        ],
        [-0.13023448, -0.23655675, -0.16216609],
        [ 0.24642962, -0.79893268,  0.03442551],
        [-0.20718395, -0.47379096,  0.58088906],
        [-0.33567619, -0.67505011,  0.45944276]],

       [[ 0.13023448,  0.23655675,  0.16216609],
        [ 0.        ,  0.        ,  0.        ],
        [ 0.37666411, -0.56237593,  0.1965916 ],
        [-0.07694947, -0.23723421,  0.74305514],
        [-0.20544171, -0.43849336,  0.62160884]],

       [[-0.24642962,  0.79893268, -0.03442551],
        [-0.37666411,  0.56237593, -0.1965916 ],
        [ 0.        ,  0.        ,  0.        ],
        [-0.45361357,  0.32514173,  0.54646355],
        [-0.58210582,  0.12388257,  0.42501725]],

       [[ 0.20718395,  0.47379096, -0.58088906],
        [ 0.07694947,  0.23723421, -0.74305514],
        [ 0.45361357, -0.32514173, -0.54646355],
        [ 0.        ,  0.        ,  0.        ],
        [-0.12849224, -0.20125916, -0.1214463 ]],

       [[ 0.