# Numpy

A NumPy array is similar to a Python list, but it only holds elements of a single data type, which makes calculations on it efficient



## Initialize

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Creating Numpy Arrays

In [7]:
# 3-D array (a rank-3 tensor) of shape (5, 3, 4) with every cell set to 0
a = np.zeros(shape=(5, 3, 4))

# 1-D array holding the integers 1 through 10 (the stop value 11 is excluded)
b = np.arange(1, 11)

# 2-D array (a matrix) of shape (2, 4) filled with random floats drawn from [0, 1)
c = np.random.random(size=(2, 4))


## Flatten & Reshape


In [13]:
# Every np.ndarray exposes its dimensions through the .shape attribute
a.shape

# np.ndarray.flatten() returns a copy collapsed into a one-dimensional array
a_flat = a.flatten()

# np.ndarray.reshape() returns the same data arranged in a new shape.
# Reshaping the flattened array (rather than `a` itself, which would be a no-op)
# back to (5, 3, 4) completes the round trip, so np.array_equal(a, a_reshaped) is True.
a_reshaped = a_flat.reshape((5, 3, 4))



NameError: name 'product' is not defined

## Data type



In [15]:
# Passing dtype= makes np.arange build the array directly in that data type
float32_array = np.arange(11, dtype=np.float32)
float32_array

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.],
      dtype=float32)

In [16]:
# Every np.ndarray exposes its element data type through the .dtype attribute
float32_array.dtype

dtype('float32')

In [18]:
# astype() returns a new array with the values converted to the requested dtype
int32_array = float32_array.astype(np.int32)
int32_array

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10], dtype=int32)

## Indexing & Slicing & Sorting

Similar to list indexing and slicing

In [52]:
# Fix the seed so the same "random" integers are produced on every run
np.random.seed(1)
# 20 integers drawn from 0-99, arranged as 4 rows x 5 columns
d = np.random.randint(low=0, high=100, size=20).reshape(4, 5)
d

array([[37, 12, 72,  9, 75],
       [ 5, 79, 64, 16,  1],
       [76, 71,  6, 25, 50],
       [20, 18, 84, 11, 28]])

In [53]:
# np.sort() sorts along the last axis by default. In a 2D array the last axis is
# axis=1, so the values within each row are put in ascending order.
np.sort(d)

array([[ 9, 12, 37, 72, 75],
       [ 1,  5, 16, 64, 79],
       [ 6, 25, 50, 71, 76],
       [11, 18, 20, 28, 84]])

In [54]:
# A specific axis can be chosen: axis=0 sorts the values within each column
np.sort(d, axis=0)

array([[ 5, 12,  6,  9,  1],
       [20, 18, 64, 11, 28],
       [37, 71, 72, 16, 50],
       [76, 79, 84, 25, 75]])

## Filtering


### Filter by mask

Create a boolean array by checking whether each element matches a criterion, then use it to select the matching elements

In [71]:
# Flatten once and reuse the 1-D copy for both the comparison and the selection
flat_d = d.flatten()
# Element-wise comparison yields a boolean array: True where the value exceeds 50
mask = flat_d > 50
print(f"mask:\n {mask}")
# Indexing with the boolean mask keeps only the elements marked True
print(f'filtered:\n{flat_d[mask]}')

mask:
 [False False  True False  True False  True  True False False  True  True
 False False False False False  True False False]
filtered:
[72 75 79 64 76 71 84]


### Filter by np.where()

Returns a tuple of index arrays, one array per dimension

In [72]:
# np.where(condition) returns one index array per dimension of the input
# (on the flattened array, np.where(d.flatten()>70) would give a single index array)
np.where(d>70)

# For a 2D array the result is (row indices, column indices); pairing them up
# gives the matching cells, for example d[0,2], d[0,4], d[1,1], ...

(array([0, 0, 1, 2, 2, 3]), array([2, 4, 1, 0, 1, 2]))

In [73]:
# np.where() can also be used to find and replace:
# np.where(condition, x, y) takes x where the condition holds and y elsewhere

# For example, find all values larger than 70 and replace them with 99
np.where(d>70, 99, d)

array([[37, 12, 99,  9, 99],
       [ 5, 99, 64, 16,  1],
       [99, 99,  6, 25, 50],
       [20, 18, 99, 11, 28]])

## Adding & Removing


In [None]:
# np.concatenate((array1, array2), axis=)

In [None]:
# np.delete(array, obj, axis=)   (obj: the index, slice, or list of indices to remove)

## Summarizing Data


In [None]:
# Sum all values; the result is a single scalar
# array.sum()

# Sum each column; the result is a one-dimensional array
# array.sum(axis=0)

# Sum each row; the result is a one-dimensional array
# array.sum(axis=1)

# Pass keepdims=True to keep the original number of dimensions

In [74]:
# Display d again so the sums below can be checked against it
d

array([[37, 12, 72,  9, 75],
       [ 5, 79, 64, 16,  1],
       [76, 71,  6, 25, 50],
       [20, 18, 84, 11, 28]])

In [75]:
# axis=0 sums down the rows, giving one total per column
d.sum(axis=0)

array([138, 180, 226,  61, 154])

## Vectorize



In [None]:
# Many functions take an object as a whole; for example, len(array) returns the number of rows of the array.
# To get the len() of each element instead, vectorize the function first with:
# np.vectorize(function)



## Broadcast

In [None]:
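# A single number (scalar) can be applied to every element of an array

# A one-dimensional array can be applied to an array whose number of columns equals its length
# e.g. shape (3,) can be applied to shape (5, 3)

# A two-dimensional array can be applied to another two-dimensional array when:
# 1. the numbers of columns are the same & either array has exactly 1 row
# 2. either array has exactly 1 column & the numbers of rows are the same
# 3. the shapes are identical
# In general, shapes are compared from the last axis backwards, and each pair of
# lengths must be equal or one of them must be 1 (so (5, 1) and (1, 3) also work)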

## Saving & Loading

### Loading
```python
# np.load also accepts a file path and takes care of opening and closing the file
array = np.load("npdata.npy")
```

### Saving
```python
# np.save also accepts a file path; the array is written in NumPy's binary .npy format
np.save("npdata.npy", array)
```

## Flip & Transpose

In [None]:
# np.flip(array, axis=)

# np.transpose(array, axes=())

## Split & Stack

In [None]:
# np.split(array, number_arrays, axis=)

# np.stack([array1, array2], axis=)