<a href="https://colab.research.google.com/github/kbehrman/foundational-python-for-data-science/blob/main/Chapter_07_Numpy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# NumPy

[numpy.org](https://numpy.org/)

[Documentation](https://numpy.org/doc/stable/contents.html)

[wikipedia](https://en.wikipedia.org/wiki/NumPy)


## Installation
If you are using Google Colab, NumPy is already available in your environment; just import it. If you are working locally, you may need to install the package. There are many ways to install NumPy, depending on your system and Python installation; see [Installation](https://scipy.org/install.html). The simplest way is usually to use pip.

```bash
pip install numpy
```

In [2]:
import numpy as np

## ndarray vs standard library sequences

- fixed size
- must contain one type (with the exception of objects), so all elements will have to be the same size
- designed to perform operations on large amounts of data, generally more efficiently and simply than the standard library
- a large number of scientific libraries use ndarrays as input and output

## Creating an Array

In [2]:
np.array([1,2,3])

array([1, 2, 3])

In [3]:
np.zeros(3)

array([0., 0., 0.])

In [4]:
np.ones(3)

array([1., 1., 1.])

In [5]:
# np.empty allocates the array without initializing it, so the values are
# whatever happened to be in memory; the 1.0s shown below are not guaranteed.
np.empty(3)

array([1., 1., 1.])

In [6]:
np.arange(3)

array([0, 1, 2])

In [7]:
# arange(start, stop, step): the stop value (12) is excluded, like range().
np.arange(0, 12, 3)

array([0, 3, 6, 9])

In [8]:
# linspace(start, stop, num): 7 evenly spaced values, both endpoints included.
np.linspace(0, 21, 7)

array([ 0. ,  3.5,  7. , 10.5, 14. , 17.5, 21. ])

In [9]:
oned = np.arange(21)
oned

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20])

In [10]:
# The default integer dtype is platform dependent: int32 is shown here, but
# many systems report int64 instead.
oned.dtype

dtype('int32')

In [11]:
oned.size

21

In [12]:
# Bytes used by the elements: 21 items x 4 bytes each (int32 on this
# platform) = 84.
oned.nbytes

84

## N-Dimensions

In [13]:
type(oned)

numpy.ndarray

In [14]:
oned.shape

(21,)

In [15]:
oned.ndim

1

In [16]:
# A list of equal-length lists becomes a 2-D array, one row per inner list.
list_o_lists = [[1,2,3], [4,5,6], [7,8,9]]
twod = np.array(list_o_lists)
twod

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [17]:
twod.shape

(3, 3)

In [18]:
twod.ndim

2

### Reshape

In [19]:
oned = np.arange(12)
oned

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [20]:
# reshape keeps the same 12 elements and lays them out row by row in the
# new (3, 4) shape.
twod = oned.reshape(3,4)
twod

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [21]:
twod.reshape(12)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [22]:
# Any number of dimensions works as long as the sizes multiply to the element
# count: 2 * 2 * 3 == 12.
twod.reshape(2,2,3)

array([[[ 0,  1,  2],
        [ 3,  4,  5]],

       [[ 6,  7,  8],
        [ 9, 10, 11]]])

In [23]:
# reshape must keep the total element count: twod holds 12 elements, but a
# (2, 3) array has room for only 6, so NumPy raises ValueError. The error is
# caught and printed so this demonstration does not halt "Run All".
try:
    twod.reshape(2,3)
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: cannot reshape array of size 12 into shape (2,3)

In [None]:
# Calls can be chained: create 12 ones, then reshape them to (2, 3, 2).
np.ones(12).reshape(2,3,2)

### Slicing

In [24]:
oned = np.arange(21)
oned

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20])

In [25]:
oned[3]

3

In [26]:
# Negative indices count back from the end, as with Python lists.
oned[-1]

20

In [27]:
# Slices are half-open: index 3 up to, but not including, index 9.
oned[3:9]

array([3, 4, 5, 6, 7, 8])

In [5]:
twod = np.arange(21).reshape(3,7)
twod

array([[ 0,  1,  2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11, 12, 13],
       [14, 15, 16, 17, 18, 19, 20]])

In [6]:
twod[2]

array([14, 15, 16, 17, 18, 19, 20])

In [None]:
# Comma-separated indices select [row, column]: row 2, column 3 is 17.
twod[2, 3]

In [None]:
twod[0:2]

In [None]:
# A bare ':' selects every row, so this returns column 3: [3, 10, 17].
twod[:, 3]

In [None]:
# Rows 0-1 combined with the last three columns: [[4, 5, 6], [11, 12, 13]].
twod[0:2,-3:]

In [7]:
twod = np.arange(21).reshape(3,7)
twod

array([[ 0,  1,  2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11, 12, 13],
       [14, 15, 16, 17, 18, 19, 20]])

In [8]:
# Indexing on the left-hand side of '=' modifies the array in place.
twod[0,0] = 33
twod

array([[33,  1,  2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11, 12, 13],
       [14, 15, 16, 17, 18, 19, 20]])

In [9]:
# Assigning a scalar to a slice sets every selected element: rows 1 onward,
# first three columns.
twod[1:,:3] = 0
twod

array([[33,  1,  2,  3,  4,  5,  6],
       [ 0,  0,  0, 10, 11, 12, 13],
       [ 0,  0,  0, 17, 18, 19, 20]])

## Filtering

In [10]:
twod = np.arange(21).reshape(3,7)
twod

array([[ 0,  1,  2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11, 12, 13],
       [14, 15, 16, 17, 18, 19, 20]])

In [None]:
# Hand-written boolean mask with the same (3, 7) shape as twod; True marks
# the positions to select.
mask = np.array([[ True,  False,  True,  True,  False, True, False],
       [ True,  False,  True,  True,  False, True, False],
       [ True,  False,  True,  True,  False, True, False]])

In [None]:
# Indexing with a boolean array returns the selected elements as a 1-D array.
twod[mask]

In [11]:
# Comparing an array with a scalar yields a boolean array of the same shape,
# which can be used directly as a mask.
mask = twod < 5
mask

array([[ True,  True,  True,  True,  True, False, False],
       [False, False, False, False, False, False, False],
       [False, False, False, False, False, False, False]])

In [None]:
twod[mask]

In [None]:
# '&' is the element-wise AND. The parentheses are required because '&' binds
# more tightly than the comparison operators.
mask = (twod < 5) & (twod%2 == 0)
mask

In [None]:
twod[mask]

## view vs copy

In [3]:
data1 = np.arange(24).reshape(4,6)
data1

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [None]:
# Basic slicing returns a view: data2 shares its data with data1 rather than
# holding its own copy.
data2 = data1[:2,3:]
data2

In [None]:
# data2 is a view, so this write also changes data1[1, 5].
data2[1,2] = -1
data2

In [None]:
data1

In [None]:
data1 = np.arange(24).reshape(4,6)
data1

In [None]:
# .copy() produces an independent array, so later writes to data2 leave
# data1 untouched.
data2 = data1[:2,3:].copy()
data2

In [None]:
data2[1,2] = -1
data2

In [None]:
data1

## Introspection

In [13]:
data = np.arange(12).reshape(3,4)
data

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [15]:
data.max()

11

In [16]:
data.min()

0

In [None]:
data.sum()

In [None]:
# axis=1 sums across the columns, giving one total per row: [6, 22, 38].
data.sum(axis=1)

In [None]:
# axis=0 sums down the rows, giving one total per column: [12, 15, 18, 21].
data.sum(axis=0)

In [None]:
data.mean()

In [None]:
data.std()

In [None]:
data.std(axis=0)

In [None]:
data.std(axis=1)

## Element by Element Operations

In [None]:
L1 = list(range(10))
L2 = list(range(10, 0, -1))

In [None]:
# For a list, '*' with an integer means repetition, not arithmetic; repeating
# zero times yields an empty list.
L1*0

[]

In [None]:
L1

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [None]:
L2

[10, 9, 8, 7, 6, 5, 4, 3, 2, 1]

In [None]:
# With plain lists, an element-by-element product needs an explicit loop over
# the paired items.
L3 = []
for i, j in zip(L1, L2):
    L3.append(i*j)
L3

[0, 9, 16, 21, 24, 25, 24, 21, 16, 9]

In [None]:
# Lists do not support element-wise multiplication: list * list raises
# TypeError. The error is caught and printed so this demonstration does not
# halt "Run All".
try:
    L1*L2
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: can't multiply sequence by non-int of type 'list'

In [None]:
# Convert both lists to ndarrays; arithmetic operators are then applied
# element by element with no explicit loop.
array1 = np.array(L1)
array2 = np.array(L2)

array1*array2

array([ 0,  9, 16, 21, 24, 25, 24, 21, 16,  9])

In [None]:
array1 + array2

In [None]:
# '/' is true division, so the result is a float array even though both
# inputs hold integers.
array1 / array2

## Setting type

In [None]:
darray = np.arange(100)
darray

In [None]:
darray.dtype

In [None]:
darray.nbytes

In [None]:
# dtype=np.int8 stores each element in a single byte; the values 0..99 fit
# within int8's -128..127 range.
darray = np.arange(100, dtype=np.int8)
darray

In [None]:
darray.dtype

In [None]:
darray.nbytes

In [None]:
# Every element must be convertible to the array's dtype (int8 here), so
# assigning a non-numeric string raises ValueError. The error is caught and
# printed so this demonstration does not halt "Run All".
try:
    darray[14] = 'a'
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

In [None]:
# Assigning a float to an int8 array casts it to the array's dtype; the
# fractional part is dropped, so 0.5 is stored as 0.
darray[14] = 0.5
darray[14]

## Broadcasting

In [None]:
A1 = np.array([[1,2,3],
               [4,5,6],
               [7,8,9]])

In [None]:
A2 = np.array([[1,1,1],
               [1,1,1],
               [1,1,1]])
A1 + A2

array([[ 2,  3,  4],
       [ 5,  6,  7],
       [ 8,  9, 10]])

In [None]:
# A2 now has shape (3,); broadcasting applies it to each row of the (3, 3) A1,
# giving the same result as the full 3x3 matrix of ones.
A2 = np.array([1,1,1])
A1 + A2

array([[ 2,  3,  4],
       [ 5,  6,  7],
       [ 8,  9, 10]])

In [None]:
# A scalar is broadcast to every element of the array.
A1 + 1

array([[ 2,  3,  4],
       [ 5,  6,  7],
       [ 8,  9, 10]])

In [None]:
A4 = np.arange(10).reshape(2,1,5)
A4

array([[[0, 1, 2, 3, 4]],

       [[5, 6, 7, 8, 9]]])

In [None]:
A5 = np.arange(14).reshape(2,7,1)
A5

array([[[ 0],
        [ 1],
        [ 2],
        [ 3],
        [ 4],
        [ 5],
        [ 6]],

       [[ 7],
        [ 8],
        [ 9],
        [10],
        [11],
        [12],
        [13]]])

In [None]:
# Shapes (2, 1, 5) and (2, 7, 1) broadcast to (2, 7, 5): each length-1 axis is
# stretched to match the other operand.
A6 = A4 - A5
A6

array([[[ 0,  1,  2,  3,  4],
        [-1,  0,  1,  2,  3],
        [-2, -1,  0,  1,  2],
        [-3, -2, -1,  0,  1],
        [-4, -3, -2, -1,  0],
        [-5, -4, -3, -2, -1],
        [-6, -5, -4, -3, -2]],

       [[-2, -1,  0,  1,  2],
        [-3, -2, -1,  0,  1],
        [-4, -3, -2, -1,  0],
        [-5, -4, -3, -2, -1],
        [-6, -5, -4, -3, -2],
        [-7, -6, -5, -4, -3],
        [-8, -7, -6, -5, -4]]])

In [None]:
A6.shape

(2, 7, 5)

In [None]:
## Matrix Operations

In [None]:
A1 = np.arange(9).reshape(3,3)
A1

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [None]:
A1.T

array([[0, 3, 6],
       [1, 4, 7],
       [2, 5, 8]])

In [None]:
A2 = np.ones(9).reshape(3,3)
A2

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [None]:
A1 @ A2 # matrix product

array([[ 3.,  3.,  3.],
       [12., 12., 12.],
       [21., 21., 21.]])

In [None]:
# For these 2-D arrays, .dot() computes the same matrix product as '@'.
A1.dot(A2)

array([[ 3.,  3.,  3.],
       [12., 12., 12.],
       [21., 21., 21.]])

In [None]:
# Elements on the main diagonal of A1: [0, 4, 8].
A1.diagonal()

In [None]:
d1 = np.array([[0, 1, 3], 
               [4, 2, 9]])
d1


array([[0, 1, 3],
       [4, 2, 9]])

In [None]:
# All rows, columns 1 onward. The slice is a view onto d1, not a copy.
d2 = d1[:, 1:]
d2

array([[1, 3],
       [2, 9]])

In [None]:
# d2[0, 1] refers to the same element as d1[0, 2], so d1 changes as well.
d2[0,1] = 0
d2

array([[1, 0],
       [2, 9]])

In [None]:
d1

array([[0, 1, 0],
       [4, 2, 9]])

In [None]:
d1[0,2]

In [None]:
# NumPy was already imported as np at the top of the notebook; re-importing is
# harmless. Adding a scalar applies it to every element of the array.
import numpy as np
np.arange(9) + 2


array([ 2,  3,  4,  5,  6,  7,  8,  9, 10])