# NumPy: Numeric computing library

NumPy (Numerical Python) is one of the core packages for numerical computing in Python. Pandas, Matplotlib, Statsmodels and many other scientific libraries rely on NumPy.

NumPy's major contributions are:

* Efficient numeric computation with C primitives
* Efficient collections with vectorized operations
* An integrated and natural Linear Algebra API
* A C API for connecting NumPy with libraries written in C, C++, or FORTRAN.

Let's elaborate on efficiency. In Python, **everything is an object**, which means that even simple ints are objects, with all the machinery required to make objects work. We call them "Boxed Ints". In contrast, NumPy uses primitive numeric types (floats, ints), which makes storage and computation efficient.

<img src="https://docs.google.com/drawings/d/e/2PACX-1vTkDtKYMUVdpfVb3TTpr_8rrVtpal2dOknUUEOu85wJ1RitzHHf5nsJqz1O0SnTt8BwgJjxXMYXyIqs/pub?w=726&h=396" />


In [130]:
import sys
import numpy as np

## Basic Numpy Arrays

## Creating Numpy arrays

In [131]:
np.array([1, 2, 3, 4])

array([1, 2, 3, 4])

In [132]:
a = np.array([1, 2, 3, 4])
a

array([1, 2, 3, 4])

In [133]:
b = np.array([0, .5, 1, 1.5, 2])
b

array([0. , 0.5, 1. , 1.5, 2. ])

## Slicing

In [134]:
a[0:]

array([1, 2, 3, 4])

In [135]:
a[1:3]

array([2, 3])

In [136]:
a[1:-1]

array([2, 3])

In [137]:
a[::2]

array([1, 3])

In [138]:
b

array([0. , 0.5, 1. , 1.5, 2. ])

## Extracting index data

In [139]:
a[0], a[1]

(1, 2)

In [140]:
b[0], b[2], b[-1]

(0.0, 1.0, 2.0)

In [141]:
b[[0, 2, -1]]

array([0., 1., 2.])

## Array Types

In [142]:
a

array([1, 2, 3, 4])

### Get the data type (dtype) of a NumPy array

In [143]:
a.dtype

dtype('int64')

In [144]:
b

array([0. , 0.5, 1. , 1.5, 2. ])

In [145]:
b.dtype

dtype('float64')

In [146]:
# Use the builtin `float` (equivalent to np.float64): the `np.float` alias was
# deprecated in NumPy 1.20 and removed in NumPy 1.24.
np.array([1, 2, 3, 4], dtype=float)

array([1., 2., 3., 4.])

In [147]:
np.array([1, 2, 3, 4], dtype=np.int8)

array([1, 2, 3, 4], dtype=int8)

In [148]:
c = np.array(['a', 'b', 'c'])
c

array(['a', 'b', 'c'], dtype='<U1')

In [149]:
c.dtype

dtype('<U1')

In [150]:
d = np.array([{'a': 1}, sys])
d

array([{'a': 1}, <module 'sys' (built-in)>], dtype=object)

In [151]:
d.dtype

dtype('O')

## Dimensions and shapes

In [152]:
A = np.array([
    [1, 2, 3],
    [4, 5, 6]
])
A

array([[1, 2, 3],
       [4, 5, 6]])

### Get the shape of A

In [153]:
A.shape

(2, 3)

### Get the number of dimensions

In [154]:
A.ndim

2

### Get the total number of elements in the matrix

In [155]:
A.size

6

In [156]:
B = np.array([
    [
        [12, 11, 10],
        [9, 8, 7],
    ],
    [
        [6, 5, 4],
        [3, 2, 1]
    ]
])
B

array([[[12, 11, 10],
        [ 9,  8,  7]],

       [[ 6,  5,  4],
        [ 3,  2,  1]]])

In [157]:
B.shape

(2, 2, 3)

In [158]:
B.ndim

3

In [159]:
B.size

12

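If the nested lists don't have a consistent shape, NumPy cannot build a regular n-dimensional array and stores the pieces as regular Python objects instead (recent NumPy versions require `dtype=object` to be passed explicitly for this):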

In [160]:
# The two outer entries have different shapes (2x3 vs 1x3), so no regular 3-D
# array can be built and each entry is stored as a plain Python list.
# dtype=object is passed explicitly: without it, NumPy >= 1.24 raises
# ValueError for ragged input (older versions only emitted a warning).
C = np.array([
    [
        [12, 11, 10],
        [9, 8, 7],
    ],
    [
        [6, 5, 4]
    ]
], dtype=object)
C

  import sys


array([list([[12, 11, 10], [9, 8, 7]]), list([[6, 5, 4]])], dtype=object)

In [161]:
C.dtype

dtype('O')

In [162]:
C.shape

(2,)

In [163]:
C.size

2

### Get the type of an object

In [164]:
type(C[0])

list

## Indexing and Slicing of Matrices

In [165]:
# Square matrix
A = np.array([
#    l  l  l
#    o  o  o
#.   c  c  c
#.   0. 1. 2
    [1, 2, 3], # 0 row
    [4, 5, 6], # 1 row
    [7, 8, 9]  # 2 row
])
A

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

### Get a row of the matrix A

In [166]:
A[1]

array([4, 5, 6])

### Get the element in row 1 and column 0 of matrix A

In [167]:
A[1][0]

4

In [168]:
A[1, 0]

4

### Get rows 0 up to (but not including) 2 of matrix A

In [169]:
A[0:2]

array([[1, 2, 3],
       [4, 5, 6]])

### Get columns 0 up to (but not including) 2 from all rows of matrix A

In [170]:
A[:, :2]

array([[1, 2],
       [4, 5],
       [7, 8]])

### Get columns 0 up to (but not including) 2 from rows 0 up to (but not including) 2

In [171]:
A[:2, :2]

array([[1, 2],
       [4, 5]])

### Get all columns from 2 onward (inclusive) from rows 0 up to (but not including) 2

In [172]:
A[:2, 2:]

array([[3],
       [6]])

In [173]:
A

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [174]:
A[1] = np.array([10, 10, 10])

In [175]:
A

array([[ 1,  2,  3],
       [10, 10, 10],
       [ 7,  8,  9]])

In [176]:
A[2] = 99

In [177]:
A

array([[ 1,  2,  3],
       [10, 10, 10],
       [99, 99, 99]])

## Summary statistics

In [178]:
a = np.array([1, 2, 3, 4])
a

array([1, 2, 3, 4])

### Get the sum of all elements in a

In [179]:
a.sum()

10

### Get the mean of all elements in a

In [180]:
a.mean()

2.5

### Get the standard deviation of all elements in a

In [181]:
a.std()

1.118033988749895

### Get the variance of all elements in a

In [182]:
a.var()

1.25

In [183]:
A = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9]
])
A

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [184]:
A.sum()

45

In [185]:
A.mean()

5.0

In [186]:
A.std()

2.581988897471611

## Remember: axis=0 -> one result per column (collapses the rows) | axis=1 -> one result per row (collapses the columns)

In [187]:
A.sum(axis=0)

array([12, 15, 18])

In [188]:
A.sum(axis=1)

array([ 6, 15, 24])

In [189]:
A.mean(axis=0)

array([4., 5., 6.])

In [190]:
A.mean(axis=1)

array([2., 5., 8.])

In [191]:
A.std(axis=0)

array([2.44948974, 2.44948974, 2.44948974])

In [192]:
A.std(axis=1)

array([0.81649658, 0.81649658, 0.81649658])

And [many more](https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.ndarray.html#array-methods)...

## Broadcasting and Vectorized operations

In [193]:
a = np.arange(4)
a

array([0, 1, 2, 3])

### Add 10 to all elements in a

In [194]:
a + 10

array([10, 11, 12, 13])

### Multiply all elements in a by 10

In [195]:
a * 10

array([ 0, 10, 20, 30])

In [196]:
a

array([0, 1, 2, 3])

### Add 100 to all elements in a, in place (the result is stored back in a)

In [197]:
a += 100

In [198]:
a

array([100, 101, 102, 103])

In [199]:
l = [0, 1, 2, 3]

### Multiply all elements of a Python list by 10

In [200]:
[i * 10 for i in l]

[0, 10, 20, 30]

In [201]:
a = np.arange(4)
a

array([0, 1, 2, 3])

In [202]:
b = np.array([10, 10, 10, 10])
b

array([10, 10, 10, 10])

### Add two NumPy arrays element-wise

In [203]:
a + b

array([10, 11, 12, 13])

### Multiply two NumPy arrays element-wise

In [204]:
a * b

array([ 0, 10, 20, 30])

## Boolean arrays
_(Also called masks)_

In [205]:
a = np.arange(4)
a

array([0, 1, 2, 3])

In [206]:
a[0], a[-1]

(0, 3)

In [207]:
a[[0, -1]]

array([0, 3])

### Select the elements at the positions where the mask is True

In [208]:
a[[True, False, False, True]]

array([0, 3])

In [209]:
a

array([0, 1, 2, 3])

### Check which values are greater than or equal to 2

In [210]:
a >= 2

array([False, False,  True,  True])

### Get all the values greater than or equal to 2

In [211]:
a[a >= 2]

array([2, 3])

In [212]:
a.mean()

1.5

### Get all the values greater than the mean of a

In [213]:
a[a > a.mean()]

array([2, 3])

### Get all the values less than or equal to the mean of a

In [214]:
a[~(a > a.mean())]

array([0, 1])

### Get the values equal to 0 or 1

In [215]:
a[(a == 0) | (a == 1)]

array([0, 1])

### Get the values that are less than or equal to 2 and even

In [216]:
a[(a <= 2) & (a % 2 == 0)]

array([0, 2])

### Create a random 3 x 3 matrix with integer values from 0 to 99 (the upper bound 100 is exclusive)

In [217]:
A = np.random.randint(100, size=(3, 3))
A

array([[21, 71, 83],
       [95, 56, 46],
       [72, 34, 76]])

In [218]:
A[np.array([
    [True, False, True],
    [False, True, False],
    [True, False, True]
])]

array([21, 83, 56, 72, 76])

In [219]:
A > 30

array([[False,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])

In [220]:
A[A > 30]

array([71, 83, 95, 56, 46, 72, 34, 76])

## Linear Algebra

In [221]:
A = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9]
])
A

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [222]:
B = np.array([
    [6, 5],
    [4, 3],
    [2, 1]
])
B

array([[6, 5],
       [4, 3],
       [2, 1]])

### Get the dot product of A and B (note: for 2-D arrays, `.dot` and `@` give the same result)

In [223]:
A.dot(B)

array([[20, 14],
       [56, 41],
       [92, 68]])

In [224]:
A @ B

array([[20, 14],
       [56, 41],
       [92, 68]])

### Get the transpose of B

In [225]:
B.T

array([[6, 4, 2],
       [5, 3, 1]])

In [226]:
A

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

### Get the dot product of the transpose of B and A

In [227]:
B.T @ A

array([[36, 48, 60],
       [24, 33, 42]])

## Size of objects in Memory

### Int, floats

In [228]:
# An integer in Python is > 24 bytes
sys.getsizeof(1)

28

In [229]:
# Longs are even larger
sys.getsizeof(10**100)

72

In [230]:
# Numpy size is much smaller
np.dtype(int).itemsize

8

In [231]:
# Numpy size is much smaller
np.dtype(np.int8).itemsize

1

In [232]:
np.dtype(float).itemsize

8

### Lists are even larger

In [233]:
# A one-element list
sys.getsizeof([1])

72

In [234]:
# An array of one element in numpy
np.array([1]).nbytes

8

### And performance is also important

In [235]:
l = list(range(100000))

In [236]:
a = np.arange(100000)
a

array([    0,     1,     2, ..., 99997, 99998, 99999])

In [237]:
%time np.sum(a ** 2)

CPU times: user 1.02 ms, sys: 1 µs, total: 1.02 ms
Wall time: 808 µs


333328333350000

In [238]:
%time sum([x ** 2 for x in l])

CPU times: user 41.4 ms, sys: 4.36 ms, total: 45.7 ms
Wall time: 64.4 ms


333328333350000

## Useful Numpy functions

#### Note: Check the documentation to learn all the parameters of the following functions

### `random` 

In [239]:
np.random.random(size=2)

array([0.63634651, 0.25338369])

In [240]:
np.random.normal(size=2)

array([-0.49409138,  1.20962426])

In [241]:
np.random.rand(2, 4)

array([[0.59643703, 0.48707303, 0.91078877, 0.46017514],
       [0.55083768, 0.85260071, 0.00150116, 0.17612639]])

---
### `arange`

In [242]:
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [243]:
np.arange(5, 10)

array([5, 6, 7, 8, 9])

In [244]:
np.arange(0, 1, .1)

array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])

---
### `reshape`

#### Change the number of rows and columns to a given shape

In [245]:
np.arange(10).reshape(2, 5)

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [246]:
np.arange(10).reshape(5, 2)

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])

---
### `linspace`

In [247]:
np.linspace(0, 1, 5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [248]:
np.linspace(0, 1, 20)

array([0.        , 0.05263158, 0.10526316, 0.15789474, 0.21052632,
       0.26315789, 0.31578947, 0.36842105, 0.42105263, 0.47368421,
       0.52631579, 0.57894737, 0.63157895, 0.68421053, 0.73684211,
       0.78947368, 0.84210526, 0.89473684, 0.94736842, 1.        ])

In [249]:
np.linspace(0, 1, 20, False)

array([0.  , 0.05, 0.1 , 0.15, 0.2 , 0.25, 0.3 , 0.35, 0.4 , 0.45, 0.5 ,
       0.55, 0.6 , 0.65, 0.7 , 0.75, 0.8 , 0.85, 0.9 , 0.95])

---
### `zeros`, `ones`, `empty`

In [250]:
np.zeros(5)

array([0., 0., 0., 0., 0.])

In [251]:
np.zeros((3, 3))

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [252]:
# Use the builtin `int` (NumPy's default integer type): the `np.int` alias was
# deprecated in NumPy 1.20 and removed in NumPy 1.24.
np.zeros((3, 3), dtype=int)

array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]])

In [253]:
np.ones(5)

array([1., 1., 1., 1., 1.])

In [254]:
np.ones((3, 3))

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [255]:
np.empty(5)

array([1., 1., 1., 1., 1.])

In [256]:
np.empty((2, 2))

array([[0.25, 0.5 ],
       [0.75, 1.  ]])

---
### `identity` and `eye`

In [257]:
np.identity(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [258]:
np.eye(3, 3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [259]:
np.eye(8, 4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [260]:
np.eye(8, 4, k=1)

array([[0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [261]:
np.eye(8, 4, k=-3)

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 0.]])