In [1]:
import numpy as np

# Numpy, DataTypes & Attributes

NumPy's main datatype is the `ndarray` (n-dimensional array).

In [2]:
# 1D: a flat list becomes a one-dimensional array
a1 = np.array([1, 2, 3])
# 2D: a list of two equal-length lists becomes a 2 x 3 array
a2 = np.array([[1, 2, 3], [4, 5, 6]])
# Bare last expression so the notebook displays the array
a2

array([[1, 2, 3],
       [4, 5, 6]])

In [3]:
# shape holds one length per axis; for this 2-D array that is (rows, cols)
a2.shape

(2, 3)

You can view the number of dimensions (axes) of an array through `arr.ndim`.

In [4]:
# a1 has a single axis, a2 has two
a1.ndim, a2.ndim

(1, 2)

In [5]:
# Total number of elements across all axes (2 rows * 3 cols = 6)
a2.size

6

In [6]:
# Confirm that np.array() produced an ndarray instance
type(a1)

numpy.ndarray

### Create df from ndarray

In [7]:
import pandas as pd
# Wrap the 2-D array in a DataFrame; with no labels given, rows and columns
# get default integer labels (0, 1, ...)
df = pd.DataFrame(a2)
df

Unnamed: 0,0,1,2
0,1,2,3
1,4,5,6


## Creating np arrays

In [8]:
# 3 x 3 array filled with 1.0 (the trailing dots in the output mark floats)
ones = np.ones(shape=(3, 3))
ones

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [9]:
# Odd numbers from 1 up to (but not including) 10, stepping by 2
range_arr = np.arange(start=1, stop=10, step=2)
range_arr

array([1, 3, 5, 7, 9])

In [10]:
# 4 x 5 array of random integers drawn from [1, 10) - the upper bound is exclusive
rand_nd_arr = np.random.randint(low=1, high=10, size=(4, 5))
rand_nd_arr

array([[4, 9, 6, 1, 2],
       [7, 1, 9, 2, 7],
       [6, 1, 2, 3, 6],
       [2, 3, 7, 2, 3]])

In [11]:
# 3 x 4 array of random floats in [0, 1); random() takes the shape as one tuple
rand_nd_arr2 = np.random.random(size=(3, 4))
rand_nd_arr2

array([[0.08941689, 0.9534904 , 0.84267458, 0.32701926],
       [0.55329855, 0.02042933, 0.89785377, 0.96808942],
       [0.58113454, 0.88115092, 0.40582285, 0.11828997]])

In [12]:
# Same kind of result as np.random.random((3, 4)), but rand() takes each
# dimension as a separate positional argument instead of a shape tuple
rand_nd_arr3 = np.random.rand(3, 4)
rand_nd_arr3

array([[0.58863258, 0.5657926 , 0.61985598, 0.08110073],
       [0.89611302, 0.07028457, 0.70461984, 0.53303094],
       [0.01084955, 0.73740651, 0.34842513, 0.35155185]])

## Random Seed

Seeding the generator makes the "random" numbers reproducible: the same seed always yields the same sequence.

In [13]:
# Fix the global generator's state so the draw below is the same on every run
np.random.seed(333)
rand_seed_arr = np.random.rand(3, 3)
rand_seed_arr

array([[0.54329109, 0.72895073, 0.01688145],
       [0.3303388 , 0.36872182, 0.04830367],
       [0.10453019, 0.09743752, 0.24540331]])

### Array Slicing

In [14]:
# 4-D array of random integers in [1, 14): 2 blocks x 3 matrices x 4 rows x 5 cols
rand_array = np.random.randint(low=1, high=14, size=(2, 3, 4, 5))
rand_array

array([[[[13,  5, 10,  8, 13],
         [ 6,  6,  9,  9,  8],
         [11,  4,  4,  4, 10],
         [ 4, 13, 11,  4, 12]],

        [[ 8,  8,  2,  1,  9],
         [ 3,  8,  1,  2, 11],
         [ 1,  9,  6,  4,  4],
         [13,  8,  6, 12,  1]],

        [[ 6,  3, 13, 11,  9],
         [ 5,  5, 10, 12, 13],
         [ 7, 13,  7,  5,  2],
         [13,  2,  1, 13, 10]]],


       [[[12,  4, 12,  8,  4],
         [13, 11,  3,  5,  9],
         [13, 13, 11,  3,  6],
         [ 9,  1,  7,  6, 11]],

        [[ 4,  8,  1,  8, 11],
         [ 1,  2,  9, 13,  2],
         [ 4, 11,  5,  7,  9],
         [ 6,  8,  8,  2,  2]],

        [[ 6,  4,  4,  5,  8],
         [ 8, 11,  6, 11,  3],
         [ 3,  5, 10,  5,  8],
         [13,  9,  4,  8,  7]]]])

In [15]:
# Goal: get the first 2 els of the deepest (innermost) arrays.
# Check the shape first - four axes means four positions in the slice.
rand_array.shape

(2, 3, 4, 5)

In [16]:
# Keep every leading axis whole (...) and take the first 2 items of the last axis
rand_array[..., :2]

array([[[[13,  5],
         [ 6,  6],
         [11,  4],
         [ 4, 13]],

        [[ 8,  8],
         [ 3,  8],
         [ 1,  9],
         [13,  8]],

        [[ 6,  3],
         [ 5,  5],
         [ 7, 13],
         [13,  2]]],


       [[[12,  4],
         [13, 11],
         [13, 13],
         [ 9,  1]],

        [[ 4,  8],
         [ 1,  2],
         [ 4, 11],
         [ 6,  8]],

        [[ 6,  4],
         [ 8, 11],
         [ 3,  5],
         [13,  9]]]])

## Aggregation

Aggregation involves performing the same operation on a number of things, e.g. reducing a whole array to a single sum or mean.

Comparing Python's built-in `sum()` with NumPy's `np.sum()`:

In [17]:
# 1000 random floats to aggregate
big_arr = np.random.random(1000)
# Built-in sum() walks the array one element at a time in Python ...
%timeit sum(big_arr)
# ... while np.sum() does the reduction in compiled code (about 19x faster in the run below)
%timeit np.sum(big_arr)

82.7 µs ± 1.65 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
4.33 µs ± 184 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [18]:
# Reminder of a2's contents before aggregating it
a2

array([[1, 2, 3],
       [4, 5, 6]])

In [19]:
# With no axis given, the mean is taken over every element: (1+2+...+6) / 6
np.mean(a2)

3.5

In [21]:
# Standard deviation - how spread out the values are around the mean.
# It is the square root of the variance.
np.std(a2)

1.707825127659933

In [22]:
# Variance - the average of the squared differences between each value and
# the mean (i.e. the standard deviation squared)
np.var(a2)

2.9166666666666665

## Reshape & Transpose

Reshape changes the shape & dimensions of an array (the total number of elements must stay the same), while transpose flips the axes of the matrix.

In [24]:
# Starting point: the array and its (rows, cols) shape
a2, a2.shape

(array([[1, 2, 3],
        [4, 5, 6]]),
 (2, 3))

In [25]:
# Same 6 elements, now on three axes: each value sits in its own length-1 innermost array
a2_reshaped = a2.reshape((2, 3, 1))
a2_reshaped

array([[[1],
        [2],
        [3]],

       [[4],
        [5],
        [6]]])

In [26]:
# Swap the axes: the 2 x 3 array becomes 3 x 2 (rows turn into columns)
a2_trans = a2.transpose()
a2_trans

array([[1, 4],
       [2, 5],
       [3, 6]])

### Dot Products

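Apart from `element-wise` multiplication of matrices, which just multiplies corresponding items, the `dot product` performs true matrix multiplication: each row of the first matrix is multiplied with each column of the second and the products are summed. The two matrices do not need the same shape, but the number of columns of the first must equal the number of rows of the second; when two matrices have the same (non-square) shape, taking the `transpose` of one of them is a common way to make them line up.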

For it to work, the inner dimensions must match. Example: `(4 x 4).dot(4 x 2) = (4 x 2)` — the result takes the outer dimensions.

## Sorting Arrays

In [29]:
# The array to be sorted below
a2

array([[1, 2, 3],
       [4, 5, 6]])

`np.sort()` returns a sorted copy of the array, sorting along the last axis by default (i.e. within each row of a 2-D array).

In [31]:
# axis=-1 (the default) sorts within each row; a2's rows are already ascending
np.sort(a2, axis=-1)

array([[1, 2, 3],
       [4, 5, 6]])

`np.argsort()` returns the indices that would sort the array (along the last axis by default): in each row, the first index points at that row's smallest element and the last index at its largest.

In [46]:
# Seed first so the same 3 x 3 array of integers in [0, 10) comes out on every run
np.random.seed(346)
a3 = np.random.randint(low=0, high=10, size=(3, 3))
a3

array([[4, 8, 5],
       [5, 9, 2],
       [5, 5, 6]])

In [47]:
# Per row (axis=-1, the default): the column indices that would put that row in ascending order
np.argsort(a3, axis=-1)

array([[0, 2, 1],
       [2, 0, 1],
       [0, 1, 2]], dtype=int64)

To get the index of the smallest array element (counted over the flattened array when no `axis` is given):

In [48]:
# No axis given, so the result is an index into the flattened array:
# 5 -> row 1, column 2, where the smallest value (2) sits
np.argmin(a3)

5

In [51]:
# Reminder of a3's contents for the per-axis argmax calls
a3

array([[4, 8, 5],
       [5, 9, 2],
       [5, 5, 6]])

In [52]:
# axis=0 compares down each column (across the rows) and returns, for every
# column, the row index of its largest value; ties resolve to the first occurrence
np.argmax(a3, axis=0)

array([1, 1, 2], dtype=int64)

In [50]:
# axis=1 compares along each row and returns, for every row, the column
# index of its largest value
np.argmax(a3, axis=1)

array([1, 1, 2], dtype=int64)

## Numpy in Action

In [53]:
# Converting images to pixel colors in numbers
from matplotlib.image import imread

<img src="images/panda.png" alt="Panda" />

In [56]:
# Read the image file from disk; the printed type shows it comes back as an ndarray
panda = imread('images/panda.png')
print(type(panda))

<class 'numpy.ndarray'>


In [58]:
# First slice along axis 0 (the top row of pixels, presumably 3 colour
# channels per pixel - confirm with panda.shape) and the total number of stored values
panda[:1], panda.size

(array([[[0.05490196, 0.10588235, 0.06666667],
         [0.05490196, 0.10588235, 0.06666667],
         [0.05490196, 0.10588235, 0.06666667],
         ...,
         [0.16470589, 0.12941177, 0.09411765],
         [0.16470589, 0.12941177, 0.09411765],
         [0.16470589, 0.12941177, 0.09411765]]], dtype=float32),
 24465000)