In [1]:
import numpy as np

# Numpy Examples

## Creating Arrays
a
: A 2D array made from a 2D python list

b
: An array of all zeros with length 10 (dtype ensures they're int types)

c
: A 3x5 array filled with 2s

d
: An array from 0 up to (but not including) 20, stepping by 2

e
: An array from 0 to 1 with 11 elements stepping by equal amounts

f
: A 3x3 array of random float values between 0 and 1

g
: A 3x3 array of random values drawn from a normal distribution with mean 0 and standard deviation 1

h
: A 3x3 array with random ints from 0 to 9 (the upper bound 10 is excluded)

i
: A 3x3 identity matrix

j
: A diagonal array with the diagonal using the given python list

In [2]:
# One example per array-construction helper; each name matches the list above.
a = np.array([[1, 2], [3, 4]])
b = np.zeros(10, dtype=int)  # np.ones() takes the same arguments
c = np.full(shape=(3, 5), fill_value=2)
d = np.arange(0, 20, 2)
e = np.linspace(0, 1, num=11)
# The three random draws stay in this order so the global generator is
# consumed exactly as before.
f = np.random.random(size=(3, 3))
g = np.random.normal(loc=0, scale=1, size=(3, 3))
h = np.random.randint(low=0, high=10, size=(3, 3))
i = np.identity(3)
j = np.diag([1, 2, 3, 4])

# Print every array under its label, with a blank line between entries.
labelled = [('a', a), ('b', b), ('c', c), ('d', d), ('e', e),
            ('f', f), ('g', g), ('h', h), ('i', i), ('j', j)]
print('\n\n'.join(label + ':\n' + str(value) for label, value in labelled))

a:
[[1 2]
 [3 4]]

b:
[0 0 0 0 0 0 0 0 0 0]

c:
[[2 2 2 2 2]
 [2 2 2 2 2]
 [2 2 2 2 2]]

d:
[ 0  2  4  6  8 10 12 14 16 18]

e:
[0.  0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1. ]

f:
[[0.64138851 0.46856374 0.16729378]
 [0.02165957 0.83437551 0.32001145]
 [0.2473193  0.15516348 0.49662939]]

g:
[[ 0.47484559  2.13883664  0.21949439]
 [-0.0618923   0.1436913   2.04177286]
 [-0.47213122 -1.41235471  1.04799181]]

h:
[[5 0 9]
 [2 4 4]
 [5 5 9]]

i:
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]

j:
[[1 0 0 0]
 [0 2 0 0]
 [0 0 3 0]
 [0 0 0 4]]


### Main Data Types
- bool
- ints
    - int8
    - int16
    - int32
    - int64 (long)
- unsigned ints
    - uint8
    - uint16
    - uint32
    - uint64 (long)
- floats
    - float16
    - float32
    - float64 (double)

### Array Characteristics
- Shape
: Length of each dimension of the matrix
- Size
: Number of elements in the matrix
- ndim
: Number of dimensions of the matrix
- dtype
: Data type of matrix (see above)

In [3]:
# Three random integer arrays of different rank, used to compare attributes.
a = np.random.randint(0, 10, size=(2, 3))
b = np.random.randint(0, 10, size=6)
c = np.random.randint(0, 100, size=(10, 10, 10))

# One line per attribute, listing its value for a, b and c in that order.
for attribute in ('shape', 'size', 'ndim', 'dtype'):
    print(f'{attribute}:', *(getattr(array, attribute) for array in (a, b, c)))

shape: (2, 3) (6,) (10, 10, 10)
size: 6 6 1000
ndim: 2 1 3
dtype: int32 int32 int32


### Array Indexing
a
: Row 1, column 2

b
: Row 0, column 0

c
: Last row, last column

d
: Row 1 (equivalent to `x[1,::]` and `x[1]`)

e
: Column 2

f
: Rows 0 and 1

g
: Rows in reverse order

h
: Row 0, Column 1 and Row 1, Column 1 as a 2x1 array

In [4]:
x = np.arange(1, 10).reshape(3, 3)

# Each label is paired with the selection described in the list above.
selections = [
    ('a', x[1, 2]),
    ('b', x[0, 0]),
    ('c', x[-1, -1]),
    ('d', x[1, :]),
    ('e', x[:, 2]),
    ('f', x[:2]),
    ('g', x[::-1]),
    ('h', x[0:2:1, 1:2:1]),
]
# Label on one line, selection on the next, blank line between entries.
print('\n\n'.join(label + ':\n' + str(picked) for label, picked in selections))

a:
6

b:
1

c:
9

d:
[4 5 6]

e:
[3 6 9]

f:
[[1 2 3]
 [4 5 6]]

g:
[[7 8 9]
 [4 5 6]
 [1 2 3]]

h:
[[2]
 [5]]


### Array Reshaping
a
: Normal array from 1-9

b
: Array *a* turned into a 3x3

c
: Array *b* turned back into a 1D array

d
: Appending 10 to array *c* and turning it into a 5x2

In [27]:
# Integers 1 through 9; the stop value 10 is exclusive.
a = np.arange(1, 10, 1)
print(a)

[1 2 3 4 5 6 7 8 9]


In [28]:
# Reshape the 9-element 1D array `a` (defined in the previous cell) into 3x3.
b = a.reshape((3, 3))
# Flatten `b` back to 1D, so `c` really is derived from `b` as described above
# (previously this reshaped `a`, which was already 1D).
c = b.reshape((9,))
# np.append returns a new 10-element array, which then fits a 5x2 shape.
d = np.append(c, 10).reshape((5, 2))
print(b)
print(c)
print(d)

[[1 2 3]
 [4 5 6]
 [7 8 9]]
[1 2 3 4 5 6 7 8 9]
[[ 1  2]
 [ 3  4]
 [ 5  6]
 [ 7  8]
 [ 9 10]]


### Concatenating Arrays

In [7]:
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
# Joining two 1D arrays end to end gives one longer 1D array.
joined = np.concatenate((a, b))
print(joined)

[1 2 3 4 5 6]


In [8]:
a = np.array([[1, 2, 3],
              [4, 5, 6]])
b = np.array([[6, 7, 8],
              [9, 10, 11]])
# axis=0 (the default) appends the rows of b below a;
# axis=1 places b to the right of a.
for axis in (0, 1):
    print(np.concatenate((a, b), axis=axis))

[[ 1  2  3]
 [ 4  5  6]
 [ 6  7  8]
 [ 9 10 11]]
[[ 1  2  3  6  7  8]
 [ 4  5  6  9 10 11]]


Alternative way to specify stacking horizontally or vertically

In [9]:
# Uses the 2D arrays `a` and `b` from the previous cell.
# vstack stacks them row-wise and hstack column-wise.
print(np.vstack((a, b)))
print(np.hstack((a, b)))

[[ 1  2  3]
 [ 4  5  6]
 [ 6  7  8]
 [ 9 10 11]]
[[ 1  2  3  6  7  8]
 [ 4  5  6  9 10 11]]


### Merging 1D Arrays

c
: Normal concatenation

d
: Vertically stacking

e
: Taking the transpose of the vertical stack, resulting in a column format

f
: Same result as *e* but using horizontal stack (`np.newaxis` adds a dimension to the array/matrix without adding any new elements)


In [10]:
a = np.arange(10, 0, -1)  # 10 down to 1
b = np.arange(1, 11)      # 1 up to 10
c = np.concatenate((a, b))
d = np.vstack((a, b))
e = np.transpose(np.vstack((a, b)))
# Indexing with np.newaxis turns each 1D array into a 10x1 column before stacking.
f = np.hstack((a[:, np.newaxis], b[:, np.newaxis]))
for merged in (c, d, e, f):
    print(merged)

[10  9  8  7  6  5  4  3  2  1  1  2  3  4  5  6  7  8  9 10]
[[10  9  8  7  6  5  4  3  2  1]
 [ 1  2  3  4  5  6  7  8  9 10]]
[[10  1]
 [ 9  2]
 [ 8  3]
 [ 7  4]
 [ 6  5]
 [ 5  6]
 [ 4  7]
 [ 3  8]
 [ 2  9]
 [ 1 10]]
[[10  1]
 [ 9  2]
 [ 8  3]
 [ 7  4]
 [ 6  5]
 [ 5  6]
 [ 4  7]
 [ 3  8]
 [ 2  9]
 [ 1 10]]


### Splitting of Arrays

The split method takes the split points as inputs in the form of array indices

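Similar to stacking, the `axis` parameter selects the direction of the split (for a 2D array, `axis=0` splits vertically into groups of rows and `axis=1` splits horizontally into groups of columns); the `vsplit` and `hsplit` functions are shortcuts for these two cases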

In [11]:
a = np.array([1, 2, 3, 4, 5, 6])
# Cut before index 2 and before index 4, giving three pieces.
x, y, z = np.split(a, [2, 4])
for piece in (x, y, z):
    print(piece)

b = np.array([[1, 2], [3, 4], [5, 6]])
# Cut before row 2: the first two rows, then the remaining row.
u, w = np.vsplit(b, [2])
for piece in (u, w):
    print(piece)

[1 2]
[3 4]
[5 6]
[[1 2]
 [3 4]]
[[5 6]]


### Vectorized Operations

Reducing the use of for loops

b
: Adding 5 to every element in array *a*

c
: Vector division of 2 arrays

In [12]:
a = np.array([[1, 2], [3, 4], [5, 6]])
b = 5 + a  # the scalar is added to every element, no loop needed

# Element-wise division of [0..4] by [1..5].
numerators = np.arange(5)
denominators = np.arange(1, 6)
c = numerators / denominators

for result in (b, c):
    print(result)

[[ 6  7]
 [ 8  9]
 [10 11]]
[0.         0.5        0.66666667 0.75       0.8       ]


### Custom Function Vectorization

Using custom vectorization is for non trivial operations (trivial ones can be directly done on array)

Optional parameter otypes is for output types

In [13]:
a = np.full((3, 3), 10)
b = np.arange(1, 10).reshape(3, 3)

def customFunc(x, y):
    """Return the smaller value if the larger exceeds twice it, else the mean of x and y."""
    smaller, larger = min(x, y), max(x, y)
    if larger > 2 * smaller:
        return smaller
    return (x + y) / 2

# otypes forces a float output array so the averaged values keep their decimals.
elementwise = np.vectorize(customFunc, otypes=[np.float64])
print(elementwise(a, b))

[[1.  2.  3. ]
 [4.  7.5 8. ]
 [8.5 9.  9.5]]


Can also apply a function to each row or column of an array with the `apply_along_axis` function

In [14]:
a = np.array([[3, 4], [2, 1]])

def customFunc(r):
    """Return 2 when the first element of the 1D slice exceeds 3, else its second element."""
    return 2 if r[0] > 3 else r[1]

# axis=1 passes each row to the function; axis=0 passes each column.
for axis in (1, 0):
    print(np.apply_along_axis(customFunc, axis, a))

[4 1]
[2 2]


### Outer Product

The `outer` function takes 2 np arrays and creates a 2D array that has a value for every possible combination of the 2 arrays

Example below is basically like a multiplication table

In [15]:
x = np.arange(1, 5)
y = np.arange(1, 10)
# Entry [r, c] is x[r] * y[c]; np.add.outer builds the addition table the same way.
table = np.multiply.outer(x, y)
print(table)

[[ 1  2  3  4  5  6  7  8  9]
 [ 2  4  6  8 10 12 14 16 18]
 [ 3  6  9 12 15 18 21 24 27]
 [ 4  8 12 16 20 24 28 32 36]]


### Predefined Functions

* Called on the array

    `sum`
    : Add all elements in the array

    `max`
    : Max value in array

    `mean`
    : Mean of all elements in array

    `std`
    : Standard deviation of all elements in array

    `var`
    : Variance of all elements in array

    `argmin`/`argmax`
    : Index of smallest/largest element in array

* Called on `np`

    `any`
    : Is any element `True`?

    `all`
    : Are all elements `True`?

    `median`
    : Get median of all elements in array

    `percentile`
    : Get specific percentile of all elements in array

## Using Axis

For an n-dimensional array, axis can be from 0 to n-1

For the functions above, a specific axis can be given, otherwise it assumes the entire array

In [16]:
a = np.array([[1, 2, 3],
              [4, 5, 6]])
# axis=None reduces over the whole array, axis=0 gives one minimum per column,
# axis=1 gives one minimum per row.
for axis in (None, 0, 1):
    print(a.min(axis=axis))

1
[1 2 3]
[1 4]


## Broadcasting

The ability to use arrays with different but compatible shapes together as arguments in an operation

Below, 10 is a scalar that is broadcast to an array with shape (5,)

It copies the scalar value onto each element

In [17]:
a = np.arange(5)
# The scalar 10 is broadcast across all five elements of `a`.
shifted = a + 10
print(shifted)

[10 11 12 13 14]


Now array *b*, with shape (3,1) is broadcast to (3,2)

It copies the first column to make the second column

In [18]:
a = np.array([[0, 1],
              [2, 3],
              [4, 5]])
# A (3, 1) column: its single column is reused for both columns of `a`.
b = np.array([10, 20, 30]).reshape(3, 1)
print(a + b)

[[10 11]
 [22 23]
 [34 35]]


## Creating Boolean Arrays and Using Conditions for Counting

Doing conditional vectorized operations on arrays creates boolean arrays

In [19]:
a = np.arange(1, 6)
# A comparison is applied element-wise and yields a boolean array.
print(a > 3)
# True only where doubling and squaring agree (here only for the value 2).
print(a * 2 == a ** 2)

[False False False  True  True]
[False  True False False False]


You can also count elements that satisfy a condition

In [20]:
a = np.array([[5, 0, 3, 3],
              [7, 9, 3, 5],
              [2, 4, 7, 6]])
below_six = a < 6  # boolean mask; True counts as 1 when summed
print(below_six.sum())            # how many elements are < 6
print(below_six.sum(axis=0))      # how many elements are < 6 in each column
print((a < 8).all(axis=1))        # for each row, are all elements < 8

8
[2 2 2 2]
[ True False  True]


## Filtering

In [21]:
a = np.array([[5, 0, 3, 3],
              [7, 9, 3, 5],
              [2, 4, 7, 6]])
# Indexing with a boolean array of the same shape returns the selected
# elements as a 1D array, in row-major order.
greater_than_three = a > 3
print(a[greater_than_three])

[5 7 9 5 4 7 6]


## Fancy Indexing

Input array contains indices that are to be selected

b
: Array that includes `a[0,2]`, `a[1,1]`, and `a[2,3]`

c
: Columns 2,1,3

d
: Elements 2,0,1 of row 2

e
: Elements 2,0,1 of all rows >= 1

f
: Elements 0 and 2 of row 0 using boolean array as a mask

In [22]:
a = np.array([[5, 0, 3, 3],
              [7, 9, 3, 5],
              [2, 4, 7, 6]])
# Row and column index lists are paired up: (0,2), (1,1), (2,3).
b = a[[0, 1, 2], [2, 1, 3]]
# A slice combined with an index list picks the listed columns from the sliced rows.
c = a[:, [2, 1, 3]]
d = a[2, [2, 0, 1]]
e = a[1:, [2, 0, 1]]
# The boolean mask keeps the columns where it is True; the slice :1 keeps
# row 0 as a 2D result.
mask = np.array([True, False, True, False])
f = a[:1, mask]
for selected in (b, c, d, e, f):
    print(selected)

[3 9 6]
[[3 0 3]
 [3 9 5]
 [7 4 6]]
[7 2 4]
[[3 7 9]
 [7 2 4]]
[[5 3]]


Fancy indexing can also be used to modify an array

In [23]:
a = np.arange(0, 10)
i = np.array([2, 1, 8, 4])
# Assign 99 to the four positions listed in `i`.
a[i] = 99
print(a)
# Increment the same four positions in place.
a[i] += 1
print(a)

[ 0 99 99  3 99  5  6  7 99  9]
[  0 100 100   3 100   5   6   7 100   9]


## Sorting Arrays

b
: Array *a* sorted by value

c
: Array of indices in order of sorted array *a*

d
: The matrix with each column sorted independently (`axis=0`)

In [24]:
a = np.array([2, 1, 4, 3, 5])
b = np.sort(a)     # sorted copy; `a` itself is left untouched
c = np.argsort(a)  # indices that would put `a` in sorted order
matrix = np.array([[5, 0, 3, 3],
                   [7, 9, 3, 5],
                   [2, 4, 7, 6]])
d = np.sort(matrix, axis=0)  # sorts the values within each column
for result in (b, c, d):
    print(result)

[1 2 3 4 5]
[1 0 3 2 4]
[[2 0 3 3]
 [5 4 3 5]
 [7 9 7 6]]


Use `partition` to partially sort

In [25]:
a = np.array([3, 6, 4, 1, 5, 20, 10])
# With kth=3, index 3 ends up holding the value it would have in a full sort;
# smaller values are moved before it and larger ones after it, and the order
# within each side is not guaranteed.
print(np.partition(a, kth=3))

[ 1  3  4  5  6 20 10]


## Reading/Writing Numpy Arrays

* `genfromtxt` reads a text file into a 2D np array

* Use delimiter parameter to separate each column

* Data on each row must be formatted the same to avoid errors

* `savetxt` writes the 2D array to a file