# NumPy

[www.numpy.org](http://www.numpy.org)

NumPy is the fundamental package for scientific computing with Python. It contains among other things:

* a powerful N-dimensional array object
* sophisticated (broadcasting) functions
* tools for integrating C/C++ and Fortran code
* useful linear algebra, Fourier transform, and random number capabilities

Besides its obvious scientific uses, NumPy can also be used as an efficient multi-dimensional container of generic data. Arbitrary data-types can be defined. This allows NumPy to seamlessly and speedily integrate with a wide variety of databases.

In [None]:
%matplotlib inline
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

### Fast and memory efficient 

In [None]:
L = range(1000)
%timeit [i**2 for i in L]

In [None]:
a = np.arange(1000)
%timeit a**2

### NumPy reference documentation

In [None]:
np.lookfor('create array')

In [None]:
np.con*?

### Creating arrays

In [None]:
a = np.array([1,2,3])
a

In [None]:
type(a)

In [None]:
a.ndim

In [None]:
b = np.array([[0 , 1 , 2 ], [3 , 4 , 5 ]]) # 2 x 3 array
b

In [None]:
b.ndim

In [None]:
b.shape

In [None]:
b.size

In [None]:
b.dtype

In [None]:
# the size in bytes of each element of the array
b.itemsize

In [None]:
c = np.array([[0 , 1 , 2 ], [3 , 4 , 5 ]], dtype=np.int8)

In [None]:
c.itemsize

In [None]:
# The `np.complex` alias is deprecated and no longer exists in current NumPy
# releases; the builtin `complex` selects the same complex128 dtype.
d = np.array([[1, 2], [3, 4]], dtype=complex)

In [None]:
d

In [None]:
d.itemsize

In [None]:
np.arange(1 , 9 , 2 ) # start, end (exclusive), step

In [None]:
np.arange( 0, 2, 0.3 ) # it accepts float arguments

In [None]:
np.linspace(0 , 1 , 6 ) # start, end, number of points

In [None]:
np.ones((3, 3)) # reminder: (3, 3) is a tuple

In [None]:
np.zeros((2, 2))

In [None]:
# creates an array whose initial content is random and depends on the state of the memory
np.empty((3, 3))

In [None]:
np.eye(3)

In [None]:
np.diag(np. array([1 ,2 ,3 ,4 ]))

In [None]:
np.random.seed(1234)

In [None]:
np.random.rand(4) # uniform over the half-open interval [0, 1)

In [None]:
np.random.randn(4) # Gaussian

### reshape resize newaxis

In [None]:
x = np.arange(12)
x

In [None]:
x.shape

In [None]:
x.reshape((3,4))

In [None]:
x.reshape((6,-1))

In [None]:
x.reshape((2,3,2))

In [None]:
x

In [None]:
x.resize((3,4))
x

In [None]:
x = np.arange(12)
y = x[:, np.newaxis]

In [None]:
print(x.shape)
print(y.shape)

### Basic data types

In [None]:
a = np.array([1 , 2 , 3 ])
a.dtype

In [None]:
b = np.array([ 0.1, 2., 3.6])
b.dtype

In [None]:
b.astype(int)

In [None]:
b.astype(np.int8)

In [None]:
c = np.array([1 , 2 , 3 ], dtype = float )
c.dtype

In [None]:
d = np.array([1 + 2j, 3 + 4j, 5 + 6 * 1j ])
d.dtype

In [None]:
e = np.array([True, False , False , True])
e.dtype

In [None]:
e.itemsize

In [None]:
f = np.array(['Bonjour' , ' Hello' , ' Hallo' ,])
f.dtype # <--- strings containing max. 7 letters

In [None]:
f.itemsize

In [None]:
g = np.array(['Bonjour Madam' ,'çğıöüş'])
g.dtype # <--- strings containing max. 13 letters (the longest entry, 'Bonjour Madam')

In [None]:
g.itemsize

### Indexing and slicing

In [None]:
a = np.arange(10)
a

In [None]:
# reversed
a[:: -1]

In [None]:
a

In [None]:
a[2:9:3] # [start:end:step]

In [None]:
b = np.diag(np.arange(3))

In [None]:
b[1, 1]

<img src='../img/numpy_array_slicing.png'>

In [None]:
row_vector = np.arange(6)
print(row_vector)
print(row_vector.ndim)
print(row_vector.shape)

In [None]:
row_vector.T

In [None]:
column_vector = np.arange(0, 51, 10).reshape((-1,1))
print(column_vector)
print(column_vector.ndim)
print(column_vector.shape)

In [None]:
column_vector.T

In [None]:
column_vector.T.shape

In [None]:
np.arange(0, 51, 10)[:, np.newaxis]

In [None]:
row_vector + column_vector

In [None]:
np.tile?

In [None]:
a = np.tile(np.array([1,2,3]),(2,1))

In [None]:
a.ndim

In [None]:
a.shape

#### Exercise

In [None]:
is_prime = np.ones((100, ), dtype=bool)

In [None]:
is_prime[:2] = 0
is_prime

In [None]:
# Cross out the multiples of every candidate factor j whose square can still
# be an index of the array.
# range() excludes its stop value, so stop at N_max + 1: otherwise j == N_max
# is skipped and N_max**2 stays flagged as prime whenever it is a valid index
# (e.g. 121 for an array of length 122). When N_max**2 is past the end, the
# slice is simply empty.
N_max = int(np.sqrt(len(is_prime)))
for j in range(2, N_max + 1):
    is_prime[j*j::j] = False

In [None]:
is_prime

In [None]:
np.nonzero(is_prime)

### Fancy indexing

In [None]:
np.random.seed( 3 )
# `random_integers` is deprecated in favour of `randint`. `randint` excludes
# its upper bound, so pass 21 to keep the original inclusive range 0..20.
a = np.random.randint(0, 21, 15)
a

In [None]:
mask = (a % 3 == 0)

In [None]:
a[mask]

<img src='../img/numpy_fancy_indexing.png'>

### 3 dimensional Arrays

In [None]:
a = np.arange(24).reshape(2,3,4)
a

In [None]:
a.shape

In [None]:
a[0]

In [None]:
a[1]

In [None]:
a[0].shape

In [None]:
a[0,0:2,2:]

In [None]:
a[...,2]

In [None]:
c = np.array( [[[  0,  1,  2],               # a 3D array (two stacked 2D arrays)
                [ 10, 12, 13]],
                [[100,101,102],
                [110,112,113]]])
c.shape

In [None]:
c[1,...] # same as c[1,:,:] or c[1]

In [None]:
c[...,2] # same as c[:,:,2]

In [None]:
b = a.sum(axis=0)
b

In [None]:
b.sum(axis=1)

## Iteration
Iterating over multidimensional arrays is done with respect to the first axis.

In [None]:
for row in a:
    print(row)
    print("----------------")

In [None]:
for element in a.flat:
    print(element)

### Copies and views

In [None]:
a = np.arange(10)
a

In [None]:
b = a[::2]
b

In [None]:
np.may_share_memory(a, b)

In [None]:
b[0] = 12
print(a)
print(b)

In [None]:
a = np.arange(10)
c = a[:: 2 ].copy()

In [None]:
np.may_share_memory(a, c)

###  Operations

In [None]:
c = np.ones((3 , 3 ))
c * c

In [None]:
a = np.array([1, 2, 3, 4])
b = np.array([4, 2, 2, 4])

In [None]:
a + b

In [None]:
a == b

In [None]:
a > b

In [None]:
a.dot(b)

In [None]:
a = np.array([[1, 2, 3], [3, 4, 5]])
b = np.array([1,1,1])

In [None]:
a.shape

In [None]:
b.shape

In [None]:
a.dot(b)

In [None]:
a.T

In [None]:
a.conj()

In [None]:
b = a.flatten()
b

In [None]:
a

In [None]:
np.ravel(a)

In [None]:
a

In [None]:
a = np.array([1 , 1 , 0 , 0 ], dtype = bool)
b = np.array([1 , 0 , 1 , 0 ], dtype = bool)

In [None]:
np.logical_or(a, b)

In [None]:
np.logical_and(a, b)

In [None]:
a = np.array([4, 3, 1, 2])
a

In [None]:
a.sum()

In [None]:
d = np.arange(12).reshape(3,4)

In [None]:
d

In [None]:
# axis=0 collapses the rows: one sum per column (4 values for the 3 x 4 array d)
d.sum(axis=0)

In [None]:
# axis=1 collapses the columns: one sum per row (3 values for the 3 x 4 array d)
d.sum(axis=1)

### Extremes and sorting

In [None]:
x = np.array([1 , 3 , 2 ])

In [None]:
print(x.min())
print(x.argmin())

In [None]:
print(x.max())
print(x.argmax())

In [None]:
a = np.array([4, 3, 1, 2])
np.argsort(a)

In [None]:
a = np.array([[4 , 3 , 5 ], [1 , 2 , 1 ]])
a

In [None]:
np.sort(a, axis=0)

In [None]:
np.sort(a, axis=1)

## Basic statistics

In [None]:
x = np.array([1 , 2 , 3 , 1 ])
y = np.array([[1 , 2 , 3 ], [5 , 6 , 1 ]])

In [None]:
x.mean()

In [None]:
np.median(x)

In [None]:
np.median(y, axis = - 1 ) # last axis

In [None]:
x.std() # full population standard dev.

## Questions

In [None]:
np.all([ True, True, False ])

In [None]:
np.any([ True, True, False ])

In [None]:
a = np.array([1 , 2 , 3 , 2 ])
b = np.array([2 , 2 , 3 , 2 ])
c = np.array([6 , 4 , 4 , 5 ])
((a <= b) & (b <= c)).all()

In [None]:
x = np.array([4,0,-1,0,12,9])
x

In [None]:
indices = np.nonzero(x)
indices

In [None]:
x[indices]

In [None]:
x < 5

In [None]:
np.nonzero(x < 5)

In [None]:
x = np.arange(12).reshape(3,4)

In [None]:
rows, columns = np.where(x < 5)
print(rows)
print(columns)

In [None]:
x[rows, columns]

In [None]:
ix = np.isin(x, [3, 4, 7])
print(ix)

In [None]:
np.where(ix)

In [None]:
np.where(x < 5, x, -1)               # Note: broadcasting.

In [None]:
np.where([[True, False], [True, True]],
          [[1, 2], [3, 4]],
          [[9, 8], [7, 6]])

### Broadcasting

<img src='../img/numpy_broadcasting.png'>

In [None]:
b = np.array([0 , 1 , 2 ])

In [None]:
b

In [None]:
a = np.arange(0 , 40, 10)
a.shape

In [None]:
a

In [None]:
a + b

In [None]:
a = a[:, np. newaxis] # adds a new axis -> 2D array
a.shape

In [None]:
a

In [None]:
a + b

In [None]:
np.random.shuffle(x)
x

### Mesh

In [None]:
x, y = np.ogrid[ 0:5 , 0:5 ]
x, y

In [None]:
x.shape, y.shape

In [None]:
np.sqrt(x**2 + y**2)

In [None]:
x, y = np.mgrid[0:5, 0:5]

In [None]:
x

In [None]:
y

In [None]:
np.sqrt(x**2 + y**2)

In [None]:
aa, bb, cc = np.ogrid[0:1:24j, 0:1:12j, 0:1:6j]

## Array manipulation

### tile

In [None]:
np.tile?

In [None]:
a = np.array([0, 1, 2])
np.tile(a, 2)

In [None]:
np.tile(a, (2,3))

### repeat

In [None]:
np.repeat?

In [None]:
np.repeat(3, 4)

In [None]:
x = np.array([[1,2],[3,4]])
x

In [None]:
np.repeat(x, 2)

In [None]:
np.repeat(x, 3, axis=0)

In [None]:
np.repeat(x, 3, axis=1)

In [None]:
# specify repeat count for each element
np.repeat(x, (3,2), axis=0)

### concatenate

In [None]:
np.concatenate((np.arange(3),np.arange(3)), axis=0)

In [None]:
# NOTE: this call fails - 1-D arrays only have axis 0, so axis=1 is out of bounds
# (presumably kept on purpose to contrast with the axis=0 call above)
np.concatenate((np.arange(3),np.arange(3)), axis=1)

In [None]:
a = np.arange(12).reshape((3,4))
a

In [None]:
b = np.arange(4).reshape(1,4)
b

In [None]:
np.concatenate((a,b), axis=0)

In [None]:
c = np.arange(3).reshape(3,1)
c

In [None]:
np.concatenate((a,c), axis=1)

### hstack vstack

In [None]:
def iprint(seq, sep='--------'):
    """Print each element of ``seq`` on its own line, followed by a ``sep`` line."""
    for entry in seq:
        # One call emits both lines: the element, a newline, then the separator.
        print(entry, sep, sep='\n')

In [None]:
iprint([1,2,3])

In [None]:
arrays = [np.random.randint(0,10, (2, 3)) for i in range(3)]

In [None]:
iprint(arrays)

In [None]:
h = np.hstack(arrays)
h

In [None]:
h.shape

In [None]:
v = np.vstack(arrays)
v

In [None]:
v.shape

### stack

In [None]:
arrays = [np.random.randint(0,10, (2, 4)) for i in range(3)]

In [None]:
iprint(arrays)

In [None]:
s0 = np.stack(arrays, axis=0)
s0

In [None]:
s0.shape

In [None]:
s1 = np.stack(arrays, axis=1)
s1

In [None]:
s1.shape

In [None]:
s2 = np.stack(arrays, axis=2)
s2

In [None]:
s2.shape

In [None]:
a = np.array((1,2,3))
b = np.array((2,3,4))
np.column_stack((a,b))

In [None]:
np.stack((a,b), axis=1)

### split

In [None]:
np.split?

In [None]:
x = np.arange(9)
np.split(x, 3)

In [None]:
x = np.arange(8)
np.split(x, [3, 5, 6, 10])

### array_split

For an array of length L that should be split into n sections, it returns L % n sub-arrays of size L//n + 1 and the rest of size L//n.

In [None]:
# NOTE: np.split requires an equal division, so 9 elements into 4 sections raises
# an error here; np.array_split in the next cell accepts the uneven split
np.split(np.arange(9), 4)

In [None]:
np.array_split(np.arange(9), 4)

### hsplit vsplit

In [None]:
x = np.arange(24).reshape(6, 4)
x

In [None]:
iprint(np.hsplit(x, 2))

In [None]:
iprint(np.hsplit(x, np.array([3, 6])))

In [None]:
iprint(np.vsplit(x, 2))

In [None]:
iprint(np.vsplit(x, np.array([2, 4])))

## IO

#### Byte file

In [7]:
x = np.arange(100).reshape((10,10))

In [5]:
np.save?

In [None]:
np.save('./data/x.npy', x)

In [None]:
y = np.load('./data/x.npy')

In [None]:
y

In [None]:
np.all(x==y)

#### Text file

In [None]:
x = np.arange(100).reshape((10,10))

In [None]:
np.savetxt('./data/x.txt', x)

In [None]:
y = np.loadtxt('./data/x.txt')

In [None]:
np.all(x==y)

In [13]:
np.savetxt('./data/array2D.csv', x, fmt='%.6f', delimiter=';')

In [14]:
np.loadtxt('./data/array2D.csv',delimiter=';')

array([[ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
       [10., 11., 12., 13., 14., 15., 16., 17., 18., 19.],
       [20., 21., 22., 23., 24., 25., 26., 27., 28., 29.],
       [30., 31., 32., 33., 34., 35., 36., 37., 38., 39.],
       [40., 41., 42., 43., 44., 45., 46., 47., 48., 49.],
       [50., 51., 52., 53., 54., 55., 56., 57., 58., 59.],
       [60., 61., 62., 63., 64., 65., 66., 67., 68., 69.],
       [70., 71., 72., 73., 74., 75., 76., 77., 78., 79.],
       [80., 81., 82., 83., 84., 85., 86., 87., 88., 89.],
       [90., 91., 92., 93., 94., 95., 96., 97., 98., 99.]])

## Advanced Operations

#### Vectorize

In [None]:
def myfunc(a, b):
    """Return ``a - b`` when ``a > b``; otherwise return ``a + b``."""
    return a - b if a > b else a + b

In [None]:
vfunc = np.vectorize(myfunc)

In [None]:
vfunc([1, 2, 3, 4], 2)

In [None]:
a = np.arange(6).reshape(3,2)
a

In [None]:
vfunc(a, 2)

In [None]:
vfunc(2, [1, 2, 3, 4])

In [None]:
vfunc([1, 2, 3, 4], [1, 2, 3, 4])

#### Apply along axis

In [None]:
np.apply_along_axis?

In [None]:
def my_func(a):
    """Return the mean of the first and the last entry of the 1-D sequence ``a``."""
    first, last = a[0], a[-1]
    return (first + last) * 0.5

In [None]:
a = np.array([[1,2,3], [4,5,6], [7,8,9]])
a

In [None]:
np.apply_along_axis(my_func, axis=0, arr=a)

In [None]:
np.apply_along_axis(my_func, axis=1, arr=a)

In [None]:
np.apply_along_axis?

#### Apply over axes

In [None]:
np.apply_over_axes?

In [None]:
a = np.arange(24).reshape(2,3,4)

In [None]:
a.shape

In [None]:
b = np.apply_over_axes(np.sum, a, [0,2])
b

In [None]:
b.shape

In [None]:
np.sum(a, axis=(0,2), keepdims=True)

### Structured data types

In [None]:
a = np.arange(24).reshape(2,3,4)
a

In [None]:
a[0,...]

In [None]:
a[:,:,0]

In [None]:
samples = np.zeros((6,), dtype= [('sensor_code', 'S4'),
                                 ('position', float), 
                                 ('value', float)])

In [None]:
# print() calls instead of Python 2 print statements, which are a SyntaxError
# on Python 3 and inconsistent with the rest of this notebook.
print(samples.ndim)
print(samples.shape)
print(samples.dtype.names)

In [None]:
samples[:] = [('ALFA', 1, 0.37), 
              ('BETA', 1, 0.11),
              ('TAU', 1, 0.13),
              ('ALFA', 1.5, 0.37),
              ('ALFA', 3, 0.11),
              ('TAU', 1.2, 0.13)]

In [None]:
samples

In [None]:
samples['sensor_code']

In [None]:
samples['value']

In [None]:
samples[ 0 ]

### Masked arrays

In [None]:
x = np.ma.array([1 , 2 , 3 , 4 ], mask= [ 0 , 1 , 0 , 1 ])
x

In [None]:
np.ma.sqrt([ 1 , - 1 , 2 , - 2 ])

### Polynomials

In [None]:
p = np.poly1d([3 , 2 , - 1 ])
p(0)

In [None]:
p.roots

In [None]:
print(p.coeffs)
print(p.order)

In [None]:
x = np.linspace(- 1 , 1 , 2000)
y = np.cos(x) + 0.3 * np.random.rand(2000)
p = np.polynomial.Chebyshev.fit(x, y, 90)

In [None]:
p.degree()

### Exercises

#### Mesh and vectorize

In [None]:
frequency = np.arange(1,6)

In [None]:
ratioMesh = np.outer(frequency, 1.0 / frequency)
ratioMesh

In [None]:
zeta = 0.05

In [None]:
def transmissibility(r):
    """Return ``|(1 + 2j*zeta*r) / (1 - r**2 + 2j*zeta*r)|`` for the ratio ``r``.

    ``zeta`` is read from the enclosing (notebook) namespace at call time.
    """
    imaginary_term = 1j * 2 * zeta * r
    numerator = 1 + imaginary_term
    denominator = 1 - r ** 2 + imaginary_term
    return abs(numerator / denominator)

In [None]:
transmissibility(0.5)

In [None]:
# `TR` is not defined anywhere in this notebook; vectorize the
# `transmissibility` function defined above instead.
transmissibility_vec = np.vectorize(transmissibility)

In [None]:
transmissibility_vec(ratioMesh)

#### Simple dataset operations

In [None]:
data = np.loadtxt('data/populations.txt')
year, hares, lynxes, carrots = data.T # trick: columns to variables

In [None]:
data

In [None]:
plt.axes([0.2 , 0.1 , 0.5 , 0.8 ])
plt.plot(year, hares, year, lynxes, year, carrots)
plt.legend(('Hare ', 'Lynx' , ' Carrot'), loc = ( 1.05, 0.5))

In [None]:
populations = data[:, 1 :]
populations.mean(axis=0)

In [None]:
populations.std(axis=0)

In [None]:
np.argmax(populations, axis=1)

#### Distance

In [None]:
a = np.random.rand(30).reshape((10,3))

In [None]:
inds = np.argsort(np.abs(a-0.5))

In [None]:
a[range(10), inds[:,0]]

#### Simple image operations

In [None]:
# NOTE(review): scipy.misc.face is deprecated in newer SciPy releases
# (scipy.datasets.face is the replacement) - confirm against the installed version
from scipy import misc
face = misc.face(gray=True) # 2D grayscale image

In [None]:
plt.imshow(face, cmap=plt.cm.gray)

In [None]:
sy, sx = face.shape
y, x = np.ogrid[0:sy, 0:sx] # x and y indices of pixels

centerx, centery = ( 660 , 300 ) # center of the image
mask = ((y - centery)**2 + (x - centerx)**2 ) > 200**2 # circle

In [None]:
face[mask] = 0
plt.imshow(face, cmap=plt.cm.gray)

## Scribbles

In [None]:
dm = np.random.rand(3,3)

In [None]:
dm

In [None]:
np.cov(dm)

In [None]:
np.cov?

In [None]:
a = np.arange(10)

In [None]:
a[:,np.newaxis]

In [None]:
a.shape

In [None]:
np.insert?

In [None]:
a = np.array([[1, 1], [2, 2], [3, 3]])

In [None]:
np.insert(a, 2, 5, axis=0)


In [None]:
np.insert(a, 2, 5, axis=1)


In [None]:
def sliding_mean(x, N):
    """Return the moving average of ``x`` over windows of ``N`` consecutive samples.

    Parameters
    ----------
    x : array_like
        Input samples. ``np.insert`` is called without an axis, so a
        multi-dimensional input is flattened before averaging.
    N : int
        Window length; must be at least 1.

    Returns
    -------
    numpy.ndarray
        1-D array with ``len(x) - N + 1`` window means (empty when ``N`` is
        larger than the number of samples).

    Raises
    ------
    ValueError
        If ``N`` is smaller than 1. A negative ``N`` would otherwise slice from
        the wrong ends and silently return meaningless values.
    """
    if N < 1:
        raise ValueError("N must be a positive window length, got %r" % (N,))
    # Prepending 0 makes cumsum[k] the sum of the first k samples, so the
    # difference of two entries N apart is the sum of one window.
    cumsum = np.cumsum(np.insert(x, 0, 0))
    return (cumsum[N:] - cumsum[:-N]) / N

In [None]:
def sliding_mean2(x, N):
    """Moving average of ``x`` with window length ``N``, via a running total.

    ``x`` is flattened (``np.insert`` is used without an axis); the result has
    one entry per full window.
    """
    # Leading 0 so that running_total[k] is the sum of the first k samples.
    running_total = np.insert(x, 0, 0).cumsum()
    window_sums = running_total[N:] - running_total[:-N]
    return window_sums / N

In [None]:
raw_array = np.random.random(100)

In [None]:
import matplotlib.pyplot as plt
import seaborn

%matplotlib notebook

# Set seaborn aesthetic parameters to defaults
seaborn.set()
seaborn.set_style('whitegrid')

In [None]:
plt.figure(figsize=(12, 8))
plt.plot(raw_array, label='Raw')
#plt.plot(np.cumsum(raw_array))
plt.plot(sliding_mean(raw_array, 10), label='Sliding Mean')
plt.legend()
plt.show()

In [None]:
sliding_mean(raw_array, 10)

In [None]:
matrice = np.random.random(100).reshape(25,4)

In [None]:
matrice

In [None]:
matrice.mean(axis=0)

In [None]:
sliding_mean(matrice, 10)