In [1]:
from IPython import utils  
from IPython.core.display import HTML  
import os  
def css_styling():
    """Load the CSS sheet 'custom.css' located in the current directory.

    Returns:
        An IPython ``HTML`` object wrapping the file's contents in a
        ``<style>`` element, so that displaying it applies the stylesheet.

    Raises:
        OSError: if './custom.css' cannot be opened.
    """
    # Use a context manager so the file handle is closed as soon as the
    # contents have been read, instead of lingering until garbage collection.
    with open('./custom.css', 'r') as css_file:
        css_rules = css_file.read()
    styles = "<style>\n%s\n</style>" % css_rules
    return HTML(styles)
css_styling()  

# Part 3 - Introduction to NumPy

Unlike languages such as Matlab, IDL or Fortran, Python has no built-in support for powerful array constructs; these require an external library: _NumPy_.

NumPy (pronounced 'Numb-pie') is the standard library for numerical arrays in the scientific Python ecosystem.

NumPy also understands both C- and Fortran-style arrays.

In [2]:
# we need to import the NumPy library in order to use it
import numpy as np  

# NumPy arrays
The basic type that NumPy provides is the N-dimensional array class _ndarray_.

In [3]:
# to create a NumPy array, call array() on a sequence 
my_array = np.array([0,1,2,3,4])

print(my_array)

[0 1 2 3 4]


In [4]:
# A NumPy array is not a Python list. 
type(my_array)

numpy.ndarray

In [5]:
# It behaves more like a Matlab array: arithmetic is applied to every element
my_array + 1

array([1, 2, 3, 4, 5])

In [6]:
# the multiplication operator acts element-wise
my_array * 2

array([0, 2, 4, 6, 8])

NumPy provides a number of ways to create arrays, similar to Matlab functions:

    arange([start,] stop[, step,]) # NumPy version of Python's range()
    zeros(shape) # array of 0
    ones(shape)  # array of 1
    empty(shape) # Simply allocates memory without initializing it.
            # Useful if you are going to overwrite the values anyway
            
    # create a grid of points evenly spaced on a linear or logarithmic scale
    # between a lower and upper bound that is inclusive on both ends.
    linspace()
    logspace()

In [7]:
# creating an array going from 42 up to (but not including) 142 by steps of 10
np.arange(42, 142, 10) 

array([ 42,  52,  62,  72,  82,  92, 102, 112, 122, 132])

In [8]:
np.ones(4) # the shape can be a single number (= length)

array([ 1.,  1.,  1.,  1.])

In [9]:
np.zeros((2,2)) # or a N-dimensional tuple

array([[ 0.,  0.],
       [ 0.,  0.]])

In [10]:
np.empty((2,3,4))

array([[[  6.94916636e-310,   1.01245118e-316,   6.94916675e-310,
           6.94913701e-310],
        [  6.94916672e-310,   6.94913701e-310,   6.94916675e-310,
           6.94913701e-310],
        [  6.94916672e-310,   6.94913701e-310,   6.94916675e-310,
           6.94913701e-310]],

       [[  6.94916672e-310,   6.94913701e-310,   6.94916675e-310,
           6.94913701e-310],
        [  6.94916672e-310,   6.94913701e-310,   6.94916675e-310,
           6.94913701e-310],
        [  6.94916672e-310,   6.94913701e-310,   6.94916675e-310,
           6.94913701e-310]]])

## dtypes
Because the `ndarray` object has been designed to be fast for numerical operations, it can't mix types (unlike Python lists).

As a consequence, the type of an `ndarray` is fixed for all the elements of the array.

The _dtype_ (for data type) is the most important `ndarray` attribute. The data type determines
the size and meaning of each element of the array.

The _dtypes_ all have string character codes, as a concise mechanism for specifying the
type. 

Examples: int, float, complex, etc. The precision can be specified (int8, int32, int64, etc.).


In [11]:
my_array = np.array([1.5, 2, 5.9, 9.0]) # auto-guess
my_array.dtype

dtype('float64')

In [12]:
# force the dtype to integer (the fractional parts are truncated)
my_array = np.array([1.5, 2, 5.9, 9.0], dtype=int) 
print(my_array)
print(my_array.dtype)

[1 2 5 9]
int64


In [13]:
# note that the 'j' suffix on a number (e.g. 5j or 1j) denotes the imaginary unit in Python
my_array = np.array([1.5 + 5j, 2, 5.9 + 6*1j, 9.0], dtype=complex) 
print(my_array)
print(my_array.dtype)

[ 1.5+5.j  2.0+0.j  5.9+6.j  9.0+0.j]
complex128


# Mathematical Operations

In [14]:
# Mathematical functions (exp, log, sqrt, etc.) and constants are included in the NumPy library.
# They act on NumPy arrays and return NumPy arrays
x = np.linspace(-np.pi, np.pi, 101)
result = np.cos(x)
print(result)
type(result)

[ -1.00000000e+00  -9.98026728e-01  -9.92114701e-01  -9.82287251e-01
  -9.68583161e-01  -9.51056516e-01  -9.29776486e-01  -9.04827052e-01
  -8.76306680e-01  -8.44327926e-01  -8.09016994e-01  -7.70513243e-01
  -7.28968627e-01  -6.84547106e-01  -6.37423990e-01  -5.87785252e-01
  -5.35826795e-01  -4.81753674e-01  -4.25779292e-01  -3.68124553e-01
  -3.09016994e-01  -2.48689887e-01  -1.87381315e-01  -1.25333234e-01
  -6.27905195e-02   2.83276945e-16   6.27905195e-02   1.25333234e-01
   1.87381315e-01   2.48689887e-01   3.09016994e-01   3.68124553e-01
   4.25779292e-01   4.81753674e-01   5.35826795e-01   5.87785252e-01
   6.37423990e-01   6.84547106e-01   7.28968627e-01   7.70513243e-01
   8.09016994e-01   8.44327926e-01   8.76306680e-01   9.04827052e-01
   9.29776486e-01   9.51056516e-01   9.68583161e-01   9.82287251e-01
   9.92114701e-01   9.98026728e-01   1.00000000e+00   9.98026728e-01
   9.92114701e-01   9.82287251e-01   9.68583161e-01   9.51056516e-01
   9.29776486e-01   9.04827052e-01

numpy.ndarray

# Slicing
NumPy arrays use the same slicing syntax as Python lists.

In [15]:
a = np.arange(8)
a

array([0, 1, 2, 3, 4, 5, 6, 7])

In [16]:
a[2:6] # from index 2 (the third element) up to index 6 (excluded), with a stride of 1

array([2, 3, 4, 5])

In [17]:
a[1::3] # from the second element to the last by stride of 3

array([1, 4, 7])

NumPy arrays can be N-dimensional, so you may slice along any axis, or along several axes at once.

In [18]:
# create a 4x4 matrix; reshape() returns the reshaped array and is preferred
# over assigning to a.shape, which the NumPy documentation discourages
a = np.arange(16).reshape(4, 4)
print(a)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]


In [19]:
# all the elements of the second row
a[1,:]

array([4, 5, 6, 7])

In [20]:
# all the elements of the second column
a[:,1]

array([ 1,  5,  9, 13])

In [21]:
# the first and third rows, and the third to fourth columns
a[[0,2], 2:4]

array([[ 2,  3],
       [10, 11]])

The `ndarray` type is - like everything in Python - an object, with several attributes:

In [22]:
# number of dimensions (int)
a.ndim

2

In [23]:
# Tuple of integers giving the size of the array along each dimension
a.shape

(4, 4)

In [24]:
# total number of elements (int)
a.size

16

As well as several methods. 

In [38]:
a.reshape((2,8))

array([[ 0,  1,  2,  3,  4,  5,  6,  7],
       [ 8,  9, 10, 11, 12, 13, 14, 15]])

In [57]:
b=np.array([1,4,70,2,5,8])
# get the index of the maximum value
b.argmax()

2

In [39]:
a.max()

15

In [31]:
# matrix multiplication 
a.dot(a)

array([[ 56,  62,  68,  74],
       [152, 174, 196, 218],
       [248, 286, 324, 362],
       [344, 398, 452, 506]])

In [37]:
a.sum()

120

__Pay attention__: 
 
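 * Some `ndarray` methods (e.g. `a.sort()`, `a.fill(0)`) and assignments to attributes such as `a.shape` modify the array _in-place_, whereas the corresponding functions that take an `ndarray` as an argument (e.g. `np.sort(a)`) return a new array and leave `a` itself unchanged. Other methods, such as `a.reshape()`, `a.sum()` or `a.max()`, also return a new object rather than modifying `a`.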
 * With NumPy `ndarray`, a=b creates a new _reference_ to b, not a _copy_. 
 

In [65]:
b=a # b is a new reference to a.
b[0]=10
print(b)

[[10 10 10 10]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]


In [64]:
# Because b refers to the same array as a, modifying b also modifies a!
print(a)

[[10 10 10 10]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]


In [66]:
b=a.copy() 
b[0] = 20
print(b)
print(a) # a has not been modified.

[[20 20 20 20]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]
[[10 10 10 10]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]
