# Welcome to the Dark Art of Coding:
## Introduction to Python
numpy module, part I

<img src='../../images/dark_art_logo.600px.png' width='300' style="float:right">

In [4]:
# Numpy is the fundamental package for scientific computing in Python.
# It provides:
#   a multidimensional array object
#   a collection of functions that perform operations including:
#       mathematical,
#       logical, 
#       shape manipulation, 
#       sorting, 
#       selection, 
#       Fourier transforms, 
#       linear algebra, 
#       statistical 
#       etc.

In [5]:
# Several important items to note:
#   NumPy arrays have a fixed size once created (note: Python's lists do not).
#   All elements in a NumPy array are required to be the same type.
#   NumPy arrays perform vectorized mathematics/transforms without the need
#        for explicit 'for' loops, which improves performance and efficiency.
#   Most scientific/math libraries use NumPy under the hood.
# 

# Simple array range
---

In [6]:
import numpy as np

In [7]:
a = np.arange(42)
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41])

## Shape manipulation

In [8]:
a1 = a.reshape(6, 7)
a1

array([[ 0,  1,  2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11, 12, 13],
       [14, 15, 16, 17, 18, 19, 20],
       [21, 22, 23, 24, 25, 26, 27],
       [28, 29, 30, 31, 32, 33, 34],
       [35, 36, 37, 38, 39, 40, 41]])

In [9]:
# With any given array, we can explore its various characteristics
#     (each of these is a plain attribute of the array, not a method call)

print('number of dimensions:', a1.ndim)
print('shape of the array:', a1.shape)
print('size of the array:', a1.size)
print('datatype:', a1.dtype)
print('size in bytes of each element:', a1.itemsize)
print('type of object:', type(a1))

number of dimenstions: 2
shape of the array: (6, 7)
size of the array: 42
datatype: int64
size in bytes of each element: 8
type of object: <class 'numpy.ndarray'>


In [10]:
# numpy does a best guess approach to determining a suitable datatype
#     when you create a new numpy array.

f = np.array([1.0, 2.1, 3.2])
print(f.dtype)
f

float64


array([1. , 2.1, 3.2])

In [11]:
# You can create multidimensional arrays manually...

dim2 = np.array([[1.0, 2.1, 3.2], [4.3, 5.4, 6.5]])
dim2

array([[1. , 2.1, 3.2],
       [4.3, 5.4, 6.5]])

In [12]:
# You can prescribe the datatype manually

i8 = np.array([[1, 2, 3], [7, 8, 9]], dtype='int8')
i8.dtype

dtype('int8')

In [13]:
# There are several predefined functions that allow you to create
#     specialized OR commonly used arrays.

# An array composed of all zeros (the shape is passed as a tuple):

np.zeros((4,5))

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [14]:
# An array composed of all ones:

np.ones((2, 3, 12))

array([[[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]],

       [[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]]])

In [15]:
# An "empty" array is not exactly empty...
#     np.empty() allocates the array without initializing its entries, so
#     the stored values are arbitrary and can differ from run to run

np.empty((4, 3))

array([[2.68156159e+154, 2.68156159e+154, 5.92878775e-323],
       [0.00000000e+000, 0.00000000e+000, 0.00000000e+000],
       [0.00000000e+000, 0.00000000e+000, 0.00000000e+000],
       [0.00000000e+000, 0.00000000e+000, 0.00000000e+000]])

In [16]:
np.empty((2, 3, 2), dtype=int)

array([[[6917529027641081856, 6917529027641081856],
        [                 12,                   0],
        [                  0,                   0]],

       [[                  0,                   0],
        [                  0,                   0],
        [                  0,                   0]]])

In [17]:
# Much like Python ranges, numpy ranges can have
#     starting points, ending points and increments

np.arange(2, 20, 3)

array([ 2,  5,  8, 11, 14, 17])

In [18]:
# Unlike Python ranges, numpy aranges can increment by float values

np.arange(2, 10, 0.3)

array([2. , 2.3, 2.6, 2.9, 3.2, 3.5, 3.8, 4.1, 4.4, 4.7, 5. , 5.3, 5.6,
       5.9, 6.2, 6.5, 6.8, 7.1, 7.4, 7.7, 8. , 8.3, 8.6, 8.9, 9.2, 9.5,
       9.8])

In [19]:
# It is also possible to create arrays with a set number of values
#     and allow numpy to identify the values for us.

np.linspace(2, 10, 30)

array([ 2.        ,  2.27586207,  2.55172414,  2.82758621,  3.10344828,
        3.37931034,  3.65517241,  3.93103448,  4.20689655,  4.48275862,
        4.75862069,  5.03448276,  5.31034483,  5.5862069 ,  5.86206897,
        6.13793103,  6.4137931 ,  6.68965517,  6.96551724,  7.24137931,
        7.51724138,  7.79310345,  8.06896552,  8.34482759,  8.62068966,
        8.89655172,  9.17241379,  9.44827586,  9.72413793, 10.        ])

In [20]:
# When printing an array, the values will display slightly differently:

i = np.array([7, 13, 42, 99])

print(i)

[ 7 13 42 99]


In [21]:
j = np.ones(4)

print(j)

[1. 1. 1. 1.]


In [22]:
# numpy arrays can be subtracted 

k = i - j
print(k)

[ 6. 12. 41. 98.]


In [23]:
# or added

k2 = i + j
print(k2)

[  8.  14.  43. 100.]


In [24]:
x = np.array([[3, 4],
              [5, 6]])

In [25]:
y = np.array([[7, 8],
              [9, 0]])

In [26]:
# The * operator multiplies multidimensional arrays element by element
#     (note: this is NOT matrix multiplication):

print(x * y)

[[21 32]
 [45  0]]


In [27]:
# Matrix (dot) products are also simple and convenient:
#     prices holds one price per product (3 values);
#     sales holds one row per product and one column per day (3 x 5).

prices = np.array([3.49, 4.49, 3.99])

sales = np.array([[5, 6, 7, 6, 5],
                  [10, 11, 11, 11, 12],
                  [7, 6, 7, 6, 7]])

prices.dot(sales)

# output 1:  3.49 * 5 + 4.49 * 10 + 3.99 * 7 (total sales on day 1: 90.28)
# output 2:  3.49 * 6 + 4.49 * 11 + 3.99 * 6 (total sales on day 2: 94.27)
# and so on...

array([ 90.28,  94.27, 101.75,  94.27,  99.26])

In [28]:
# Scalar multiplication also works as expected

u = np.ones((4, 3), dtype=int)
u *= 5
u

array([[5, 5, 5],
       [5, 5, 5],
       [5, 5, 5],
       [5, 5, 5]])

In [29]:
# numpy's random module can generate arrays of random numbers.
# A seeded Generator is used so that re-running the notebook from a
#     fresh kernel produces the same values every time.
#     The floats are drawn uniformly from the interval [0.0, 1.0).

rng = np.random.default_rng(42)
r = rng.random((3, 4))
r

array([[0.16939891, 0.49186988, 0.34277433, 0.26901334],
       [0.61568432, 0.26861894, 0.5548106 , 0.57892899],
       [0.14327459, 0.38418056, 0.6912007 , 0.19464466]])

In [30]:
# numpy arrays come with a wide collection of methods 
#     that allow you to identify characteristics of the
#     arrays

print('Max:', r.max(), '\n')
print('Min:', r.min(), '\n')
print('Sum:', r.sum(), '\n')

Max: 0.6912007036903437 

Min: 0.14327459251467545 

Sum: 4.704399812185196 



In [31]:
# NOTE: this rebinds the name 'i' (previously the 1-D array [7, 13, 42, 99])
#     to a new 4 x 3 array, which the following cells operate on.

i = np.arange(12).reshape(4, 3)
print(i)

[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]


In [32]:
# Summations can be by column (axis 0) OR by row (axis 1)...

print('Sum (by col):', i.sum(axis=0), '\n')
print('Sum (by row):', i.sum(axis=1), '\n')

Sum (by col): [18 22 26] 

Sum (by row): [ 3 12 21 30] 



In [33]:
# numpy also provides access to Universal functions that can be 
#     applied to given arrays:

print(np.sin(i))

[[ 0.          0.84147098  0.90929743]
 [ 0.14112001 -0.7568025  -0.95892427]
 [-0.2794155   0.6569866   0.98935825]
 [ 0.41211849 -0.54402111 -0.99999021]]


In [34]:
print(np.sqrt(i))

[[0.         1.         1.41421356]
 [1.73205081 2.         2.23606798]
 [2.44948974 2.64575131 2.82842712]
 [3.         3.16227766 3.31662479]]


In [35]:
print(np.mean(i, axis=1))

[ 1.  4.  7. 10.]


In [36]:
c = np.arange(8) ** 0.5

In [37]:
# numpy arrays can be indexed, sliced and sliced incrementally

print(c[2])
print(c[2:6])
print(c[3:7:2])

1.4142135623730951
[1.41421356 1.73205081 2.         2.23606798]
[1.73205081 2.23606798]


In [38]:
# numpy allows you to make assignment based on indexes

c[1] = 1000
c

array([   0.        , 1000.        ,    1.41421356,    1.73205081,
          2.        ,    2.23606798,    2.44948974,    2.64575131])

In [39]:
# Slices can be assigned to as well: every second element from
#     index 0 up to (but not including) index 7 is set to 99

c[:7:2] = 99
print(c)

[  99.         1000.           99.            1.73205081   99.
    2.23606798   99.            2.64575131]


In [40]:
m = np.array([[11, 12, 13],
              [21, 22, 23],
              [31, 32, 33]])

In [41]:
# Arrays can be indexed across multiple axes at the same time...
# Row zero

m[0]

array([11, 12, 13])

In [42]:
# Row one

m[1]

array([21, 22, 23])

In [43]:
# Row zero, element zero

m[0, 0]

11

In [44]:
# Row one, element one

m[1, 1]

22

In [45]:
# Row one, all elements from 0 to 2 (not including 2)
m[1, 0:2]

array([21, 22])

In [46]:
# Row one, all elements: a slice may run past the end of the row
#     (here 0:4 on a row of three elements); numpy will simply
#     return the values that exist

m[1, 0:4]

array([21, 22, 23])

In [47]:
# Extraction of columns is possible, as well
#     To extract a specific column, simply slice across all rows at
#     a particular index
# The following extracts all elements from the one-th column

print(m[0:3, 1])

[12 22 32]


In [48]:
# Much like Python, you can simply use a colon as a placeholder
#     to select all rows.

print(m[:, 2])

[13 23 33]


In [49]:
# numpy arrays can be flattened using the ravel function:

m.ravel()

array([11, 12, 13, 21, 22, 23, 31, 32, 33])

In [50]:
# Let's reshape the array 'u' from 4 x 3 to 2 x 6.
#     reshape() is used rather than assigning to u.shape: the numpy
#     documentation discourages setting the shape attribute in place.

u = u.reshape(2, 6)
u

array([[5, 5, 5, 5, 5, 5],
       [5, 5, 5, 5, 5, 5]])

In [51]:
# Arrays can be easily transposed

u.T

array([[5, 5],
       [5, 5],
       [5, 5],
       [5, 5],
       [5, 5],
       [5, 5]])

In [52]:
# Build two 2 x 3 arrays: one counting from 0, the other from 10

v1 = np.arange(0, 6).reshape(2, 3)
v2 = np.arange(10, 16).reshape(2, 3)

# Show both arrays, separated by a single blank line
print(v1, v2, sep='\n\n')

[[0 1 2]
 [3 4 5]]

[[10 11 12]
 [13 14 15]]


In [53]:
# numpy allows us to stack the data in the arrays vertically

np.vstack((v1, v2))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [10, 11, 12],
       [13, 14, 15]])

In [54]:
# similarly, it is possible to stack arrays side-by-side
#     or horizontally

np.hstack((v1, v2))

array([[ 0,  1,  2, 10, 11, 12],
       [ 3,  4,  5, 13, 14, 15]])

In [76]:
# Splitting an array
#     The list [3, 6, 9] gives the indices at which to cut, producing
#     the pieces [0:3], [3:6], [6:9] and [9:]

split = np.array([11, 12, 13, 21, 22, 23, 31, 32, 33, 41, 42, 43])

post_split = np.split(split, [3, 6, 9])
post_split

[array([11, 12, 13]),
 array([21, 22, 23]),
 array([31, 32, 33]),
 array([41, 42, 43])]

In [83]:
# hsplit cuts an array column-wise: [2] splits before column index 2,
#     leaving columns 0-1 on the left and column 2 on the right

hsplit = np.array([[11, 11, 22], 
                   [11, 11, 22]])


lsplit, rsplit = np.hsplit(hsplit, [2])


In [84]:
lsplit

array([[11, 11],
       [11, 11]])

In [85]:
rsplit

array([[22],
       [22]])