# Chapter 4, Part 2: Structured NumPy Arrays

In [1]:
import numpy as np
# Each field is (name, type) or (name, type, shape) for a fixed-size sub-array.
dt = np.dtype([
    ('Name', 'S10'),                # byte string of at most 10 bytes
    ('Age', 'i4'),                  # 32-bit integer
    ('Height', 'f4'),               # 32-bit float (what the 'f' type code resolves to)
    ('Children/Pets', 'i4', (2,)),  # two 32-bit integers per record
])

dt

dtype([('Name', 'S10'), ('Age', '<i4'), ('Height', '<f4'), ('Children/Pets', '<i4', (2,))])

In [2]:
# Dictionary form: parallel lists of field names and field formats.
# 'int,int' is a comma-separated spec that NumPy expands into a nested
# record with two integer sub-fields, auto-named 'f0' and 'f1'.
dt = np.dtype({
    'names': ['Name', 'Age', 'Height', 'Children/Pets'],
    'formats': ['O', 'int', 'float', 'int,int'],
})
dt

dtype([('Name', 'O'), ('Age', '<i8'), ('Height', '<f8'), ('Children/Pets', [('f0', '<i8'), ('f1', '<i8')])])

In [3]:
# One tuple per record, with values given in field order; the nested tuple
# fills the two sub-fields of 'Children/Pets'.
s = np.array([('Smith', 45, 1.83, (0, 1)),
              ('Jones', 53, 1.72, (2, 2))], dtype=dt)

s

array([('Smith', 45, 1.83, (0, 1)), ('Jones', 53, 1.72, (2, 2))],
      dtype=[('Name', 'O'), ('Age', '<i8'), ('Height', '<f8'), ('Children/Pets', [('f0', '<i8'), ('f1', '<i8')])])

In [4]:
type(s)

numpy.ndarray

In a sense, this construction comes quite close to initializing a table in a SQL database: one defines column names and column data types, possibly with some additional information.

In [5]:
s['Name']

array(['Smith', 'Jones'], dtype=object)

In [6]:
s['Height'].mean()

np.float64(1.775)

In [7]:
s[0] # first record

np.void(('Smith', 45, 1.83, (0, 1)), dtype=[('Name', 'O'), ('Age', '<i8'), ('Height', '<f8'), ('Children/Pets', [('f0', '<i8'), ('f1', '<i8')])])

In [8]:
s[1]['Age']

np.int64(53)

## Vectorization of Code

### Basic Vectorization

In [9]:
# Seed NumPy's global random generator (used by the random draws later on).
np.random.seed(100)
r = np.arange(12).reshape(4, 3)  # integers 0..11 laid out as 4 rows x 3 columns
s = 0.5 * r                      # same shape, every entry halved -> float array

r, s

(array([[ 0,  1,  2],
        [ 3,  4,  5],
        [ 6,  7,  8],
        [ 9, 10, 11]]),
 array([[0. , 0.5, 1. ],
        [1.5, 2. , 2.5],
        [3. , 3.5, 4. ],
        [4.5, 5. , 5.5]]))

In [10]:
r+s

array([[ 0. ,  1.5,  3. ],
       [ 4.5,  6. ,  7.5],
       [ 9. , 10.5, 12. ],
       [13.5, 15. , 16.5]])

In [11]:
r + 3         # add 3 to each element, broadcasting

array([[ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [12]:
2 * r

array([[ 0,  2,  4],
       [ 6,  8, 10],
       [12, 14, 16],
       [18, 20, 22]])

In [13]:
2 * r + 3

array([[ 3,  5,  7],
       [ 9, 11, 13],
       [15, 17, 19],
       [21, 23, 25]])

<br>

In [14]:
r

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [16]:
r.shape

(4, 3)

In [18]:
s = np.arange(0, 12, 4) # this means start at 0, stop before 12, step by 4
s

array([0, 4, 8])

In [19]:
# Broadcasting again: s has shape (3,), so it acts as a row vector that is
# added to each of the four rows of r.
r + s

array([[ 0,  5, 10],
       [ 3,  8, 13],
       [ 6, 11, 16],
       [ 9, 14, 19]])

In [20]:
s = np.arange(0, 12, 3) # this means start at 0, stop before 12, step by 3
s

array([0, 3, 6, 9])

In [21]:
# r.transpose() has shape (3, 4), so the length-4 s lines up with the last
# axis and is added, as a row vector, to each row of the transposed array.
r.transpose() + s

array([[ 0,  6, 12, 18],
       [ 1,  7, 13, 19],
       [ 2,  8, 14, 20]])

In [23]:
sr = s.reshape(-1, 1)
sr

array([[0],
       [3],
       [6],
       [9]])

In [24]:
sr.shape

(4, 1)

In [25]:
# Reshaped to (4, 1), s acts as a column vector: row i of r gets s[i] added.
r + s.reshape(-1, 1)

array([[ 0,  1,  2],
       [ 6,  7,  8],
       [12, 13, 14],
       [18, 19, 20]])

Custom-defined Python functions work with ndarray objects as well.

In [26]:
def f(x):
    """Return 3 * x + 5.

    Uses only ``*`` and ``+``, so ``x`` may be a plain number or an
    ndarray; for an ndarray the operation is applied element-wise.
    """
    scaled = 3 * x
    return scaled + 5
f(0.5)

6.5

In [27]:
f(r)

array([[ 5,  8, 11],
       [14, 17, 20],
       [23, 26, 29],
       [32, 35, 38]])

### Memory layout

In [28]:
# One million rows of five standard-normal draws; y is an affine transform of x.
x = np.random.standard_normal((1_000_000, 5))
y = 2 * x + 3

# Stack both arrays into a single (2, 1000000, 5) ndarray, once per memory layout.
C = np.array((x, y), order='C')  # C order: row-major
F = np.array((x, y), order='F')  # Fortran order: column-major

# Rebind x and y to scalars so these names no longer reference the large
# source arrays (presumably to free memory; C and F hold their own copies).
x = 0.0
y = 0.0

C[:2].round(2)   # Some numbers from the C object.

array([[[-1.75,  0.34,  1.15, -0.25,  0.98],
        [ 0.51,  0.22, -1.07, -0.19,  0.26],
        [-0.46,  0.44, -0.58,  0.82,  0.67],
        ...,
        [-0.05,  0.14,  0.17,  0.33,  1.39],
        [ 1.02,  0.3 , -1.23, -0.68, -0.87],
        [ 0.83, -0.73,  1.03,  0.34, -0.46]],

       [[-0.5 ,  3.69,  5.31,  2.5 ,  4.96],
        [ 4.03,  3.44,  0.86,  2.62,  3.51],
        [ 2.08,  3.87,  1.83,  4.63,  4.35],
        ...,
        [ 2.9 ,  3.28,  3.33,  3.67,  5.78],
        [ 5.04,  3.6 ,  0.54,  1.65,  1.26],
        [ 4.67,  1.54,  5.06,  3.69,  2.07]]], shape=(2, 1000000, 5))