# Structured Data: NumPy's Structured Arrays

In [1]:
import numpy as np

# One (name, age, weight) record per person, split into three parallel lists
# so that position i in every list refers to the same person.
name, age, weight = map(list, zip(
    ('Alice', 25, 55.0),
    ('Bob', 45, 85.5),
    ('Cathy', 37, 68.0),
    ('Dough', 19, 61.5),
))

In [2]:
# Use a compound data type for structured arrays.
# Each entry in 'formats' describes the field at the same position in 'names':
#   'U10' -> Unicode string, maximum length 10
#   'i4'  -> 4-byte integer
#   'f8'  -> 8-byte float
data = np.zeros(4, dtype={'names': ('name', 'age', 'weight'),
                          'formats': ('U10', 'i4', 'f8')})

print(data.dtype)

[('name', '<U10'), ('age', '<i4'), ('weight', '<f8')]


In [3]:
# Fill the structured array one field at a time from the plain Python lists.
for field, values in (('name', name), ('age', age), ('weight', weight)):
    data[field] = values
print(data)

[('Alice', 25, 55. ) ('Bob', 45, 85.5) ('Cathy', 37, 68. )
 ('Dough', 19, 61.5)]


In [4]:

# Integer indexing returns a single record (one value per field).
data[0]

('Alice', 25, 55.)

In [5]:
# The second record of the structured array.
data[1]

('Bob', 45, 85.5)

In [6]:
# Indexing and slicing work the same as for ordinary arrays;
# here: the 'name' field of the last record.
data[-1]['name']

'Dough'

In [7]:
# Boolean masks work too: select the records with age below 30,
# then read their 'name' field.
under_30 = data['age'] < 30
data[under_30]['name']

array(['Alice', 'Dough'], dtype='<U10')

### Creating Structured Arrays

In [8]:
# There are several equivalent ways to specify the dtype of a structured array.

# Dictionary form: parallel 'names' and 'formats' entries, with the formats
# given as string type codes.
np.dtype(dict(names=('name', 'age', 'weight'),
              formats=('U10', 'i4', 'f8')))

dtype([('name', '<U10'), ('age', '<i4'), ('weight', '<f8')])

In [9]:
# The formats can also be given as Python types or NumPy scalar types
# instead of string codes; (np.str_, 10) is a Unicode string of length 10.
np.dtype(dict(names=('name', 'age', 'weight'),
              formats=((np.str_, 10), int, np.float32)))

dtype([('name', '<U10'), ('age', '<i8'), ('weight', '<f4')])

In [10]:
# A structured dtype can also be written as a list of
# (field name, format) tuples, one tuple per field.
np.dtype([
    ('name', 'S10'),
    ('age', 'i4'),
    ('weight', 'f8'),
])

dtype([('name', 'S10'), ('age', '<i4'), ('weight', '<f8')])

In [11]:
# If you don't need field names, you can give just the formats as a
# comma-separated string; the fields then get the default names 'f0', 'f1', 'f2'.

np.dtype('S10,i4,f8')


dtype([('f0', 'S10'), ('f1', '<i4'), ('f2', '<f8')])

### Advanced Compound Datatypes

In [12]:
# A field may itself hold a fixed-shape sub-array: each record here carries
# an 8-byte integer 'id' and a 3x3 float matrix 'mat'.
tp = np.dtype([
    ('id', 'i8'),
    ('mat', 'f8', (3, 3)),
])
X = np.zeros(shape=1, dtype=tp)

# Show the whole first record, then only its matrix field.
print(X[0], X['mat'][0], sep='\n')

(0, [[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]])
[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


### Record Arrays

In [13]:
# Viewing the structured array as a record array exposes each field as an
# attribute (data_rec.age) instead of requiring a key lookup (data['age']).

# The attribute-style access costs extra time compared with indexing a
# plain structured array by field name.

data_rec = data.view(type=np.recarray)
data_rec.age

array([25, 45, 37, 19], dtype=int32)