# Structured Data: Numpy's Structured Arrays

In [1]:
import numpy as np

In [2]:
# Three parallel lists: index i in each one describes the same person.
name, age, weight = (
    ['Alice', 'Bob', 'Cathy', 'Doug'],
    [25, 45, 37, 19],
    [55.0, 85.0, 68.0, 61.5],
)

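Keeping the data in three separate lists is not practical: nothing ties the entries at a given index together, so the values for one person cannot easily be correlated.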

In [3]:
# Ordinary homogeneous array: four integer zeros, every element the same type.
x = np.zeros(shape=(4,), dtype=int)

In [5]:
# Structured array: each of the four records carries a name, an age and a
# weight, all described by a single compound dtype.
data = np.zeros(
    4,
    dtype=dict(
        names=('name', 'age', 'weight'),
        formats=('U10', 'i4', 'f8'),
    ),
)
print(data.dtype)

[('name', '<U10'), ('age', '<i4'), ('weight', '<f8')]


* U10 = Unicode string of maximum length 10
* i4 = 4-byte integer
* f8 = 8-byte float

In [6]:
# Populate the structured array one field (column) at a time from the
# parallel lists, then show the assembled records.
for field, values in (('name', name), ('age', age), ('weight', weight)):
    data[field] = values
print(data)

[('Alice', 25, 55. ) ('Bob', 45, 85. ) ('Cathy', 37, 68. )
 ('Doug', 19, 61.5)]


In [7]:
# Get all names: indexing with a field name returns that field for every record
data['name']

array(['Alice', 'Bob', 'Cathy', 'Doug'], dtype='<U10')

In [8]:
# Get first row of data: an integer index returns one complete record
data[0]

('Alice', 25, 55.)

In [9]:
# Get the name from the last row: select the record first, then the field
data[-1]['name']

'Doug'

In [10]:
# Boolean masking works on structured arrays too: build a mask from the
# 'age' field, keep the matching records, then read their names.
is_under_30 = data['age'] < 30
data[is_under_30]['name']

array(['Alice', 'Doug'], dtype='<U10')

## Creating Structured Arrays

Dictionary method:

In [11]:
# Dictionary form: parallel tuples of field names and their format codes
np.dtype({'names': ('name', 'age', 'weight'), 'formats': ('U10', 'i4', 'f8')})

dtype([('name', '<U10'), ('age', '<i4'), ('weight', '<f8')])

Numerical types can be specified using Python types or NumPy dtypes instead of format strings:

In [15]:
# Formats given as Python types or NumPy scalar types instead of strings.
# np.float64 replaces the `np.float` alias, which was deprecated in NumPy 1.20
# and removed in 1.24 (it was only an alias for the builtin float, i.e. float64).
# The builtin `int` maps to NumPy's default integer, whose width is
# platform-dependent.
np.dtype({'names': ('name', 'age', 'weight'), 'formats':((np.str_, 10), int, np.float64)})

dtype([('name', '<U10'), ('age', '<i4'), ('weight', '<f8')])

A compound type can also be specified as a list of tuples:

In [16]:
# List-of-tuples form: one (field name, format) pair per field.
# Note that 'S10' is a 10-byte byte string, not a Unicode 'U10' string.
np.dtype([('name', 'S10'), ('age', 'i4'), ('weight', 'f8')])

dtype([('name', 'S10'), ('age', '<i4'), ('weight', '<f8')])

In [17]:
np.dtype('S10, i4, f8') # field names are optional; when omitted NumPy assigns f0, f1, f2, ...

dtype([('f0', 'S10'), ('f1', '<i4'), ('f2', '<f8')])

* Character  	Description	           Example
* 'b'	        Byte	               np.dtype('b')
* 'i'	        Signed integer	       np.dtype('i4') == np.int32
* 'u'	        Unsigned integer       np.dtype('u1') == np.uint8
* 'f'	        Floating point	       np.dtype('f8') == np.float64
* 'c'	        Complex floating point np.dtype('c16') == np.complex128
* 'S', 'a'	    String	               np.dtype('S5')
* 'U'	        Unicode string	       np.dtype('U') == np.str_
* 'V'	        Raw data (void)	       np.dtype('V') == np.void

## More Advanced Compound Types

In [19]:
# A field may itself be an array: every record stores an integer id
# together with its own 3x3 matrix of floats.
tp = np.dtype([
    ('id', 'i8'),           # 8-byte integer
    ('mat', 'f8', (3, 3)),  # 3x3 sub-array of 8-byte floats
])
X = np.zeros(1, dtype=tp)
# Show the whole first record, then only its matrix field.
print(X[0], X['mat'][0], sep='\n')

(0, [[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]])
[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


## RecordArrays: Structured Arrays with a Twist

In [20]:
# With a plain structured array, fields are read with dictionary-style keys
data['age']

array([25, 45, 37, 19])

In [21]:
# View the structured array as a record array, which additionally exposes
# each field as an attribute.
data_rec = data.view(type=np.recarray)
data_rec.age

array([25, 45, 37, 19])

In [22]:
# Time three ways of reading the 'age' field: key lookup on the structured
# array, key lookup on the record array, and attribute access on the record array.
%timeit data['age']
%timeit data_rec['age']
%timeit data_rec.age

120 ns ± 1.49 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)
2.63 µs ± 144 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
3.39 µs ± 99.7 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
