In [2]:
# structured data: structured arrays
# structured arrays and record arrays provide efficient storage for compound, heterogeneous data
import numpy as np

In [7]:
# Three parallel columns describing the same four people.
name = ["Alice", "Bob", "Cathy", "Doug"]
age = [25, 45, 37, 19]
weight = [55.0, 85.5, 68.0, 61.5]

# A structured array holds one record per element. The compound dtype is
# spelled here as a list of (field, format) pairs:
#   name   -> "U10": unicode string of at most 10 characters
#   age    -> "i4":  4-byte integer
#   weight -> "f8":  8-byte float
data = np.zeros(len(name), dtype=[("name", "U10"), ("age", "i4"), ("weight", "f8")])
data["name"], data["age"], data["weight"] = name, age, weight

# Fields are addressed by name and records by position.
# (Only the final expression of a cell is displayed; the others are evaluated
# and discarded.)
data["name"]      # every name
data[0]           # the first record
data[-1]["name"]  # the name stored in the last record

# A boolean mask selects records; here, the names of everyone younger than 30.
data[data["age"] < 30]["name"]



array([('Alice', 25, 55. ), ('Bob', 45, 85.5), ('Cathy', 37, 68. ),
       ('Doug', 19, 61.5)],
      dtype=[('name', '<U10'), ('age', '<i4'), ('weight', '<f8')])

In [8]:
# numpy data types
# Reference table of dtype character codes. It is written as a bare string
# expression so that the cell displays it; it binds no name.
# Each row: code, meaning, and an example dtype with the scalar type it equals.
# Note: 'a' is a legacy alias for 'S' and is deprecated as of NumPy 2.0.
'''
'b' Byte                   np.dtype('b')
'i' Signed integer         np.dtype('i4') == np.int32
'u' Unsigned integer       np.dtype('u1') == np.uint8
'f' Floating point         np.dtype('f8') == np.float64
'c' Complex floating point np.dtype('c16') == np.complex128
'S' , 'a' String           np.dtype('S5')
'U' Unicode string         np.dtype('U') == np.str_
'V' Raw data (void)        np.dtype('V') == np.void
'''

"\n'b' Byte                   np.dtype('b')\n'i' Signed integer         np.dtype('i4') == np.int32\n'u' Unsigned integer       np.dtype('u1') == np.uint8\n'f' Floating point         np.dtype('f8') == np.int64\n'c' Complex floating point np.dtype('c16') == np.complex128\n'S' , 'a' String           np.dtype('S5')\n'U' Unicode string         np.dtype('U') == np.str_\n'V' Raw data (void)        np.dtype('V') == np.void\n"

In [9]:
# More advanced compound types:
# a field may itself hold an array or matrix of values. Here every record
# carries an 8-byte integer "id" and a 3x3 block of 8-byte floats, "mat".
tp = np.dtype([("id", np.int64), ("mat", np.float64, (3, 3))])
X = np.zeros(shape=(1,), dtype=tp)  # a single zero-filled record
X

array([(0, [[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]])],
      dtype=[('id', '<i8'), ('mat', '<f8', (3, 3))])

In [11]:
# NumPy also provides record arrays (np.recarray):
# their fields can be accessed as attributes rather than as dictionary keys.
# `data` is the structured array built in an earlier cell.
data_rec = data.view(type=np.recarray)
data_rec.age

# Note: there is some extra overhead involved in accessing fields this way.

array([25, 45, 37, 19], dtype=int32)