In [1]:
import numpy as np

Introduction

In [5]:
# Structured array: every record holds a name (unicode, up to 10 characters),
# an age (32-bit integer) and a weight (32-bit float).
x = np.array([('Jack', 12, 61.5),('Sayem', 25, 72.6), ('Sojib', 27, 72.4)],
             dtype = [('name', 'U10'), ('age', 'i4'), ('weight', 'f4')])

In [6]:
# An integer index selects one whole record (all of its fields).
x[1]

('Sayem', 25, 72.6)

In [7]:
# Indexing with a field name returns that field's value for every record.
x['age']

array([12, 25, 27])

Structured Datatypes

In [9]:
# List-of-tuples form: (name, datatype) or (name, datatype, shape).
# 'z' is a field holding a 2x2 sub-array of float32.
np.dtype([('x','f4'), ('y',np.float32), ('z', 'f4', (2,2))])

dtype([('x', '<f4'), ('y', '<f4'), ('z', '<f4', (2, 2))])

In [11]:
# An empty field name is replaced by a default name of the form f<index>
# (here the second field becomes 'f1').
np.dtype([('x', 'f4'), ('', np.float32), ('z', 'f4', (2,2))])

dtype([('x', '<f4'), ('f1', '<f4'), ('z', '<f4', (2, 2))])

In [13]:
# Comma-separated string form: fields get the default names f0, f1, f2.
np.dtype('f4, i4, U10')

dtype([('f0', '<f4'), ('f1', '<i4'), ('f2', '<U10')])

In [14]:
# The string form accepts a shape prefix per field: '3int8' is three int8
# values, '(2,3)float64' is a 2x3 block of float64.
np.dtype('3int8, float32, (2,3)float64')

dtype([('f0', 'i1', (3,)), ('f1', '<f4'), ('f2', '<f8', (2, 3))])

In [17]:
# Dictionary form with parallel 'names' and 'formats' lists.
np.dtype({'names' : ['col1', 'col2'], 'formats':['i4', 'f4']})

dtype([('col1', '<i4'), ('col2', '<f4')])

In [18]:
# Optional keys: 'offsets' gives the byte offset of each field and 'itemsize'
# the total record size. The two 4-byte fields occupy bytes 0-7, so an
# itemsize of 12 leaves 4 unused bytes at the end of every record.
np.dtype({'names': ['col1', 'col2'],
         'formats': ['i4','f4'],
         'offsets': [0, 4],
         'itemsize': 12})

dtype({'names':['col1','col2'], 'formats':['<i4','<f4'], 'offsets':[0,4], 'itemsize':12})

In [21]:
# Dictionary-of-fields form: name -> (datatype, byte offset). 'col2' starts at
# offset 1, directly after the 1-byte 'col1'.
# NOTE(review): the NumPy user guide discourages this form in favour of the
# 'names'/'formats' dictionary - confirm before reusing it elsewhere.
np.dtype({'col1': ('i1', 0), 'col2': ('f4', 1)})

dtype([('col1', 'i1'), ('col2', '<f4')])

Manipulating and Displaying Structured Datatypes

In [28]:
# .names is a tuple with the field names in order.
d = np.dtype([('x','f4'), ('y', 'S3')])
d.names

('x', 'y')

In [29]:
# .fields maps each field name to a (dtype, byte offset) tuple.
d.fields

mappingproxy({'x': (dtype('float32'), 0), 'y': (dtype('S3'), 4)})

In [33]:
def print_offsets(d):
    """Print the byte offset of each field of structured dtype ``d``, then its itemsize.

    Parameters
    ----------
    d : numpy.dtype
        A structured dtype; its ``names`` and ``fields`` attributes are read.

    Output is written to stdout as two lines: ``Offsets [...]`` (one entry per
    field, in field order) and ``Itemsize <n>``.
    """
    field_offsets = []
    for field_name in d.names:
        # Each entry of d.fields is a tuple whose second item is the byte offset.
        field_info = d.fields[field_name]
        field_offsets.append(field_info[1])
    print('Offsets', field_offsets)
    print('Itemsize', d.itemsize)
# Default layout is packed: each field starts right where the previous one ends.
print_offsets(np.dtype('u1, u1, u1, i4, u1, i8'))

Offsets [0, 1, 2, 3, 7, 8]
Itemsize 16


In [34]:
# align=True inserts padding so each field starts at a multiple of its own
# size; for this dtype the itemsize grows from 16 to 24 bytes.
print_offsets(np.dtype('u1, u1, u1, i4, u1, i8', align= True))

Offsets [0, 1, 2, 4, 8, 16]
Itemsize 24


In [35]:
# A field can carry a title: write the name slot as a (title, name) tuple.
# Here the title is 'my title' and the field name is 'names'.
np.dtype([(('my title', 'names'), 'f4')])

dtype([(('my title', 'names'), '<f4')])

In [36]:
# Same field via the dictionary-of-fields form: name -> (datatype, offset, title).
# NOTE(review): the field is literally called 'names', which is easy to confuse
# with the 'names' key of the names/formats dictionary form - consider using a
# different field name in this example.
np.dtype({'names': ('i4', 0, 'my title')})

dtype([(('my title', 'names'), '<i4')])

## Indexing and Assignment to Structured Arrays
Assigning data to a Structured Array

In [38]:
# Each tuple fills one record field by field; values are cast to the field's
# dtype (the integers 2, 3, 6 show up as floats in the output).
# NOTE: this rebinds d, which held a dtype in the earlier cells.
d = np.array([(1,2,3), (4,2,6)], dtype='i8, f4, f8')
d

array([(1, 2., 3.), (4, 2., 6.)],
      dtype=[('f0', '<i8'), ('f1', '<f4'), ('f2', '<f8')])

In [39]:
# Assigning a tuple to one element overwrites every field of that record.
d[1] = (9,6,0)
d

array([(1, 2., 3.), (9, 6., 0.)],
      dtype=[('f0', '<i8'), ('f1', '<f4'), ('f2', '<f8')])

Assignment from Scalars

In [50]:
# A scalar assigned to structured elements is written to every field and cast
# to that field's dtype: 3 becomes 3, 3.0, True and b'3'.
x = np.zeros(3, dtype='i4, f4, ?, S1')
x[:] = 3
x

array([(3, 3.,  True, b'3'), (3, 3.,  True, b'3'), (3, 3.,  True, b'3')],
      dtype=[('f0', '<i4'), ('f1', '<f4'), ('f2', '?'), ('f3', 'S1')])

In [51]:
# An unstructured array works the same way element-wise: value i is written
# to every field of record i.
x[:] = np.arange(3)
x

array([(0, 0., False, b'0'), (1, 1.,  True, b'1'), (2, 2.,  True, b'2')],
      dtype=[('f0', '<i4'), ('f1', '<f4'), ('f2', '?'), ('f3', 'S1')])

Assignment from other Structured Arrays

In [55]:
# Assignment between structured arrays pairs fields by position, not by name:
# y's first/second/third field is cast into x's 'a'/'b'/'c'.
x = np.zeros(3, dtype=[('a','i4'),('b', 'f4'), ('c', 'S1')])
y = np.zeros(3, dtype=[('x', 'f4'), ('y', 'f4'), ('z', 'i4')])
x[:] = y
x

array([(0, 0., b'0'), (0, 0., b'0'), (0, 0., b'0')],
      dtype=[('a', '<i4'), ('b', '<f4'), ('c', 'S1')])

Indexing Structured Arrays

In [57]:
# Indexing with a single field name yields that field as an ordinary array.
x = np.array([(1,2),(4,7)], dtype=[('foo', 'i4'), ('bar', 'f4')])
x['foo']

array([1, 4])

In [59]:
# Assigning to a field name sets that field in every record of x.
x['foo'] = 10
x['foo']

array([10, 10])

In [61]:
# The integer 12 is stored as 12.0 because 'bar' is a float32 field.
x['bar'] = 12
x['bar']

array([12., 12.], dtype=float32)

In [64]:
# The field array has the field's dtype but a stride of one whole record:
# 8 bytes = 4-byte 'foo' + 4-byte 'bar'.
x['bar'].shape, x['bar'].dtype, x['bar'].strides

((2,), dtype('float32'), (8,))

Accessing Multiple Fields

In [67]:
# A list of field names selects several fields at once, for assignment and
# for reading. The result keeps the original offsets and itemsize (see the
# dtype in the output): 'b' is skipped over, not squeezed out of the layout.
a = np.zeros(3, dtype=[('a', 'f4'), ('b', 'i8'), ('c', 'f4')])
a[['a', 'c']] = 1
a[['a', 'c']]

array([(1., 1.), (1., 1.), (1., 1.)],
      dtype={'names':['a','c'], 'formats':['<f4','<f4'], 'offsets':[0,12], 'itemsize':16})

In [73]:
# repack_fields removes the gap left by the skipped field, giving two adjacent
# float32 values (8 bytes) per record, so each record can be reinterpreted as
# one int64.
from numpy.lib.recfunctions import repack_fields
repack_fields(a[['a','c']]).view('i8')

array([4575657222473777152, 4575657222473777152, 4575657222473777152],
      dtype=int64)

In [78]:
# structured_to_unstructured turns the selected fields into a plain 2-D
# array with one column per field.
from numpy.lib.recfunctions import structured_to_unstructured
structured_to_unstructured(a[['a','c']])

array([[1., 1.],
       [1., 1.],
       [1., 1.]], dtype=float32)

Indexing with an Integer to get a Structured Scalar

In [79]:
# An integer index on a structured array returns a structured scalar.
x = np.array([(1, 2., 3.)], dtype='i, f, f')
scalar = x[0]
scalar

(1, 2., 3.)

In [81]:
# The structured scalar refers to the array's own data: writing s['bar']
# changes x[0], as the output shows.
x = np.array([(1, 2), (3, 4)], dtype=[('foo', 'i8'), ('bar', 'f4')])
s = x[0]
s['bar'] = 100
x

array([(1, 100.), (3,   4.)], dtype=[('foo', '<i8'), ('bar', '<f4')])

Structure Comparison and Record Arrays

In [3]:
# == on structured arrays with the same dtype gives one boolean per record;
# no all-zeros record of a equals the matching all-ones record of b.
a = np.zeros(2, dtype=[('a', 'i4'), ('b', 'i4')])
b = np.ones(2, dtype=[('a', 'i4'), ('b', 'i4')])
a == b

array([False, False])

In [4]:
# np.rec.array builds a record array (displayed as rec.array) from the tuples.
recordarry = np.rec.array([(1, 2, 'Jack'), (3, 4, 'Nayem')], dtype=[('foo', 'i4'), ('bar', 'f4'), ('baz', 'S10')])
recordarry

rec.array([(1, 2., b'Jack'), (3, 4., b'Nayem')],
          dtype=[('foo', '<i4'), ('bar', '<f4'), ('baz', 'S10')])

In [5]:
# Record arrays expose each field as an attribute.
recordarry.bar

array([2., 4.], dtype=float32)

In [6]:
# Slicing a record array returns another record array.
recordarry[0:1]

rec.array([(1, 2., b'Jack')],
          dtype=[('foo', '<i4'), ('bar', '<f4'), ('baz', 'S10')])

In [7]:
# Attribute access to a field also works on a slice.
recordarry[0:1].baz

array([b'Jack'], dtype='|S10')

In [11]:
# Build a plain structured ndarray, then reinterpret it as a record array.
# 'baz' uses 'S10' (byte string of up to 10 bytes): it is the same dtype the
# old 'a10' spelling produced, but the 'a' alias is deprecated in NumPy 2.0.
arr = np.array([(1, 2., 'Hello'), (2, 3., "World")],
               dtype=[('foo', 'i4'), ('bar', 'f4'), ('baz', 'S10')])
# The view needs both a record dtype (np.record wrapping the original dtype)
# and the np.recarray array type.
recordarry = arr.view(dtype=np.dtype((np.record, arr.dtype)), type=np.recarray)
recordarry

rec.array([(1, 2., b'Hello'), (2, 3., b'World')],
          dtype=[('foo', '<i4'), ('bar', '<f4'), ('baz', 'S10')])

In [12]:
# Shortcut: viewing as np.recarray alone also switches the dtype to
# numpy.record, as the output shows.
recordarr = arr.view(np.recarray)
recordarr.dtype

dtype((numpy.record, [('foo', '<i4'), ('bar', '<f4'), ('baz', 'S10')]))

In [13]:
# Back to a plain ndarray: view with the field description taken from
# dtype.fields (falling back to the dtype itself when .fields is empty or
# None) and with type np.ndarray.
arr2 = recordarr.view(recordarr.dtype.fields or recordarr.dtype, np.ndarray)
arr2

array([(1, 2., b'Hello'), (2, 3., b'World')],
      dtype=[('foo', '<i4'), ('bar', '<f4'), ('baz', 'S10')])

Recarray Helper Functions

In [14]:
# apply_along_fields reduces across the fields of each record: the result
# holds one mean of (x, y, z) per record, e.g. (1 + 2 + 5) / 3 for the first.
from numpy.lib import recfunctions as rfn
b = np.array([(1, 2, 5), (4, 5, 7), (7, 8 ,11), (10, 11, 12)], dtype=[('x', 'i4'), ('y', 'f4'), ('z', 'f8')])
rfn.apply_along_fields(np.mean, b)

array([ 2.66666667,  5.33333333,  8.66666667, 11.        ])

In [16]:
# drop_fields returns an array without the named field; 'y' and 'z' remain.
rfn.drop_fields(b, 'x')

array([( 2.,  5.), ( 5.,  7.), ( 8., 11.), (11., 12.)],
      dtype=[('y', '<f4'), ('z', '<f8')])

In [23]:
# find_duplicates returns the duplicated records and, with return_index=True,
# their positions. With ignoremask=True the masked entries (positions 2 and 6)
# are left out of the result.
# NOTE(review): int has a platform-dependent width (the recorded output shows
# '<i4') - use an explicit dtype if the exact width matters.
ndtype = [('a', int)]
a = np.ma.array([1, 1, 1, 2, 2, 3, 3], mask=[0, 0, 1, 0, 0, 0, 1]).view(ndtype)
rfn.find_duplicates(a, ignoremask=True, return_index=True)

(masked_array(data=[(1,), (1,), (2,), (2,)],
              mask=[(False,), (False,), (False,), (False,)],
        fill_value=(999999,),
             dtype=[('a', '<i4')]),
 array([0, 1, 3, 4], dtype=int64))

In [27]:
# flatten_descr flattens a nested dtype into a flat tuple of (name, dtype)
# pairs; the nested field 'b' is replaced by its members 'ba' and 'bb'.
ndtype = np.dtype([('a', '<i4'), ('b', [('ba', 'f8'), ('bb', '<i4')])])
rfn.flatten_descr(ndtype)

(('a', dtype('int32')), ('ba', dtype('float64')), ('bb', dtype('int32')))

In [30]:
# get_fieldstructure maps every field name to the list of its enclosing
# parent fields, outermost first (empty for top-level fields).
ndtype = np.dtype([('A', int),
                  ('B', [('BA', int),
                        ('BB', [('BBA', int), ('BBB', int)])])])
rfn.get_fieldstructure(ndtype)

{'A': [],
 'B': [],
 'BA': ['B'],
 'BB': ['B'],
 'BBA': ['B', 'BB'],
 'BBB': ['B', 'BB']}