### Advanced NumPy

In [1]:
import warnings

# Silence every warning for the rest of the session.
# NOTE(review): this blanket filter would also hide any deprecation warnings
# raised by later cells — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

In [2]:
import numpy as np

#### Block of memory

In [3]:
# Three 32-bit integers; `.data` exposes the raw buffer that holds them.
x = np.array((1, 2, 3), dtype="int32")
x.data

<memory at 0x000001F3BD4125C0>

In [4]:
bytes(x.data)

b'\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00'

In [5]:
x.__array_interface__['data'][0]

2146365094592

In [6]:
x.__array_interface__

{'data': (2146365094592, False),
 'strides': None,
 'descr': [('', '<i4')],
 'typestr': '<i4',
 'shape': (3,),
 'version': 3}

In [7]:
# A basic slice is a view: a write made through `x` is visible through `y`.
x = np.array((1, 2, 3, 4))
y = x[0:3]
x[0] = 9
y

array([9, 2, 3])

In [8]:
# Reinterpret the memory of `x` as a flat sequence of signed bytes (int8).
# `frombuffer` builds the new array on top of the existing buffer of `x`.
y = np.frombuffer(x, dtype=np.int8)
# `.data` is a memoryview over that buffer.
y.data

<memory at 0x000001F3BD412740>

In [9]:
y.base is x

True

In [10]:
y.flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : True
  OWNDATA : False
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False

#### Data types

In [11]:
np.dtype(int).type

numpy.int32

In [12]:
np.dtype(int).itemsize

4

In [13]:
np.dtype(int).byteorder

'='

In [14]:
# Structured dtype describing the fixed-size header of a WAV file
# (13 fields, 44 bytes in total). Every integer field is declared
# little-endian explicitly so the layout does not depend on the host CPU.
wav_header_dtype = np.dtype([("chunk_id", (bytes, 4)), # flexible-sized scalar type, item size 4
                             ("chunk_size", "<u4"), # little-endian unsigned 32-bit integer
                             ("format", "S4"), # 4-byte string
                             ("fmt_id", "S4"),
                             ("fmt_size", "<u4"),
                             ("audio_fmt", "<u2"), # little-endian unsigned 16-bit integer
                             ("num_channels", "<u2"),
                             ("sample_rate", "<u4"),
                             ("byte_rate", "<u4"),
                             ("block_align", "<u2"),
                             ("bits_per_sample", "<u2"),
                             ("data_id", ("S1", (2, 2))), # sub-array, just for fun!
                             ("data_size", "<u4"), # was native-order "u4"; pinned to little-endian like the other integers
                             # the sound data itself cannot be represented here
                            ])

In [None]:
# Alternative definition: describe only the fields of interest and give each
# one its byte offset from the start of the record. Bytes that belong to the
# unlisted header fields are simply skipped.
wav_header_dtype = np.dtype(dict(names=['format', 'sample_rate', 'data_id'],
                                 offsets=[8, 24, 36],  # counted from start of structure in bytes
                                 formats=['S4', '<u4', ('S1', (2, 2))],  # one dtype per field, same order as `names`
                                 itemsize=44,  # length of the whole header, including the skipped fields
                                 ))

In [None]:
# Read exactly one header record from the start of the file.
# The file holds raw bytes, so it is opened in binary mode ('rb'); the `with`
# block guarantees the handle is closed even if `np.fromfile` raises.
with open('data/test.wav', 'rb') as f:
    wav_header = np.fromfile(f, dtype=wav_header_dtype, count=1)
print(wav_header)


In [None]:
wav_header['sample_rate']

In [None]:
wav_header['data_id']

In [None]:
wav_header.shape

In [None]:
wav_header['data_id'].shape

In [19]:
# Build a double-precision array. `np.float` was only an alias for the builtin
# `float` (deprecated in NumPy 1.20, removed in 1.24); `np.float64` names the
# same dtype explicitly and works on every NumPy version.
x = np.array([1, 2, 3, 4],
             dtype = np.float64)

x

array([1., 2., 3., 4.])

In [20]:
y = x.astype(np.int8)
y

array([1, 2, 3, 4], dtype=int8)

In [21]:
y + 1

array([2, 3, 4, 5], dtype=int8)

In [22]:
# 256 does not fit in int8; with the NumPy version used here the result is
# upcast to int16 (see the output below).
# NOTE(review): NumPy 2.x (NEP 50 promotion rules) is expected to raise an
# OverflowError for this expression instead — confirm before upgrading.
y + 256

array([257, 258, 259, 260], dtype=int16)

In [23]:
y + 256.0

array([257., 258., 259., 260.])

In [24]:
y + np.array([256], dtype=np.int32)

array([257, 258, 259, 260])

In [25]:
y + 256.0

array([257., 258., 259., 260.])

In [26]:
y + np.array([256], dtype=np.int32)

array([257, 258, 259, 260])

In [27]:
# Assigning into y[:] keeps y's own dtype (int8): the float result of
# `y + 1.5` is cast back on assignment, which drops the fractional part
# (compare the displayed values).
y[:] = y + 1.5
y

array([2, 3, 4, 5], dtype=int8)

In [28]:
x = np.array([1, 2, 3, 4], dtype=np.uint8)
# Reinterpret the same 4 bytes in place as two little-endian int16 values:
# bytes (1, 2) -> 1 + 2*256 = 513 and bytes (3, 4) -> 3 + 4*256 = 1027.
x.dtype = "<i2"
x

array([ 513, 1027], dtype=int16)

In [29]:
y = x.view("<i4")
y

array([67305985])

In [30]:
x[1] = 5
y

array([328193])

In [31]:
y.base is x

True

In [32]:
# 10x10 grid with four int8 channels; channel k is filled with the constant k + 1.
x = np.zeros((10, 10, 4), dtype=np.int8)
for channel in range(4):
    x[:, :, channel] = channel + 1

In [33]:
# View every group of four int8 values as one (r, g, b, a) record, then drop
# the last axis, which has length 1 after the reinterpretation.
y = x.view([(channel, 'i1') for channel in ('r', 'g', 'b', 'a')])[..., 0]

In [34]:
# `y` is a transposed view of the literal; `x` is a fresh C-ordered copy
# holding the same values.
y = np.array([[1, 3], [2, 4]], dtype="uint8").T
x = y.copy(order="C")
x

array([[1, 2],
       [3, 4]], dtype=uint8)

In [35]:
y


array([[1, 2],
       [3, 4]], dtype=uint8)

In [36]:
x.view(np.int16)

array([[ 513],
       [1027]], dtype=int16)

In [None]:

# `y` is a transposed view, so its last axis is not contiguous in memory and
# NumPy is expected to refuse reinterpreting its bytes as a wider dtype.
# The error is the point of this cell: catch and show it so that
# "Restart & Run All" can continue past the demonstration.
try:
    y.view(np.int16)
except ValueError as err:
    print(f"ValueError: {err}")

#### Indexing scheme: strides

In [38]:
# 3x3 matrix of single-byte integers.
x = np.array([(1, 2, 3),
              (4, 5, 6),
              (7, 8, 9)], dtype="int8")

In [39]:
x.tobytes('A')

b'\x01\x02\x03\x04\x05\x06\x07\x08\t'

In [40]:
x.strides

(3, 1)

In [41]:

# Byte offset of x[1, 2]: each index is weighted by the stride of its axis.
# The strides are read from the array instead of being hard-coded as (3, 1).
byte_offset = x.strides[0]*1 + x.strides[1]*2 # to find x[1, 2]
# `flat` is indexed in elements, not bytes, so divide by the item size first
# (a no-op for int8). This shortcut is only valid for a C-contiguous array.
x.flat[byte_offset // x.itemsize]

6

In [42]:
x[1, 2]

6

In [43]:
# 2x3 matrix of 16-bit integers, explicitly laid out row-major (C order).
x = np.array([(1, 2, 3), (4, 5, 6)], dtype="int16", order="C")


In [44]:
x.strides

(6, 2)

In [45]:
x.tobytes('A')

b'\x01\x00\x02\x00\x03\x00\x04\x00\x05\x00\x06\x00'

In [46]:
# Copy `x` into column-major (Fortran) order: the values are the same, but
# the first axis now has the smaller stride (compare the strides displayed
# for `x` and `y`).
y = np.array(x, order='F')
y.strides

(2, 4)

In [47]:
y.tobytes('A')

b'\x01\x00\x04\x00\x02\x00\x05\x00\x03\x00\x06\x00'

In [48]:
# Transposed view (`y`) versus an independent C-ordered copy of it (`x`).
y = np.array([[1, 3], [2, 4]], dtype="uint8").T
x = y.copy(order="C")

In [49]:
x.strides

(2, 1)

In [50]:
y.strides

(1, 2)

In [51]:
x.tobytes('A')

b'\x01\x02\x03\x04'

In [52]:
y.tobytes('A')

b'\x01\x03\x02\x04'

In [53]:
# Reversing with a slice yields a view that walks the same buffer backwards.
x = np.arange(1, 7, dtype=np.int32)
y = x[::-1]
y

array([6, 5, 4, 3, 2, 1])

In [54]:
y.strides

(-4,)

In [55]:
# Slicing from index 2 does not copy: the data pointer of `y` sits
# 2 items * 4 bytes (int32) = 8 bytes after the data pointer of `x`.
y = x[2:]
y.__array_interface__['data'][0] - x.__array_interface__['data'][0]

8

In [56]:
# 10x10x10 block of doubles. `np.float` (an alias of the builtin `float`) was
# removed in NumPy 1.24; `np.float64` is the same dtype spelled explicitly.
x = np.zeros((10, 10, 10), dtype=np.float64)
x.strides

(800, 80, 8)

In [57]:
x[::2,::3,::4].strides

(1600, 240, 32)

In [58]:
# Same 10x10x10 block of doubles, recreated before looking at its transpose.
# Uses `np.float64` because the `np.float` alias was removed in NumPy 1.24.
x = np.zeros((10, 10, 10), dtype=np.float64)
x.strides

(800, 80, 8)

In [59]:
x.T.strides

(8, 80, 800)

In [60]:
# `a` holds 0..5 as a 3x2 matrix; `b` is its transpose, i.e. the same buffer
# read with the two strides swapped.
a = np.arange(6, dtype="int8").reshape((3, 2))
b = a.transpose()
b.strides

(1, 2)

In [61]:
bytes(a.data)

b'\x00\x01\x02\x03\x04\x05'

In [62]:
b

array([[0, 2, 4],
       [1, 3, 5]], dtype=int8)

In [63]:
# Flatten the transposed view. The values come out in b's row-major order
# (0, 2, 4, 1, 3, 5), which is not the order in which `a` stores them (0..5).
# NOTE(review): presumably no single stride over a's buffer can produce this
# order, so reshape has to copy here — confirm with np.shares_memory(a, c).
c = b.reshape(3*2)
c

array([0, 2, 4, 1, 3, 5], dtype=int8)

In [64]:
from numpy.lib.stride_tricks import as_strided

help(as_strided)

Help on function as_strided in module numpy.lib.stride_tricks:

as_strided(x, shape=None, strides=None, subok=False, writeable=True)
    Create a view into the array with the given shape and strides.
    
    
    Parameters
    ----------
    x : ndarray
        Array to create a new.
    shape : sequence of int, optional
        The shape of the new array. Defaults to ``x.shape``.
    strides : sequence of int, optional
        The strides of the new array. Defaults to ``x.strides``.
    subok : bool, optional
        .. versionadded:: 1.10
    
        If True, subclasses are preserved.
    writeable : bool, optional
        .. versionadded:: 1.12
    
        If set to False, the returned array will always be readonly.
        Otherwise it will be writable if the original array was. It
        is advisable to set this to False if possible (see Notes).
    
    Returns
    -------
    view : ndarray
    
    See also
    --------
    broadcast_to : broadcast an array to a given shap

In [65]:
x = np.array([1, 2, 3, 4], dtype="int16")

# Step over two items (2 * 2 bytes) at a time and keep two of them.
as_strided(x, shape=(2,), strides=(2 * x.itemsize,))

array([1, 3], dtype=int16)

In [66]:
x[::2]

array([1, 3], dtype=int16)

In [None]:
# Exercise: starting from the array below, use only `as_strided` to build a
# 3x4 array in which every row repeats these four values.
# (`np.array`, not bare `array`: only `np` is imported, so `array` is undefined.)
np.array([1, 2, 3, 4],
         dtype = np.int8)

In [68]:
x = np.array([1, 2, 3, 4], dtype="int8")

# A zero stride along axis 0 makes every row re-read the same four bytes.
y = as_strided(x, shape=(3, 4), strides=(0, x.itemsize))
y


array([[1, 2, 3, 4],
       [1, 2, 3, 4],
       [1, 2, 3, 4]], dtype=int8)

In [69]:
y.base.base is x

True

In [70]:
# Broadcasting

# Stretch the length-4 vector over three rows without copying: the row stride
# is 0 and the column stride is one int16 item.
x = np.array([1, 2, 3, 4], dtype="int16")
x2 = as_strided(x, shape=(3, 4), strides=(0, x.itemsize))
x2

array([[1, 2, 3, 4],
       [1, 2, 3, 4],
       [1, 2, 3, 4]], dtype=int16)

In [71]:
y = np.array([5, 6, 7], dtype="int16")

# Stretch the length-3 vector over four columns: one item per row step and a
# zero stride along the columns.
y2 = as_strided(y, shape=(3, 4), strides=(y.itemsize, 0))
y2

array([[5, 5, 5, 5],
       [6, 6, 6, 6],
       [7, 7, 7, 7]], dtype=int16)

In [72]:
x2 * y2

array([[ 5, 10, 15, 20],
       [ 6, 12, 18, 24],
       [ 7, 14, 21, 28]], dtype=int16)

In [73]:
x = np.array([1, 2, 3, 4], dtype="int16")
y = np.array([5, 6, 7], dtype="int16")

# A (1, 4) row times a (3, 1) column: NumPy broadcasts both to (3, 4).
x[None, :] * y[:, None]

array([[ 5, 10, 15, 20],
       [ 6, 12, 18, 24],
       [ 7, 14, 21, 28]], dtype=int16)

In [74]:
# 3x3 matrix of 32-bit integers used by the diagonal examples.
x = np.array([(1, 2, 3),
              (4, 5, 6),
              (7, 8, 9)], dtype="int32")

In [None]:
# Exercise template — intentionally incomplete, it will not run as written.
# Replace `???` with the byte stride that steps along the main diagonal of x.
x_diag = as_strided(x, 
                    shape = (3, ),
                    strides = (???,))

In [76]:
# Solution

# One step along the main diagonal is one row down plus one column right, so
# the diagonal stride is the sum of the two axis strides. Reading the strides
# and the length from `x` avoids hard-coding a contiguous 3x3 layout.
x_diag = as_strided(x,
                    shape = (min(x.shape), ),
                    strides = (x.strides[0] + x.strides[1], ))
x_diag

array([1, 5, 9])

In [77]:
# Same diagonal stride, but starting from x[0, 1]: this walks the diagonal
# just above the main one, which has 2 elements.
as_strided(x[0, 1:],
           shape = (2, ),
           strides = ((3 + 1) * x.itemsize, ))

array([2, 6])

In [78]:
# Same diagonal stride, but starting from x[1, 0]: this walks the diagonal
# just below the main one, which has 2 elements.
as_strided(x[1:, 0], 
           shape = (2, ),
           strides = ((3 + 1) * x.itemsize, ))

array([4, 8])

In [79]:
y = np.diag(x, k=1)
y

array([2, 6])

In [80]:
y.flags.owndata

False

#### Challenge

In [81]:
# Reference result: sum of x[j, i, j, i] over all (i, j), computed with an
# explicit Python loop (i is the outer index, j the inner one).
x = np.arange(5 ** 4).reshape((5,) * 4)
s = 0

for i, j in np.ndindex(5, 5):
    s += x[j, i, j, i]

In [None]:
# Exercise template — intentionally incomplete, it will not run as written.
# Fill in the two strides so that y[j, i] == x[j, i, j, i], then compute s2
# from y without a Python loop; the assert checks it against the loop result s.
y = as_strided(x,
               shape = (5, 5), 
               strides = (TODO, TODO))

s2 = 
assert s == s2

#### Solution

In [83]:
# In x[j, i, j, i], j drives axes 0 and 2 and i drives axes 1 and 3, so each
# axis of the view advances two axes of x at once:
#   axis 0 (j): 5**3 + 5 items,  axis 1 (i): 5**2 + 1 items.
y = as_strided(
    x,
    shape=(5, 5),
    strides=(x.itemsize * (5 ** 3 + 5), x.itemsize * (5 ** 2 + 1)),
)


In [84]:
# Sum the strided view in one vectorised call ...
s2 = y.sum()
# ... and verify, as the challenge asks, that it matches the explicit loop.
assert s == s2

#### CPU cache effects

In [85]:
# 20000 doubles stored contiguously.
x = np.zeros(20000)


In [86]:
# Also 20000 doubles, but taken as every 67th item of a much larger buffer.
y = np.zeros(20000 * 67)[::67]

In [87]:
x.shape, y.shape

((20000,), (20000,))

In [88]:
%timeit 

x.sum()

0.0

In [89]:
%timeit

y.sum()

0.0

In [90]:
x.strides, y.strides

((8,), (536,))

#### Findings in dissection

In [91]:
# Record the author and the versions of the imported packages so the run can
# be reproduced (needs the third-party `watermark` IPython extension).
%reload_ext watermark
%watermark -a "Caique Miranda" -gu "caiquemiranda" -iv

Author: Caique Miranda

Github username: caiquemiranda

numpy: 1.23.0

