# Coding Practice Session 11
## Memory Management in NumPy

In [1]:
import numpy as np

In [2]:
arr = np.array([1, 2, 3])

In [3]:
arr.shape

(3,)

In [4]:
arr.flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : True
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False

In [9]:
a = np.array([[1, 2, 3], [4, 5, 6]], dtype="i1")

In [10]:
a[0, 1]

np.int8(2)

In [11]:
a.shape

(2, 3)

In [14]:
a.dtype

dtype('int8')

In [12]:
a.flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False

In [None]:
a.strides  # (bytes to step to the next row, bytes to step to the next column)

(3, 1)

In [None]:
a[1, 1]  # found at byte offset 1 * 3 + 1 * 1 = 4 from the first element, using a.strides == (3, 1)

np.int8(5)

In [18]:
arr = np.random.rand(2, 3, 4)
arr

array([[[0.40460914, 0.90623019, 0.87687786, 0.84286516],
        [0.18117263, 0.37257489, 0.98400391, 0.37510078],
        [0.57089361, 0.63500156, 0.33550326, 0.94325497]],

       [[0.06339085, 0.22325771, 0.71264488, 0.54924908],
        [0.15518402, 0.13546458, 0.48675143, 0.30227645],
        [0.21806038, 0.10119147, 0.07402865, 0.16053531]]])

In [19]:
arr.shape

(2, 3, 4)

In [21]:
arr.dtype

dtype('float64')

In [20]:
arr.strides

(96, 32, 8)

In [22]:
arr[1, 2, 2]

np.float64(0.0740286546498855)

In [23]:
96 * 1 + 32 * 2 + 8 * 2

176

To reach the value at `arr[1, 2, 2]`, NumPy starts at the address of the first element and moves forward **176** bytes: each index is multiplied by the stride of its axis (`96 * 1 + 32 * 2 + 8 * 2`).

### Row-major Indexing Vs. Column-major Indexing

In [24]:
c_array = np.array(
    [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]], dtype="u1"
)  # row-major (C-order) layout: the elements of each row are adjacent in memory

In [None]:
# Same values, column-major (Fortran-order) layout: the elements of each column are adjacent in memory.
f_array = np.asfortranarray(c_array)
f_array

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]], dtype=uint8)

In [26]:
c_array.shape

(3, 4)

In [27]:
f_array.shape

(3, 4)

In [28]:
c_array.strides

(4, 1)

In [29]:
f_array.strides

(1, 3)

In [31]:
def sum_row(x, index=0):
    """Add up all elements of one row of ``x``.

    Parameters
    ----------
    x : array-like supporting ``x[index, :]`` indexing
        The array to read from.
    index : int, optional
        Position of the row to sum; defaults to the first row (0).

    Returns
    -------
    The result of ``np.sum`` over the selected row.
    """
    row = x[index, :]
    return np.sum(row)


def sum_column(x, index=0):
    """Add up all elements of one column of ``x``.

    Parameters
    ----------
    x : array-like supporting ``x[:, index]`` indexing
        The array to read from.
    index : int, optional
        Position of the column to sum; defaults to the first column (0).

    Returns
    -------
    The result of ``np.sum`` over the selected column.
    """
    column = x[:, index]
    return np.sum(column)

In [None]:
# Rebinds c_array to a much larger array for timing:
# 10_000 x 10_000 float64 values = 800 MB, stored in row-major (C) order.
c_array = np.random.rand(10_000, 10_000)

In [33]:
# Column-major copy of the same values; this is a second buffer of the same size.
f_array = np.asfortranarray(c_array)

In [34]:
c_array[:5, :5]

array([[0.78656993, 0.69740608, 0.50020053, 0.84822112, 0.93695011],
       [0.70209537, 0.64689201, 0.83629071, 0.7290132 , 0.08987388],
       [0.10120361, 0.32498848, 0.15836997, 0.61363896, 0.56441722],
       [0.95025807, 0.71747767, 0.31414254, 0.6499332 , 0.30216943],
       [0.6679556 , 0.18277653, 0.27538511, 0.78026348, 0.37639849]])

In [35]:
f_array[:5, :5]

array([[0.78656993, 0.69740608, 0.50020053, 0.84822112, 0.93695011],
       [0.70209537, 0.64689201, 0.83629071, 0.7290132 , 0.08987388],
       [0.10120361, 0.32498848, 0.15836997, 0.61363896, 0.56441722],
       [0.95025807, 0.71747767, 0.31414254, 0.6499332 , 0.30216943],
       [0.6679556 , 0.18277653, 0.27538511, 0.78026348, 0.37639849]])

In [36]:
c_array.strides

(80000, 8)

In [37]:
f_array.strides

(8, 80000)

In [38]:
sum_row(c_array)

np.float64(4988.298996750329)

In [None]:
sum_row(f_array)

np.float64(4988.298996750329)

In [42]:
%timeit sum_row(c_array)

4.81 μs ± 1.12 μs per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [43]:
%timeit sum_row(f_array)

91.3 μs ± 16.1 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


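As the timings show, **row-major arrays (C-arrays)** are much more efficient for row-wise operations: the elements of a row sit next to each other in memory (8 bytes apart), whereas in the column-major copy they are 80,000 bytes apart.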

In [44]:
%timeit sum_column(c_array)

95.3 μs ± 7.09 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [45]:
%timeit sum_column(f_array)

5.2 μs ± 721 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


However, **column-major arrays (Fortran arrays)** are much more efficient for column-wise operations, because in that layout the elements of a column are the ones stored next to each other in memory.

In [59]:
arr = np.arange(10, dtype="u1")
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8)

In [60]:
arr.strides

(1,)

In [61]:
arr.shape

(10,)

In [62]:
# Use reshape() instead of assigning to .shape, which the NumPy docs discourage.
# For this contiguous array the result is a view over the same data buffer (no copy).
arr = arr.reshape(2, 5)

In [63]:
arr

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]], dtype=uint8)

In [64]:
arr.strides

(5, 1)

**Note:** Reshaping a contiguous array is fast in NumPy because the underlying data buffer is neither moved nor copied; only the `shape` and `strides` metadata are updated.

In [65]:
x = np.ones((10_000, ))

In [None]:
y = np.ones((10_000*100, ))

In [68]:
np.shares_memory(y, y[::100])

True

In [69]:
# Keep every 100th element: y has 10_000 values, but it is a strided view
# into the 1_000_000-element buffer rather than a compact array of its own.
y = np.ones((10_000*100, ))[::100]

In [70]:
x.shape

(10000,)

In [71]:
y.shape

(10000,)

In [72]:
x.strides

(8,)

In [73]:
y.strides

(800,)

In [74]:
(y == x).all()

np.True_

In [75]:
%timeit x.sum()

3.45 μs ± 376 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [76]:
%timeit y.sum()

7.89 μs ± 1.08 μs per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


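When we perform an operation on an array, the CPU does not fetch elements one at a time: it loads contiguous blocks of memory into its cache, and how much fits depends on the size of the cache. The elements of `x` are adjacent, so each block that is loaded holds several useful values, whereas consecutive elements of `y` are 800 bytes apart, so most of what is loaded is never used. That's why summing the elements of `x` is faster than summing the elements of `y`, even though both arrays hold the same 10,000 values.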

In [77]:
x.flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : True
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False

In [79]:
y.flags # elements are not contiguous: y is a view whose elements are 800 bytes apart

  C_CONTIGUOUS : False
  F_CONTIGUOUS : False
  OWNDATA : False
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False

In [80]:
arr = np.random.randint(1, 100, size=(5, 5))
arr

array([[66,  5, 74, 40, 97],
       [24, 59, 69,  9, 97],
       [ 2, 92, 43, 69, 63],
       [ 1, 61, 64, 71, 59],
       [99, 55, 11, 60,  5]])

In [81]:
# Taking the leading rows of a C-ordered array keeps whole rows, so the view stays C-contiguous.
arr_c_style = arr[:3, :]
arr_c_style

array([[66,  5, 74, 40, 97],
       [24, 59, 69,  9, 97],
       [ 2, 92, 43, 69, 63]])

In [82]:
arr_c_style.flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : False
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False

In [83]:
# Taking the leading columns of a C-ordered array leaves gaps between rows.
# Despite the name, this view is neither C- nor F-contiguous (see the flags below).
arr_f_style = arr[:, :3]
arr_f_style

array([[66,  5, 74],
       [24, 59, 69],
       [ 2, 92, 43],
       [ 1, 61, 64],
       [99, 55, 11]])

In [84]:
arr_f_style.flags

  C_CONTIGUOUS : False
  F_CONTIGUOUS : False
  OWNDATA : False
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False