In [None]:
import numpy as np

In [26]:
# Setup the random Numpy array
# Seed the random number generator for reproducibility.
# The seed has to be handed to default_rng() itself: np.random.seed() only
# seeds NumPy's legacy global RandomState and has no effect on Generator
# objects, so an unseeded default_rng() would draw fresh OS entropy each run.
SEED = 176
np.random.seed(SEED)  # legacy global state only; kept for any code still calling np.random.* functions

rng = np.random.default_rng(SEED)
rand_array = rng.gamma(shape=7.2, scale=1.6, size=(10000, 10000))  # Random Gamma Distributed Array of shape (10000, 10000)

In [27]:
# Function for Row Major Loop Summation
def row_sum(arr):
    """Add up every element of ``arr`` by walking it one row at a time.

    The outer loop iterates over the rows of ``arr`` and the inner loop over
    the elements of each row. Each row's subtotal is accumulated first and
    then folded into the grand total.

    Parameters
    ----------
    arr : iterable of iterables of numbers
        For example a 2-D NumPy array or a list of lists.

    Returns
    -------
    The total of all elements, or ``0`` when ``arr`` yields no rows.
    """
    grand_total = 0
    for row in arr:
        row_subtotal = 0
        for value in row:
            row_subtotal += value
        grand_total += row_subtotal
    return grand_total

# Row-major total of the full array; later cells print it and compare it with the column-major total.
Sr = row_sum(rand_array)

In [28]:
%%timeit -n 1 -r 40 -o
# -n 1: one call per run, -r 40: forty runs, -o: return the TimeitResult as the cell's output.
row_sum(rand_array)

8.66 s ± 48.6 ms per loop (mean ± std. dev. of 40 runs, 1 loop each)


<TimeitResult : 8.66 s ± 48.6 ms per loop (mean ± std. dev. of 40 runs, 1 loop each)>

In [29]:
# `_` is IPython's "last output" variable. Here it holds the TimeitResult returned by the
# preceding %%timeit cell, so this cell must run directly after it, before any other cell
# produces an output.
Sr_time = _
print(f"Sum Value of Row Major Loop Summation:{Sr}")

Sum Value of Row Major Loop Summation:1152009228.0450516


In [30]:
# Function for Column Major Loop Summation
def col_sum(arr):
    """Add up every element of ``arr`` by walking it one column at a time.

    For each column index the function iterates over all rows of ``arr`` and
    picks that column's entry from each row. Each column's subtotal is
    accumulated first and then folded into the grand total.

    Parameters
    ----------
    arr : 2-D array-like exposing ``.shape``
        ``arr.shape[1]`` is taken as the number of columns.

    Returns
    -------
    The total of all elements, or ``0`` when ``arr`` has no columns.
    """
    grand_total = 0
    n_cols = arr.shape[1]
    for col_idx in range(n_cols):
        col_subtotal = 0
        for row in arr:
            col_subtotal += row[col_idx]
        grand_total += col_subtotal
    return grand_total
    
# Column-major total of the full array; later cells print it and compare it with the row-major total.
Sc = col_sum(rand_array)

In [31]:
%%timeit -n 1 -r 40 -o
# Same timing settings as the row-major cell (1 call per run, 40 runs, result returned via -o).
col_sum(rand_array)

17.9 s ± 189 ms per loop (mean ± std. dev. of 40 runs, 1 loop each)


<TimeitResult : 17.9 s ± 189 ms per loop (mean ± std. dev. of 40 runs, 1 loop each)>

In [32]:
# `_` is IPython's "last output" variable. Here it holds the TimeitResult returned by the
# preceding %%timeit cell, so this cell must run directly after it, before any other cell
# produces an output.
Sc_time = _
print(f"Sum Value of Column Major Loop Summation:{Sc}")

Sum Value of Column Major Loop Summation:1152009228.0450525


In [33]:
# Check the two totals for exact floating-point equality first. If they differ,
# report the absolute and relative gap to show whether the mismatch is only
# rounding caused by the different order of additions.
truth = (Sr == Sc)
print(f"Do the Column and Row Major Summations match? {truth}")
if not truth:
    print(f"The Difference of the two summations is: {abs(Sr - Sc)}")
    percent_diff = abs(Sr - Sc) / np.mean([Sr, Sc]) * 100
    print(f"Or in percentage difference: {percent_diff} %")
    # Below 0.001 % the two totals are treated as equal up to rounding.
    verdict = (
        "The difference is negligible (0.001%), so we can consider the summations equivalent. In other words, this is a rounding error."
        if percent_diff < 0.001
        else "The difference is significant."
    )
    print(verdict)

Do the Column and Row Major Summations match? False
The Difference of the two summations is: 9.5367431640625e-07
Or in percentage difference: 8.278356572061715e-14 %
The difference is negligible (0.001%), so we can consider the summations equivalent. In other words, this is a rounding error.


In [34]:
%%timeit -n 1 -r 40 -o
# Baseline: NumPy's built-in sum, timed with the same settings as the two loop versions.
rand_array.sum()

33.8 ms ± 821 μs per loop (mean ± std. dev. of 40 runs, 1 loop each)


<TimeitResult : 33.8 ms ± 821 μs per loop (mean ± std. dev. of 40 runs, 1 loop each)>

In [35]:
# `_` is IPython's "last output" variable. Here it holds the TimeitResult returned by the
# preceding %%timeit cell, so this cell must run directly after it, before any other cell
# produces an output.
Ib_time = _

# Side-by-side summary of the three TimeitResult objects captured from the timing cells.
print(f"Comparing the times taken for each function we have:\nRow Major Loop Summation Time: {Sr_time}\nColumn Major Loop Summation Time: {Sc_time}\nBuilt-in Numpy Sum Time: {Ib_time} ")

Comparing the times taken for each function we have:
Row Major Loop Summation Time: 8.66 s ± 48.6 ms per loop (mean ± std. dev. of 40 runs, 1 loop each)
Column Major Loop Summation Time: 17.9 s ± 189 ms per loop (mean ± std. dev. of 40 runs, 1 loop each)
Built-in Numpy Sum Time: 33.8 ms ± 821 μs per loop (mean ± std. dev. of 40 runs, 1 loop each) 


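The built-in NumPy `sum` method far outperforms both of my hand-written functions, taking only milliseconds where the two loop-based versions take seconds. Even between my own functions, however, the column-major function is roughly twice as slow as the row-major one. I attribute this difference mainly to memory access: NumPy stores the array in row-major order by default, so summing along a row reads values that sit next to each other in memory, whereas summing down a column jumps from row to row and makes much poorer use of the CPU cache. In addition, `col_sum` has to walk over every row again for each column. This less cache-friendly access pattern slows down the whole summation.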