In [1]:
import ctypes 
import random
import numpy as np
# Fix the seed so both input lists are identical on every run.
random.seed(0)

# Load the compiled shared library and declare the argument types of
# dot_product: one int followed by three untyped pointers. Judging from the
# calls in this notebook these are the element count, the two input buffers
# and the one-element output buffer.
dot = ctypes.CDLL('./dot_product.so')
dot.dot_product.argtypes = [ctypes.c_int] + [ctypes.c_void_p] * 3

# Element type of every ctypes buffer handed to the C code.
f32 = ctypes.c_float

# Two lists of N Python floats drawn uniformly between -1 and 1; lista is
# generated in full before listb.
N = 1_000_000
lista = [random.uniform(-1, 1) for _ in range(N)]
listb = [random.uniform(-1, 1) for _ in range(N)]

# One-element float buffer, initialised to 0, passed as the last argument of
# dot_product.
bufans = (f32 * 1)(0)

# Reference result computed in pure Python, summed left to right.
correct_ans = sum(x * y for x, y in zip(lista, listb))

## Dot Product using C - Loading Buffers at each step

In [2]:
%%timeit
# Times the C call together with building both ctypes float arrays from the
# Python lists, so the list-to-buffer conversion is part of every measured run.
dot.dot_product(N, (f32*N)(*lista), (f32*N)(*listb), bufans)

271 ms ± 46.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Dot Product using Python

In [3]:
%%timeit
# Pure-Python baseline: the same expression that produced correct_ans.
sum([a*b for a, b in zip(lista, listb)])

127 ms ± 13.2 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


## Loading the buffers

In [4]:
%%timeit
# Times only the construction of the two ctypes float arrays (no C call).
# Names assigned inside a %%timeit cell are not kept in the notebook
# namespace, which is why the same assignment is repeated untimed in the
# following cell.
bufa, bufb = (f32*N)(*lista), (f32*N)(*listb)

406 ms ± 23.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [5]:
# Build the two ctypes float buffers once, in the notebook namespace, so the
# later timed cells can pass them to the C code without rebuilding them.
bufa = (f32 * N)(*lista)
bufb = (f32 * N)(*listb)

## C code - After Loading the buffers

In [6]:
%%timeit
# Times the C call alone: bufa and bufb were built beforehand, so no
# list-to-buffer conversion happens inside the measured statement.
dot.dot_product(N, bufa, bufb, bufans)

3.33 ms ± 146 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [7]:
# Validate the C result against the pure-Python reference before printing
# both values, instead of relying on a visual comparison. bufans holds a
# 32-bit float while the reference is summed with Python (double precision)
# floats, so only approximate agreement is expected; rtol=1e-3 comfortably
# covers the difference seen in this notebook's recorded output.
np.testing.assert_allclose(bufans[0], correct_ans, rtol=1e-3)
print(bufans[0], correct_ans)

-114.73899841308594 -114.73601551468235


## NumPy - Creating arrays at each step

In [8]:
%%timeit
# np.dot is handed the raw Python lists, so the list-to-array conversion is
# included in every measured run.
np.dot(lista, listb)

223 ms ± 16.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [9]:
# Convert both Python lists to NumPy arrays once, outside any timed cell.
# NOTE(review): np.array on Python floats defaults to float64, whereas the
# ctypes buffers use c_float, so the NumPy timing below is not a like-for-like
# comparison with the C timings - confirm whether dtype=np.float32 was intended.
arra = np.array(lista)
arrb = np.array(listb)

## NumPy - After creating arrays

In [10]:
%%timeit
# Times np.dot on the arrays built beforehand, so no list conversion is
# included in the measurement.
np.dot(arra, arrb)

1.64 ms ± 254 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [11]:
# Load the second shared library (the AVX build, going by its file name) and
# give its dot_product the same argument types as the first library's:
# one int followed by three untyped pointers.
dot_fast = ctypes.CDLL('./dot_product_avx.so')
dot_fast.dot_product.argtypes = [ctypes.c_int] + [ctypes.c_void_p] * 3

## Using AVX

In [12]:
%%timeit
# Same call as the preloaded-buffer C benchmark, but against the library
# loaded from dot_product_avx.so; the buffers are reused, not rebuilt.
dot_fast.dot_product(N, bufa, bufb, bufans)

953 µs ± 107 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [13]:
# Validate the value left in bufans by the preceding dot_fast call against the
# pure-Python reference, then display both. bufans holds a 32-bit float and
# the reference is summed in double precision, so only approximate agreement
# is expected; rtol=1e-3 comfortably covers the recorded difference.
np.testing.assert_allclose(bufans[0], correct_ans, rtol=1e-3)
bufans[0], correct_ans

(-114.73645782470703, -114.73601551468235)