In [1]:
import struct

SIZE = 1048576

with open("../c/data.binary", "rb") as f:
    x_iter = struct.iter_unpack('f', f.read(4*SIZE))
    w_iter = struct.iter_unpack('f', f.read(4*SIZE))

In [2]:
x = [x[0] for x in x_iter]
w = [w[0] for w in w_iter]

In [3]:
import numpy as np

np_x = np.array(x)
np_w = np.array(w)

In [4]:
%%timeit
a = np.dot(np_x, np_w)

60.1 µs ± 5.46 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [5]:
np.dot(np_x, np_w)

294.5037726055393

In [6]:
def dot(x, w):
    
    a = 0
    for i in range(SIZE):
        a += x[i] * w[i]
    return a

In [7]:
%%timeit
a = dot(x, w)

75.4 ms ± 995 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [8]:
dot(x,w)

294.5037726055409

In [3]:
import torch

In [4]:
torch_x = torch.tensor(x)
torch_w = torch.tensor(w)

In [5]:
%%timeit
torch.dot(torch_x, torch_w)

24.5 µs ± 897 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [6]:
torch_x.to("cuda")
torch_w.to("cuda")

tensor([-0.6405,  0.2296,  0.7666,  ...,  0.3870,  0.0557, -0.8330],
       device='cuda:0')

In [8]:
%%timeit
torch.dot(torch_x, torch_w)

24.1 µs ± 1.4 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
