In [1]:
%load_ext autoreload
%autoreload 2

In [1]:
import torch
from RAVEN.pe.mac_taylor_ops import *
import time

# test exp operator

In [2]:
a = torch.arange(0, 1, 0.001)
a = a.cuda()

start_time = time.time()
approximate = MACexp.apply(a, 5, 7, 8, True)
print("--- %s seconds ---" % (time.time() - start_time))

start_time = time.time()
precise = torch.exp(a)
print("--- %s seconds ---" % (time.time() - start_time))

error = (approximate - precise) / precise
print("min error rate:", error.min())
print("max error rate:", error.max())
print("avg error rate:", error.mean())
print("rms error rate:", error.mul(error).mean().sqrt())


--- 0.0030002593994140625 seconds ---
--- 0.0 seconds ---
min error rate: tensor(-0.0061, device='cuda:0')
max error rate: tensor(0.0032, device='cuda:0')
avg error rate: tensor(-0.0006, device='cuda:0')
rms error rate: tensor(0.0017, device='cuda:0')


# reference result using MAC_Taylor kernel

In [3]:
a = torch.arange(0, 1, 0.001)
a = a.cuda()

precise = torch.exp(a)

point = 0.
scale = torch.exp(torch.tensor([point])).cuda()
var = a - point

coeff = [1/1, 1/1, 1/2, 1/6, 1/24, 1/120, 1/720, 1/5040, 1/40320, 1/362880]

coeff = coeff[0:5]
print(coeff)

approximate = MAC_Taylor(scale, 
                         coeff, 
                         var, 
                         fxp=True, 
                         intwidth=7, 
                         fracwidth=8, 
                         rounding_coeff="round", 
                         rounding_var="round", 
                         keepwidth=False)

error = (approximate - precise) / precise
print("min error rate:", error.min())
print("max error rate:", error.max())
print("avg error rate:", error.mean())
print("rms error rate:", error.mul(error).mean().sqrt())

[1.0, 1.0, 0.5, 0.16666666666666666, 0.041666666666666664]
min error rate: tensor(-0.0061, device='cuda:0')
max error rate: tensor(0.0032, device='cuda:0')
avg error rate: tensor(-0.0006, device='cuda:0')
rms error rate: tensor(0.0017, device='cuda:0')


# test exp op grad

In [16]:
a = torch.tensor([0.5, 0.7]).cuda()
a.requires_grad_()

approximate = MACexp.apply(a, 5, 7, 8, True, "round", True, True, True)
print(approximate)
approximate.sum().backward()
print(a.grad)

a = torch.tensor([0.5, 0.7])
a.requires_grad_()

precise = torch.exp(a)
precise.sum().backward()
a.grad
print(precise)
print(a.grad)


tensor([1.6484, 2.0117], device='cuda:0', grad_fn=<MACexpBackward>)
tensor([1.6484, 2.0117], device='cuda:0')
tensor([1.6487, 2.0138], grad_fn=<ExpBackward>)
tensor([1.6487, 2.0138])


# test left most one

In [14]:
a = torch.tensor([127.5, 0.99])
b = torch.log2(a).floor()
print(a >> (b+1))
print(a)

tensor([0.9961, 0.9900])
tensor([127.5000,   0.9900])


# test div grad

In [17]:
a = torch.tensor([0.5]).cuda()
b = torch.tensor([2.]).cuda()
a.requires_grad_()
b.requires_grad_()

c = torch.div(a, b)
print(c)
c.backward()
print(a.grad)
print(b.grad)

tensor([0.2500], device='cuda:0', grad_fn=<DivBackward0>)
tensor([0.5000], device='cuda:0')
tensor([-0.1250], device='cuda:0')


# test MACdiv

In [10]:
num = 100000
y = torch.rand(num).mul(32).cuda()
x = torch.rand(num).mul(64).cuda()

y = torch.tensor([1.]).cuda()
x = torch.arange(0.5, 1., 0.001).cuda()

y = torch.tensor([32.12341, 6.123]).cuda()
x = torch.tensor([0.5123141, 0.1231]).cuda()

y.requires_grad_()
x.requires_grad_()

# print(y, x)

start_time = time.time()
approximate = MACdiv.apply(y, x, 6, 7, 8, True)
approximate.sum().backward()
print(y.grad)
print(x.grad)
print("--- %s seconds ---" % (time.time() - start_time))

y = torch.tensor([32.12341, 6.123]).cuda()
x = torch.tensor([0.5123141, 0.1231]).cuda()

y.requires_grad_()
x.requires_grad_()

start_time = time.time()
precise = torch.div(y, x)
precise.sum().backward()
print(y.grad)
print(x.grad)
print("--- %s seconds ---" % (time.time() - start_time))

error = (approximate - precise) / precise
print("min error rate:", error.min())
print("max error rate:", error.max())
print("avg error rate:", error.mean())
print("rms error rate:", error.mul(error).mean().sqrt())


tensor([1.9519, 8.1235], device='cuda:0')
tensor([-122.3909, -404.0622], device='cuda:0')
--- 0.008000850677490234 seconds ---
tensor([1.9519, 8.1235], device='cuda:0')
tensor([-122.3909, -404.0622], device='cuda:0')
--- 0.0050008296966552734 seconds ---
min error rate: tensor(-0.0133, device='cuda:0', grad_fn=<MinBackward1>)
max error rate: tensor(0.0002, device='cuda:0', grad_fn=<MaxBackward1>)
avg error rate: tensor(-0.0066, device='cuda:0', grad_fn=<MeanBackward0>)
rms error rate: tensor(0.0094, device='cuda:0', grad_fn=<SqrtBackward>)


# test MAClog

In [18]:
num = 100000
x = torch.rand(num).mul(64).cuda()

x = torch.arange(0.5, 1., 0.001)
# x = x.cuda()

x = torch.tensor([0.5123141, 43]).cuda()

x.requires_grad_()

start_time = time.time()
approximate = MAClog.apply(x, 8, 7, 8, False)
approximate.sum().backward()
print(x.grad)
print("--- %s seconds ---" % (time.time() - start_time))

x = torch.tensor([0.5123141, 43]).cuda()

x.requires_grad_()

start_time = time.time()
precise = torch.log(x)
precise.sum().backward()
print(x.grad)
print("--- %s seconds ---" % (time.time() - start_time))

error = (approximate - precise) / precise
print("min error rate:", error.min())
print("max error rate:", error.max())
print("avg error rate:", error.mean())
print("rms error rate:", error.mul(error).mean().sqrt())


tensor([1.9519, 0.0233], device='cuda:0')
--- 0.00499272346496582 seconds ---
tensor([1.9519, 0.0233], device='cuda:0')
--- 0.003998994827270508 seconds ---
min error rate: tensor(-2.6468e-05, device='cuda:0', grad_fn=<MinBackward1>)
max error rate: tensor(0., device='cuda:0', grad_fn=<MaxBackward1>)
avg error rate: tensor(-1.3234e-05, device='cuda:0', grad_fn=<MeanBackward0>)
rms error rate: tensor(1.8716e-05, device='cuda:0', grad_fn=<SqrtBackward>)


# test MACsigmoid

In [3]:
num = 100000
x = torch.rand(num).mul(64).cuda()

x = torch.arange(0.5, 1., 0.001)
# x = x.cuda()

x = torch.tensor([0.5123141, 43.]).cuda()

x.requires_grad_()

start_time = time.time()
approximate = MACsigmoid(x, 8, 7, 8, True)
approximate.sum().backward()
print(x.grad)
print("--- %s seconds ---" % (time.time() - start_time))

x = torch.tensor([0.5123141, 43.]).cuda()

x.requires_grad_()

start_time = time.time()
precise = torch.sigmoid(x)
precise.sum().backward()
print(x.grad)
print("--- %s seconds ---" % (time.time() - start_time))

error = (approximate - precise) / precise
print("min error rate:", error.min())
print("max error rate:", error.max())
print("avg error rate:", error.mean())
print("rms error rate:", error.mul(error).mean().sqrt())


tensor([2.3471e-01, 2.1151e-19], device='cuda:0')
--- 0.009000062942504883 seconds ---
tensor([0.2343, 0.0000], device='cuda:0')
--- 0.003000497817993164 seconds ---
min error rate: tensor(-0.0039, device='cuda:0', grad_fn=<MinBackward1>)
max error rate: tensor(-0.0006, device='cuda:0', grad_fn=<MaxBackward1>)
avg error rate: tensor(-0.0022, device='cuda:0', grad_fn=<MeanBackward0>)
rms error rate: tensor(0.0028, device='cuda:0', grad_fn=<SqrtBackward>)


# test MACtanh

In [9]:
num = 100000
x = torch.rand(num).mul(64).cuda()

x = torch.arange(0.5, 1., 0.001)
# x = x.cuda()

x = torch.tensor([0.5123141, 2.1]).cuda()

x.requires_grad_()

start_time = time.time()
approximate = MACtanh(x, 8, 7, 8, True)
approximate.sum().backward()
print(x.grad)
print("--- %s seconds ---" % (time.time() - start_time))

x = torch.tensor([0.5123141, 2.1]).cuda()

x.requires_grad_()

start_time = time.time()
precise = torch.tanh(x)
precise.sum().backward()
print(x.grad)
print("--- %s seconds ---" % (time.time() - start_time))

error = (approximate - precise) / precise
print("min error rate:", error.min())
print("max error rate:", error.max())
print("avg error rate:", error.mean())
print("rms error rate:", error.mul(error).mean().sqrt())


tensor([0.7794, 0.0585], device='cuda:0')
--- 0.008991718292236328 seconds ---
tensor([0.7775, 0.0582], device='cuda:0')
--- 0.003998994827270508 seconds ---
min error rate: tensor(-0.0018, device='cuda:0', grad_fn=<MinBackward1>)
max error rate: tensor(0.0019, device='cuda:0', grad_fn=<MaxBackward1>)
avg error rate: tensor(8.7631e-05, device='cuda:0', grad_fn=<MeanBackward0>)
rms error rate: tensor(0.0018, device='cuda:0', grad_fn=<SqrtBackward>)


# test MACsoftmax

In [20]:
x = torch.tensor([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]).cuda()

x.requires_grad_()

start_time = time.time()
approximate = MACsoftmax(x, 8, 7, 8, True)
approximate.sum().backward()
print(x.grad)
print("--- %s seconds ---" % (time.time() - start_time))

x = torch.tensor([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]).cuda()

x.requires_grad_()

start_time = time.time()
softmax = torch.nn.Softmax(dim=0)
precise = softmax(x)
precise.sum().backward()
print(x.grad)
print("--- %s seconds ---" % (time.time() - start_time))

error = (approximate - precise) / precise
print("min error rate:", error.min())
print("max error rate:", error.max())
print("avg error rate:", error.mean())
print("rms error rate:", error.mul(error).mean().sqrt())


tensor([0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0')
--- 0.007999658584594727 seconds ---
tensor([0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0')
--- 0.00199127197265625 seconds ---
min error rate: tensor(-0.0236, device='cuda:0', grad_fn=<MinBackward1>)
max error rate: tensor(0.0297, device='cuda:0', grad_fn=<MaxBackward1>)
avg error rate: tensor(0.0045, device='cuda:0', grad_fn=<MeanBackward0>)
rms error rate: tensor(0.0156, device='cuda:0', grad_fn=<SqrtBackward>)


# test MAClogsoftmax

In [7]:
x = torch.tensor([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]).cuda()

x.requires_grad_()

start_time = time.time()
approximate = MAClogsoftmax(x, 8, 7, 8, True)
approximate.sum().backward()
print(x.grad)
print("--- %s seconds ---" % (time.time() - start_time))

x = torch.tensor([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]).cuda()

x.requires_grad_()

start_time = time.time()
softmax = torch.nn.LogSoftmax(dim=0)
precise = softmax(x)
precise.sum().backward()
print(x.grad)
print("--- %s seconds ---" % (time.time() - start_time))

error = (approximate - precise) / precise
print("min error rate:", error.min())
print("max error rate:", error.max())
print("avg error rate:", error.mean())
print("rms error rate:", error.mul(error).mean().sqrt())


tensor([ 0.3150,  0.2157,  0.1588,  0.0657,  0.0044, -0.1209, -0.2449, -0.3935],
       device='cuda:0')
--- 0.010988950729370117 seconds ---
tensor([ 0.3135,  0.2413,  0.1615,  0.0733, -0.0242, -0.1319, -0.2509, -0.3825],
       device='cuda:0')
--- 0.004000186920166016 seconds ---
min error rate: tensor(-0.0127, device='cuda:0', grad_fn=<MinBackward1>)
max error rate: tensor(0.0108, device='cuda:0', grad_fn=<MaxBackward1>)
avg error rate: tensor(-0.0025, device='cuda:0', grad_fn=<MeanBackward0>)
rms error rate: tensor(0.0074, device='cuda:0', grad_fn=<SqrtBackward>)
