In [None]:
import torch
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
from time import perf_counter
from collections import defaultdict

In [None]:
# Shell escape: runs the `gpustat` command and shows its output in the cell.
# NOTE(review): presumably the third-party GPU status CLI; it must be installed
# and on PATH for this cell to print anything useful — confirm for your environment.
!gpustat

In [None]:
# Notebook-wide defaults: matrix side length, element type and target device.
N = 5_000
dtype = torch.float32
# Use the first CUDA device when one is visible, otherwise fall back to the CPU.
# (To force a CPU-only run, assign torch.device('cpu') to `device` instead.)
if torch.cuda.is_available():
    device = torch.device("cuda", 0)
else:
    device = torch.device("cpu")

In [None]:
def kernel(inp, repeats=100):
    """Synthetic workload: accumulate ``torch.sin(inp)`` ``repeats`` times.

    The result equals ``repeats * sin(inp)`` up to floating-point rounding,
    but it is deliberately computed as a chain of separate element-wise ops
    so that both the forward and the backward pass have real work to do.

    Args:
        inp: Input tensor.
        repeats: Number of ``sin`` evaluations to sum (default 100, the value
            that used to be hard-coded). Must be at least 1.

    Returns:
        A tensor with the same shape as ``inp`` holding the accumulated sum.

    Raises:
        ValueError: If ``repeats`` is smaller than 1.
    """
    if repeats < 1:
        raise ValueError(f"repeats must be >= 1, got {repeats}")
    # Start from the Python int 0: the first `+=` produces a fresh tensor
    # (0 + sin(inp)); every later `+=` accumulates into that tensor in place.
    x = 0
    for _ in range(repeats):
        x += torch.sin(inp)
    return x

def run(device=device, dtype=dtype, size=N):
    """Time one forward + backward pass of ``kernel`` on an identity matrix.

    The measured interval covers: building a ``size x size`` identity matrix
    on the CPU, copying it to ``device``, evaluating ``kernel`` and
    back-propagating a tensor of ones through it.

    Args:
        device: Target device, as a ``torch.device`` or a device string such
            as ``'cpu'`` or ``'cuda:0'``.
        dtype: Element type of the matrix (e.g. ``torch.float``).
        size: Side length of the square matrix; any integer-like value
            (including NumPy integer scalars) is accepted.

    Returns:
        Elapsed wall-clock time in seconds, as a float.
    """
    device = torch.device(device)
    on_gpu = device.type == "cuda"

    if on_gpu:
        # CUDA kernels are queued asynchronously; drain anything still pending
        # so that earlier work is not billed to this measurement.
        torch.cuda.synchronize(device)

    start = perf_counter()
    inp = torch.eye(int(size), requires_grad=True, dtype=dtype)
    inp2 = inp.to(device)
    out = kernel(inp2)
    # The graph is used exactly once, so it does not need to be retained.
    out.backward(torch.ones_like(inp2))
    if on_gpu:
        # Wait for all queued GPU work to finish before reading the clock;
        # otherwise only the launch overhead might be measured.
        torch.cuda.synchronize(device)
    stop = perf_counter()

    return stop - start

# %time run()

In [None]:
# Benchmark sweep: time every device/dtype combination for matrix sizes
# 2, 4, ..., 4096 and collect the results column-wise in `df`.
has_cuda = torch.cuda.is_available()

# Untimed warm-up so that one-off start-up costs (notably lazy CUDA
# initialisation on the first GPU call) do not inflate the first data point.
run('cpu', torch.float, 2)
if has_cuda:
    run('cuda:0', torch.float, 2)

df = defaultdict(list)
for size in np.logspace(1, 12, 12, base=2, dtype=np.int32):
    print(f"size: {size}")
    df['size'].append(size)
    # Without a CUDA device the GPU columns are filled with NaN, which keeps
    # all columns the same length instead of crashing on 'cuda:0'.
    df['cpu-float'].append(run('cpu', torch.float, size))
    df['gpu-float'].append(run('cuda:0', torch.float, size) if has_cuda else np.nan)
    df['cpu-double'].append(run('cpu', torch.double, size))
    df['gpu-double'].append(run('cuda:0', torch.double, size) if has_cuda else np.nan)

In [None]:
# Turn the collected timing lists into a table with one row per matrix size.
df = pd.DataFrame(df)

# Absolute timings; the values come from perf_counter(), i.e. seconds.
ax = df.plot(x='size', y=['cpu-float', 'gpu-float', 'cpu-double', 'gpu-double'])
ax.set_ylabel('elapsed time [s]')

# Ratio CPU time / GPU time: values above 1 mean the GPU was faster.
df['speedup-float'] = df['cpu-float'] / df['gpu-float']
df['speedup-double'] = df['cpu-double'] / df['gpu-double']
ax = df.plot.bar(x='size', y=['speedup-double', 'speedup-float'])
ax.axhline(1.0, ls='--', c='k')  # break-even line: CPU and GPU equally fast
ax.set_ylabel('GPU speed-up over CPU (CPU time / GPU time)')
df.tail()