# Imperative vs. Symbolic (non-compiled vs. compiled)

In [None]:
%pip install d2l==1.0.0a0

In [4]:
import torch
from torch import nn
from d2l import torch as d2l

In [5]:
# Factory for networks
def get_net():
    net = nn.Sequential(nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 2))
    return net

x = torch.randn(size=(1, 512))

In [3]:
# Convenience function for benchmarking
class Benchmark:
    """For measuring running time."""
    def __init__(self, description='Done'):
        self.description = description

    def __enter__(self):
        self.timer = d2l.Timer()
        return self

    def __exit__(self, *args):
        print(f'{self.description}: {self.timer.stop():.4f} sec')

In [7]:
net = get_net()
with Benchmark('Without torchscript'):
    for i in range(1000): net(x)

net = torch.jit.script(net)
with Benchmark('With torchscript'):
    for i in range(1000): net(x)

Without torchscript: 0.1740 sec
With torchscript: 0.1649 sec


In [8]:
net.save('my_mlp')
%ls -lh my_mlp*

-rw-r--r-- 1 root root 651K Jun 12 18:54 my_mlp


# Asynchronous

In [2]:
import os
import subprocess
import numpy
import torch
from torch import nn
from d2l import torch as d2l

In [4]:
device = d2l.try_gpu()
device

device(type='cuda', index=0)

In [6]:
with Benchmark('Numpy on CPU'):
    for _ in range(10):
        a = numpy.random.normal(size=(1000, 1000))
        b = numpy.dot(a, a)

with Benchmark(f'Pytorch on {device}'):
    for _ in range(10):
        a = torch.randn(size=(1000, 1000))
        b = torch.mm(a, a)

Numpy on CPU: 1.6497 sec
Pytorch on cuda:0: 0.5682 sec
