# Compilers & Interpreters
- interpreted vs symbolic execution
- deep learning frameworks utilise symbolic execution for optimizing performance
- pytorch is imperative
- torchscript
    TorchScript code can be invoked in its own interpreter, which is basically a restricted Python interpreter. This interpreter does not acquire the Global Interpreter Lock, and so many requests can be processed on the same instance simultaneously.

    This format allows us to save the whole model to disk and load it into another environment, such as in a server written in a language other than Python

    TorchScript gives us a representation in which we can do compiler optimizations on the code to provide more efficient execution

    TorchScript allows us to interface with many backend/device runtimes that require a broader view of the program than individual operators.

In [26]:
#symbolic execution
# allows running not python env & thus avoids bottle necks from python interpreter
# can be compiled for GPU
def add_():
    return '''
def add(a, b):
    return a + b
'''

def fancy_func_():
    return '''
def fancy_func(a, b, c, d):
    e = add(a, b)
    f = add(c, d)
    g = add(e, f)
    return g
'''

def evoke_():
    return add_() + fancy_func_() + 'print(fancy_func(1, 2, 3, 4))'

prog = evoke_()
print(prog)
y  = compile(prog, "", "exec")
exec(y)


def add(a, b):
    return a + b

def fancy_func(a, b, c, d):
    e = add(a, b)
    f = add(c, d)
    g = add(e, f)
    return g
print(fancy_func(1, 2, 3, 4))
10


In [27]:
import torch
from torch import nn
from d2l import torch as d2l


# Factory for networks
def get_net():
    net = nn.Sequential(nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 2))
    return net

x = torch.randn(size=(1, 512))
net = get_net()
net(x)

tensor([[ 0.0010, -0.0412]], grad_fn=<AddmmBackward0>)

In [28]:
net = torch.jit.script(net)
net(x)

tensor([[ 0.0010, -0.0412]], grad_fn=<DifferentiableGraphBackward>)

In [29]:
print(net.code)


def forward(self,
    input: Tensor) -> Tensor:
  _0 = getattr(self, "0")
  _1 = getattr(self, "1")
  _2 = getattr(self, "2")
  _3 = getattr(self, "3")
  _4 = getattr(self, "4")
  input0 = (_0).forward(input, )
  input1 = (_1).forward(input0, )
  input2 = (_2).forward(input1, )
  input3 = (_3).forward(input2, )
  return (_4).forward(input3, )



In [30]:
#@save
class Benchmark:
    """For measuring running time."""
    def __init__(self, description='Done'):
        self.description = description

    def __enter__(self):
        self.timer = d2l.Timer()
        return self

    def __exit__(self, *args):
        print(f'{self.description}: {self.timer.stop():.4f} sec')

In [31]:
net = get_net()
with Benchmark('Without torchscript'):
    for i in range(10000): 
        net(x)
    
# will be much faster on multiple gpus
net = torch.jit.script(net)
with Benchmark('With torchscript'):
    for i in range(10000): 
        net(x)

Without torchscript: 1.2267 sec
With torchscript: 1.4464 sec
