# Enabling Deep Learning in Gravitational Wave Physics With Inference-as-a-Service
## Alec Gunny

## Challenges to DL Inference

Start with some imports

In [1]:
import time
from queue import Empty

import numpy as np
import torch

import utils

Run the model on a single input in the usual way: move the input to the GPU and call the model with gradient tracking disabled.

In [2]:
# Build the network and place it on the first GPU.
# NOTE(review): the meaning of MLP's arguments (presumably an input size of 64
# followed by layer widths) is defined in utils - confirm there.
model = utils.MLP(64, [256, 128, 64]).cuda(0)

# One random float32 input vector with 64 entries.
x = np.random.randn(64).astype(np.float32)

# Forward pass with autograd disabled; the input is moved to the same GPU first.
with torch.no_grad():
    y = model(torch.from_numpy(x).to("cuda:0"))

# Bring the result back to host memory so the notebook displays a NumPy array.
y.to("cpu").numpy()

array([0.52460766], dtype=float32)

Save the model's weights to disk and load them back into a fresh model, so the trained model can be reused at our leisure

In [3]:
# Persist only the weights (the state dict) rather than the whole module object.
torch.save(model.state_dict(), "model.pt")

# A freshly constructed model is expected to disagree with the original on the
# same input, since nothing has been loaded into it yet.
model = utils.MLP(64, [256, 128, 64]).cuda(0)
with torch.no_grad():
    y_new = model(torch.from_numpy(x).cuda(0))
# torch.equal collapses the comparison to a single bool, so the check keeps
# working even if the model's output has more than one element.
assert not torch.equal(y_new, y), "fresh model unexpectedly matches the saved one"

# After restoring the saved weights the output must match the original exactly.
model.load_state_dict(torch.load("model.pt"))
with torch.no_grad():
    y_new = model(torch.from_numpy(x).cuda(0))
assert torch.equal(y_new, y), "reloaded model does not reproduce the original output"

Run inference on a larger chunk of data, one batch at a time

In [4]:
@torch.no_grad()
def do_some_inference(model, X, batch_size=8, device=None):
    """Run ``model`` over the rows of ``X`` in batches, yielding NumPy outputs.

    Args:
        model: Callable applied to each batch tensor (e.g. a ``torch.nn.Module``).
        X: NumPy array of inputs; it is split into batches along its first axis.
        batch_size: Number of rows per batch. The last batch may be smaller.
        device: Device the inputs are moved to before batching. When ``None``
            (the default) the device of the model's first parameter is used,
            so the inputs always land where the model lives; if the model
            exposes no parameters, GPU 0 is used.

    Yields:
        One NumPy array per batch holding the model output for that batch,
        copied back to host memory.
    """
    if device is None:
        # Follow the model instead of assuming it sits on GPU 0.
        get_params = getattr(model, "parameters", None)
        first_param = next(iter(get_params()), None) if callable(get_params) else None
        if first_param is not None:
            device = first_param.device
        else:
            device = torch.device("cuda", 0)

    # The whole array is transferred once; the loader then slices it on-device.
    dataset = torch.utils.data.TensorDataset(torch.from_numpy(X).to(device))
    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size)
    # Each item from the loader is a one-element list wrapping the batch tensor.
    for [x] in loader:
        y = model(x)
        yield y.cpu().numpy()

In [5]:
# One million random float32 input rows of width 64.
X = np.random.randn(1_000_000, 64).astype(np.float32)

# NOTE(review): get_progbar and the meaning of its [0] argument are defined in
# utils - confirm there. It is used here as a context manager that exposes
# add_task / update / console.
with utils.get_progbar([0]) as progbar:
    task_id = progbar.add_task("[cyan]Inference", total=X.shape[0])

    # Gather the per-batch results, advancing the bar by each batch's row count.
    outputs = []
    for y in do_some_inference(model, X):
        progbar.update(task_id, advance=y.shape[0])
        outputs.append(y)

    # Stitch the batches back together along the row axis and report the shape.
    output = np.concatenate(outputs)
    progbar.console.log(output.shape)

Output()

In [6]:
def parallel_inference(X, num_jobs, progbar):
    """Run inference over ``X`` in ``num_jobs`` spawned worker processes.

    Each worker runs ``utils.parallel_inference_task`` and is handed a shared
    queue and a shared counter. Result chunks are read off the queue and
    concatenated in arrival order, which may not match the row order of ``X``.

    NOTE(review): the handshake is inferred from this side only - presumably
    each worker bumps ``starter`` once it is ready and begins work when the
    value exceeds ``num_jobs``. Confirm against utils.parallel_inference_task.

    Args:
        X: Input array passed to every worker.
        num_jobs: Number of worker processes to spawn.
        progbar: Progress bar object providing ``add_task``, ``start_task``
            and ``update``.

    Returns:
        The collected result chunks concatenated along axis 0.

    Raises:
        RuntimeError: If every worker exits before all of them reported ready.
        Exception: Any error ``ProcessContext.join`` raises when a worker fails.
    """
    task_id = progbar.add_task(
        f"[cyan]Inference with {num_jobs} jobs",
        total=len(X),
        start=False
    )

    smp = torch.multiprocessing.get_context("spawn")
    q = smp.Queue()
    starter = smp.Value("d", 0.0)
    procs = torch.multiprocessing.spawn(
        utils.parallel_inference_task,
        args=(num_jobs, X, q, starter),
        nprocs=num_jobs,
        join=False
    )

    # Wait for the workers to report ready. Polling join() instead of sleeping
    # surfaces a worker crash here rather than spinning forever on a counter
    # that will never reach num_jobs.
    while starter.value < num_jobs:
        if procs.join(0.01) and starter.value < num_jobs:
            raise RuntimeError(
                "inference workers exited before signalling readiness"
            )

    # Start the clock only once every worker is ready, then release them.
    progbar.start_task(task_id)
    starter.value += 1

    outputs = []
    workers_done = False
    while True:
        try:
            y = q.get_nowait()
        except Empty:
            if workers_done:
                # Queue is empty and no worker is left to refill it.
                break
            # join() returns True once all workers exited cleanly. Loop once
            # more afterwards so results that arrived between the failed get
            # and the join are not dropped.
            workers_done = procs.join(0.01)
            continue
        progbar.update(task_id, advance=len(y))
        outputs.append(y)

    return np.concatenate(outputs, axis=0)

In [7]:
# Run the same workload with 2 and then 4 worker processes; each call adds its
# own task to the shared progress bar.
with utils.get_progbar([0]) as progbar:
    y = parallel_inference(X, num_jobs=2, progbar=progbar)
    y = parallel_inference(X, num_jobs=4, progbar=progbar)

Output()

In [8]:
def multi_gpu_inference(X, num_gpus, jobs_per_gpu, progbar):
    """Run inference over ``X`` with ``num_gpus * jobs_per_gpu`` worker processes.

    Each worker runs ``utils.parallel_inference_task`` and receives
    ``jobs_per_gpu``, ``X``, a shared queue, a shared counter and ``num_gpus``.
    Result chunks are read off the queue and concatenated in arrival order,
    which may not match the row order of ``X``.

    NOTE(review): how workers are mapped onto GPUs, and the ready/start
    handshake on ``starter``, live in utils.parallel_inference_task and are
    not visible here - presumably each worker bumps ``starter`` when ready
    and begins once the value exceeds the worker count. Confirm there.

    Args:
        X: Input array passed to every worker.
        num_gpus: Number of GPUs to spread the workers over.
        jobs_per_gpu: Number of worker processes per GPU.
        progbar: Progress bar object providing ``add_task``, ``start_task``
            and ``update``.

    Returns:
        The collected result chunks concatenated along axis 0.

    Raises:
        RuntimeError: If every worker exits before all of them reported ready.
        Exception: Any error ``ProcessContext.join`` raises when a worker fails.
    """
    task_id = progbar.add_task(
        f"[cyan]Inference with {num_gpus} GPUs",
        total=len(X),
        start=False
    )

    num_jobs = num_gpus * jobs_per_gpu
    smp = torch.multiprocessing.get_context("spawn")
    q = smp.Queue()
    starter = smp.Value("d", 0.0)
    procs = torch.multiprocessing.spawn(
        utils.parallel_inference_task,
        args=(jobs_per_gpu, X, q, starter, num_gpus),
        nprocs=num_jobs,
        join=False
    )

    # Wait for the workers to report ready. Polling join() instead of sleeping
    # surfaces a worker crash here rather than spinning forever on a counter
    # that will never reach num_jobs.
    while starter.value < num_jobs:
        if procs.join(0.01) and starter.value < num_jobs:
            raise RuntimeError(
                "inference workers exited before signalling readiness"
            )

    # Start the clock only once every worker is ready, then release them.
    progbar.start_task(task_id)
    starter.value += 1

    outputs = []
    workers_done = False
    while True:
        try:
            y = q.get_nowait()
        except Empty:
            if workers_done:
                # Queue is empty and no worker is left to refill it.
                break
            # join() returns True once all workers exited cleanly. Loop once
            # more afterwards so results that arrived between the failed get
            # and the join are not dropped.
            workers_done = procs.join(0.01)
            continue
        progbar.update(task_id, advance=len(y))
        outputs.append(y)

    return np.concatenate(outputs, axis=0)

In [9]:
# Spread the workload over 2 GPUs with 4 worker processes on each.
# NOTE(review): the [0, 1] passed to get_progbar presumably lists the GPU ids
# to display - confirm in utils.
with utils.get_progbar([0, 1]) as progbar:
    y = multi_gpu_inference(X, num_gpus=2, jobs_per_gpu=4, progbar=progbar)

Output()