# Convolution Comparison

In [1]:
# Benchmark configuration shared by the cells below.
plot_results = False     # set True to draw a stem plot of each convolution result
tap_size = 1000          # number of filter taps
signal_size = 1_000_000  # number of input samples

In [2]:
import numpy as np

# Reference implementation: a ramp of taps convolved with an all-ones signal.
taps = np.arange(tap_size)
signal = np.ones(signal_size)

# "full" is np.convolve's default mode; spelled out here for the reader.
result = np.convolve(taps, signal, mode="full")

if plot_results:
    # Imported lazily so matplotlib is only needed when plotting is enabled.
    from matplotlib import pyplot as plt
    plt.stem(result)

import torch

# Same ramp taps and all-ones signal as the NumPy version, as float32 tensors
# shaped (1, 1, tap_size) for the weight and (1, signal_size) for the input.
torch_taps = torch.arange(tap_size, dtype=torch.float32).view(1, 1, -1)
torch_signal = torch.ones(signal_size, dtype=torch.float32).view(1, -1)

# The taps are reversed along their last axis and the input is padded by
# tap_size - 1 so the output spans the same length as np.convolve's result.
result = torch.nn.functional.conv1d(
    torch_signal,
    torch_taps.flip(dims=(2,)),
    padding=tap_size - 1,
)

if plot_results:
    from matplotlib import pyplot as plt
    print(result.shape)
    plt.stem(result.numpy()[0])

# GPU variant: build the same float32 taps and signal, then move them to the
# CUDA device. The .cuda() calls raise if no CUDA device/build is available.
torch_gpu_taps = torch.arange(tap_size, dtype=torch.float).reshape(1, 1, -1).cuda()
torch_gpu_signal = torch.ones(signal_size, dtype=torch.float).reshape(1, -1).cuda()

# Reversed taps plus padding of tap_size - 1, mirroring the CPU call above.
result = torch.nn.functional.conv1d(torch_gpu_signal, torch.flip(torch_gpu_taps, dims=(2,)), padding=tap_size - 1)

if plot_results:
    from matplotlib import pyplot as plt
    print(result.size())
    # A CUDA tensor cannot be converted with .numpy() directly (TypeError);
    # copy it back to host memory first.
    plt.stem(result.cpu().numpy()[0])


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Baseline timing: NumPy convolution of the taps with the signal.
%timeit np.convolve(taps, signal)

101 ms ± 2.96 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [4]:
# Timing of the PyTorch conv1d call on the CPU tensors (float32), using the
# same flipped-taps / padding arguments as the setup cell.
%timeit torch.nn.functional.conv1d(torch_signal, torch.flip(torch_taps, dims=(2,)), padding=tap_size-1)

877 ms ± 31.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [5]:
%timeit torch.nn.functional.conv1d(torch_gpu_signal, torch.flip(torch_gpu_taps, dims=(2,)), padding=tap_size-1)

2.45 ms ± 16.6 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
