## Test

In [1]:
# Import

import epics
import numpy
import pandas
import torch
import nufft

import sys
sys.path.append('..')

from harmonica.window import Window
from harmonica.data import Data
from harmonica.frequency import Frequency

# Show tensors in scientific notation with 12 digits of precision
torch.set_printoptions(sci_mode=True, precision=12)

# Cell output: whether a CUDA device can be used by this kernel
torch.cuda.is_available()

True

In [2]:
# Record the version of the `python` executable found on the shell PATH
!python --version

Python 3.9.7


In [3]:
# Record the installed torch build (the version string is shown as the cell output)
torch.__version__

'1.10.1+cu102'

In [4]:
# Shared benchmark settings:
#   size   -- number of identical signals stacked into one batch
#   length -- window length, i.e. number of samples per signal
#   dtype  -- floating point precision used for the window and the signals
size, length = 1024, 8192
dtype = torch.float64

In [5]:
# Test data (cpu)
# One sine signal at frequency 0.12, sampled at 1..len(w), stacked `size` times
device = 'cpu'
w = Window(length, 'cosine_window', 1.0, dtype=dtype, device=device)
data = torch.linspace(1, len(w), len(w), dtype=w.dtype, device=w.device)
data = torch.sin(2*numpy.pi*0.12*data)
d = Data.from_data(w, torch.stack([data]*size))
f = Frequency(d)

In [6]:
%%timeit
# Time one call of the Frequency object with the 'parabola' option on the CPU batch
f('parabola')

398 ms ± 7.67 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [7]:
# Clear
# Drop the CPU window, data container and frequency object before the GPU run
del f, d, w

In [8]:
# Test data (gpu)
# Same sine batch as the CPU case, created on CUDA when a device is available
device = 'cuda' if torch.cuda.is_available() else 'cpu'
w = Window(length, 'cosine_window', 1.0, dtype=dtype, device=device)
data = torch.linspace(1, len(w), len(w), dtype=w.dtype, device=w.device)
data = torch.sin(2*numpy.pi*0.12*data)
d = Data.from_data(w, torch.stack([data]*size))
f = Frequency(d)

In [9]:
%%timeit
f('parabola')

45.4 ms ± 448 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [10]:
# Clear
# Drop the benchmark objects, then return cached GPU memory to the driver.
# The CUDA calls are guarded because the data falls back to 'cpu' when no device
# is available, and torch.cuda.synchronize() raises without a CUDA runtime.
del w, d, f
if torch.cuda.is_available():
    torch.cuda.synchronize()
    torch.cuda.empty_cache()

In [11]:
# SVD: batched torch.linalg.svd timing (CPU vs GPU)

In [12]:
# Test data (cpu)
# 64 identical 512 x 512 matrices; every row holds the same sine signal
device = 'cpu'
w = Window(512, 'cosine_window', 1.0, dtype=dtype, device=device)
data = torch.linspace(1, len(w), len(w), dtype=w.dtype, device=w.device)
data = torch.sin(2*numpy.pi*0.12*data)
data = torch.stack([data]*512)
data = torch.stack([data]*64)
print(data.shape, data.device, sep='\n')

torch.Size([64, 512, 512])
cpu


In [13]:
%%timeit
u, s, v = torch.linalg.svd(data)

4.45 s ± 84.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [14]:
# Clear
# Drop the CPU SVD input and its window before the GPU run
del data, w

In [15]:
# Test data (gpu)
# 64 identical 512 x 512 matrices, created on CUDA when a device is available
device = 'cuda' if torch.cuda.is_available() else 'cpu'
w = Window(512, 'cosine_window', 1.0, dtype=dtype, device=device)
data = torch.linspace(1, len(w), len(w), dtype=w.dtype, device=w.device)
data = torch.sin(2*numpy.pi*0.12*data)
data = torch.stack([data]*512)
data = torch.stack([data]*64)
print(data.shape, data.device, sep='\n')

torch.Size([64, 512, 512])
cuda:0


In [16]:
%%timeit
u, s, v = torch.linalg.svd(data)

381 ms ± 14.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [17]:
# Clear
# Drop the SVD input, then return cached GPU memory to the driver.
# The CUDA calls are guarded because the data falls back to 'cpu' when no device
# is available, and torch.cuda.synchronize() raises without a CUDA runtime.
del w, data
if torch.cuda.is_available():
    torch.cuda.synchronize()
    torch.cuda.empty_cache()

In [18]:
# PV: load data from EPICS process variables

In [19]:
# Window of 4096 samples (float64, CPU) used when loading PV data
w = Window(4096, 'cosine_window', 1.0, dtype=torch.float64, device='cpu')

# PV names to read (54 in total), kept in their original order
pv_list = [
    "H:STP2:DATA:X", "H:STP4:DATA:X",
    "H:SRP1:DATA:X", "H:SRP2:DATA:X", "H:SRP3:DATA:X", "H:SRP4:DATA:X", "H:SRP5:DATA:X",
    "H:SRP6:DATA:X", "H:SRP7:DATA:X", "H:SRP8:DATA:X", "H:SRP9:DATA:X",
    "H:SIP1:DATA:X", "H:SIP2:DATA:X",
    "H:SRP10:DATA:X", "H:SRP11:DATA:X", "H:SRP12:DATA:X", "H:SRP13:DATA:X",
    "H:SRP14:DATA:X", "H:SRP15:DATA:X", "H:SRP16:DATA:X", "H:SRP17:DATA:X",
    "H:SEP5:DATA:X", "H:SEP4:DATA:X", "H:SEP3:DATA:X", "H:SEP1:DATA:X", "H:SEP0:DATA:X",
    "H:NEP0:DATA:X", "H:NEP1:DATA:X", "H:NEP3:DATA:X", "H:NEP4:DATA:X", "H:NEP5:DATA:X",
    "H:NRP17:DATA:X", "H:NRP16:DATA:X", "H:NRP15:DATA:X", "H:NRP14:DATA:X",
    "H:NRP13:DATA:X", "H:NRP12:DATA:X", "H:NRP11:DATA:X", "H:NRP10:DATA:X",
    "H:NIP3:DATA:X", "H:NIP1:DATA:X",
    "H:NRP9:DATA:X", "H:NRP8:DATA:X", "H:NRP7:DATA:X", "H:NRP6:DATA:X", "H:NRP5:DATA:X",
    "H:NRP4:DATA:X", "H:NRP3:DATA:X", "H:NRP2:DATA:X", "H:NRP1:DATA:X",
    "H:NTP4:DATA:X", "H:NTP2:DATA:X", "H:NTP0:DATA:X",
    "H:STP0:DATA:X",
]

# One zero entry per PV, passed to Data.from_epics alongside pv_list
pv_rise = [0] * len(pv_list)

In [20]:
%%time
# Time a single Data.from_epics call for the PV names in pv_list
# (presumably one channel read per PV -- confirm in harmonica.data)
d = Data.from_epics(w, pv_list, pv_rise)

CPU times: user 561 ms, sys: 79.4 ms, total: 641 ms
Wall time: 112 ms


In [21]:
# Record GPU, driver and memory usage reported by nvidia-smi for this run
!nvidia-smi

Tue Jan 18 11:02:01 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.19.01    Driver Version: 465.19.01    CUDA Version: 11.3     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  On   | 00000000:03:00.0  On |                  N/A |
| 34%   48C    P2    52W / 151W |   1243MiB /  8116MiB |     10%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [22]:
# Record the detailed CUDA device properties reported by nvaccelinfo
!nvaccelinfo


CUDA Driver Version:           11030
NVRM version:                  NVIDIA UNIX x86_64 Kernel Module  465.19.01  Fri Mar 19 07:44:41 UTC 2021

Device Number:                 0
Device Name:                   NVIDIA GeForce GTX 1070
Device Revision Number:        6.1
Global Memory Size:            8510701568
Number of Multiprocessors:     15
Concurrent Copy and Execution: Yes
Total Constant Memory:         65536
Total Shared Memory per Block: 49152
Registers per Block:           65536
Warp Size:                     32
Maximum Threads per Block:     1024
Maximum Block Dimensions:      1024, 1024, 64
Maximum Grid Dimensions:       2147483647 x 65535 x 65535
Maximum Memory Pitch:          2147483647B
Texture Alignment:             512B
Clock Rate:                    1771 MHz
Execution Timeout:             Yes
Integrated Device:             No
Can Map Host Memory:           Yes
Compute Mode:                  default
Concurrent Kernels:            Yes
ECC Enabled:                   No
Memory