## Test: CPU vs GPU timings for `Frequency` and batched SVD, plus EPICS PV loading

In [1]:
# Import

import sys

import epics
import numpy
import pandas
import torch
import nufft

from harmonica.window import Window
from harmonica.data import Data
from harmonica.frequency import Frequency

# Print tensors in scientific notation with 12 digits after the decimal point
torch.set_printoptions(sci_mode=True, precision=12)

# Last expression of the cell: shows whether a CUDA device can be used
torch.cuda.is_available()

True

In [2]:
!python --version

Python 3.9.0


In [3]:
# Record the PyTorch version the timings below were taken with
torch.__version__

'1.9.1'

In [4]:
# Settings shared by the CPU and GPU test-data cells below
dtype = torch.float64   # precision used for the window and the signal
length = 2**13          # value passed to Window as its first argument (8192)
siz = 2**10             # number of copies of the signal stacked into one batch (1024)

In [5]:
# Test data (cpu)
# A batch of `siz` identical sine signals wrapped in Data/Frequency, all on the CPU.
device = 'cpu'
win = Window(length, 'cosine_window', 1.0, device=device, dtype=dtype)

# Sample indices 1..len(win), created with the window's dtype and device
dat = torch.linspace(1, len(win), len(win), dtype=win.dtype, device=win.device)
# Sine with 0.12 cycles per sample
dat = torch.sin(2*numpy.pi*0.12*dat)

# Every row of the stacked tensor is the same signal
tbt = Data.from_tensor(win, torch.stack([dat]*siz))
fre = Frequency(tbt)

In [6]:
%%timeit
# CPU baseline: `fre` was built from tensors created with device='cpu'.
# The call passes 'parabola' and window=True explicitly.
fre('parabola', window=True)

405 ms ± 3.51 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [7]:
# Clear
# Drop the CPU benchmark objects before the GPU versions are built
del fre, tbt, win

In [8]:
# Test data (gpu)
# Same construction as the CPU case; falls back to the CPU when CUDA is unavailable.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
win = Window(length, 'cosine_window', 1.0, device=device, dtype=dtype)

# Sample indices 1..len(win), created with the window's dtype and device
dat = torch.linspace(1, len(win), len(win), dtype=win.dtype, device=win.device)
# Sine with 0.12 cycles per sample
dat = torch.sin(2*numpy.pi*0.12*dat)

# Every row of the stacked tensor is the same signal
tbt = Data.from_tensor(win, torch.stack([dat]*siz))
fre = Frequency(tbt)

In [9]:
%%timeit
fre('parabola')

48.8 ms ± 613 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [10]:
# Clear
# Drop every object created by the GPU benchmark, including the raw signal
# `dat`, which otherwise keeps its device allocation alive.
del fre, tbt, win, dat

# `device` falls back to 'cpu' when CUDA is missing; torch.cuda.synchronize()
# would fail in that case, so only touch the CUDA runtime when it is available.
if torch.cuda.is_available():
    torch.cuda.synchronize()   # wait for pending kernels
    torch.cuda.empty_cache()   # hand cached, unused blocks back to the driver

In [11]:
# SVD: time batched torch.linalg.svd on the CPU, then on the GPU

In [12]:
# Test data (cpu)
# Batch of 64 matrices for the SVD timing, built on the CPU.
device = 'cpu'
win = Window(512, 'cosine_window', 1.0, device=device, dtype=dtype)

# Sample indices 1..len(win), then a sine with 0.12 cycles per sample
dat = torch.linspace(1, len(win), len(win), dtype=win.dtype, device=win.device)
dat = torch.sin(2*numpy.pi*0.12*dat)

# 512 identical rows form one matrix; 64 identical matrices form the batch
dat = torch.stack([dat]*512)
dat = torch.stack([dat]*64)
print(dat.shape, dat.device, sep='\n')

torch.Size([64, 512, 512])
cpu


In [13]:
%%timeit
u, s, v = torch.linalg.svd(dat)

4.39 s ± 31.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [14]:
# Clear
# Drop the CPU batch and window before the GPU versions are built
del dat, win

In [15]:
# Test data (gpu)
# Same batch as the CPU case; falls back to the CPU when CUDA is unavailable.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
win = Window(512, 'cosine_window', 1.0, device=device, dtype=dtype)

# Sample indices 1..len(win), then a sine with 0.12 cycles per sample
dat = torch.linspace(1, len(win), len(win), dtype=win.dtype, device=win.device)
dat = torch.sin(2*numpy.pi*0.12*dat)

# 512 identical rows form one matrix; 64 identical matrices form the batch
dat = torch.stack([dat]*512)
dat = torch.stack([dat]*64)
print(dat.shape, dat.device, sep='\n')

torch.Size([64, 512, 512])
cuda:0


In [16]:
%%timeit
u, s, v = torch.linalg.svd(dat)

372 ms ± 7.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [17]:
# Clear
# Drop the batch and window created for the GPU SVD timing
del dat, win

# `device` falls back to 'cpu' when CUDA is missing; torch.cuda.synchronize()
# would fail in that case, so only touch the CUDA runtime when it is available.
if torch.cuda.is_available():
    torch.cuda.synchronize()   # wait for pending kernels
    torch.cuda.empty_cache()   # hand cached, unused blocks back to the driver

In [18]:
# PV: load data from EPICS process variables with Data.from_epics

In [19]:
# Double-precision CPU window used when loading the PV data (first argument 4096)
win = Window(4096, 'cosine_window', 1.0, device='cpu', dtype=torch.float64)
# All PV names share the prefix "TEST:" and the suffix ":turns_x-I";
# only the tag in the middle varies. The order of the tags is significant.
pv_list = [
    f"TEST:{tag}:turns_x-I"
    for tag in (
        "STP2 STP4 "
        "SRP1 SRP2 SRP3 SRP4 SRP5 SRP6 SRP7 SRP8 SRP9 "
        "SIP1 SIP2 "
        "SRP10 SRP11 SRP12 SRP13 SRP14 SRP15 SRP16 SRP17 "
        "SEP5 SEP4 SEP3 SEP1 SEP0 "
        "NEP0 NEP1 NEP3 NEP4 NEP5 "
        "NRP17 NRP16 NRP15 NRP14 NRP13 NRP12 NRP11 NRP10 "
        "NIP3 NIP1 "
        "NRP9 NRP8 NRP7 NRP6 NRP5 NRP4 NRP3 NRP2 NRP1 "
        "NTP4 NTP2 NTP0 "
        "STP0"
    ).split()
]

# One zero per PV
pv_rise = [0] * len(pv_list)

In [20]:
%%time
tbt = Data.from_epics(54, win, pv_list, pv_rise)

CPU times: user 650 ms, sys: 120 ms, total: 770 ms
Wall time: 130 ms


In [21]:
# Shell out to nvidia-smi to record the driver/CUDA version and current GPU usage
!nvidia-smi

Tue Oct 26 13:10:41 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.19.01    Driver Version: 465.19.01    CUDA Version: 11.3     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  On   | 00000000:03:00.0  On |                  N/A |
| 34%   44C    P2    48W / 151W |   1531MiB /  8116MiB |      3%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------

In [22]:
# Shell out to nvaccelinfo to record the properties of the installed GPU
!nvaccelinfo


CUDA Driver Version:           11030
NVRM version:                  NVIDIA UNIX x86_64 Kernel Module  465.19.01  Fri Mar 19 07:44:41 UTC 2021

Device Number:                 0
Device Name:                   NVIDIA GeForce GTX 1070
Device Revision Number:        6.1
Global Memory Size:            8510701568
Number of Multiprocessors:     15
Concurrent Copy and Execution: Yes
Total Constant Memory:         65536
Total Shared Memory per Block: 49152
Registers per Block:           65536
Warp Size:                     32
Maximum Threads per Block:     1024
Maximum Block Dimensions:      1024, 1024, 64
Maximum Grid Dimensions:       2147483647 x 65535 x 65535
Maximum Memory Pitch:          2147483647B
Texture Alignment:             512B
Clock Rate:                    1771 MHz
Execution Timeout:             Yes
Integrated Device:             No
Can Map Host Memory:           Yes
Compute Mode:                  default
Concurrent Kernels:            Yes
ECC Enabled:   