## Test

In [1]:
# Import

import epics
import numpy
import pandas
import torch
import nufft

import sys
sys.path.append('..')

from harmonica.window import Window
from harmonica.data import Data
from harmonica.frequency import Frequency

# Print tensors with 12 digits of precision in scientific notation
torch.set_printoptions(precision=12, sci_mode=True)
# Last expression of the cell: its value is shown as the cell output and tells
# whether a CUDA device can be used by the GPU cells
torch.cuda.is_available()

True

In [2]:
!python --version

Python 3.9.0


In [3]:
# Record the PyTorch version the timings in this notebook were taken with
torch.__version__

'1.9.1'

In [4]:
# Shared benchmark parameters
size = 2**10             # number of identical signals stacked into one batch
length = 2**13           # number of samples in the window / in each signal
dtype = torch.float64    # floating point type used for window and data

In [5]:
# Test data (cpu)
# A single sine signal (frequency 0.12) sampled at 1..len(win) is copied
# `size` times and wrapped into Data/Frequency objects living on the CPU.
device = 'cpu'
win = Window(length, 'cosine_window', 1.0, dtype=dtype, device=device)
data = torch.linspace(1, len(win), len(win), dtype=win.dtype, device=win.device)
data = torch.sin(2*numpy.pi*0.12*data)
tbt = Data.from_tensor(win, torch.stack([data]*size))
f = Frequency(tbt)

In [6]:
%%timeit
# Time the 'parabola' frequency estimation with window=True on the CPU data
f('parabola', window=True)

415 ms ± 7.26 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [7]:
# Clear
# Drop the CPU benchmark objects so the names can be rebound for the GPU run
del f
del tbt
del win

In [8]:
# Test data (gpu)
# Same signal as in the CPU case, created directly on the CUDA device when one
# is available (falls back to the CPU otherwise).
device = 'cuda' if torch.cuda.is_available() else 'cpu'
win = Window(length, 'cosine_window', 1.0, dtype=dtype, device=device)
data = torch.linspace(1, len(win), len(win), dtype=win.dtype, device=win.device)
data = torch.sin(2*numpy.pi*0.12*data)
tbt = Data.from_tensor(win, torch.stack([data]*size))
f = Frequency(tbt)

In [9]:
%%timeit
f('parabola')

48.8 ms ± 662 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [10]:
# Clear
# Drop the benchmark objects, then release the cached GPU memory.
del win, tbt, f
# The data cell falls back to the CPU when CUDA is missing; guard the CUDA
# calls the same way, since torch.cuda.synchronize() raises without a device.
if torch.cuda.is_available():
    torch.cuda.synchronize()
    torch.cuda.empty_cache()

In [11]:
# SVD: batched torch.linalg.svd timing, CPU vs GPU

In [12]:
# Test data (cpu)
# Build a (64, 512, 512) batch by repeating one 512-sample sine signal:
# first 512 rows per matrix, then 64 matrices.
device = 'cpu'
win = Window(512, 'cosine_window', 1.0, dtype=dtype, device=device)
data = torch.linspace(1, len(win), len(win), dtype=win.dtype, device=win.device)
data = torch.sin(2*numpy.pi*0.12*data)
data = torch.stack([data]*512)
data = torch.stack([data]*64)
print(data.shape, data.device, sep='\n')

torch.Size([64, 512, 512])
cpu


In [13]:
%%timeit
u, s, v = torch.linalg.svd(data)

4.38 s ± 10.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [14]:
# Clear
# Drop the CPU tensors before the GPU ones are created under the same names
del data
del win

In [15]:
# Test data (gpu)
# Same (64, 512, 512) batch as in the CPU case, created on the CUDA device
# when one is available (falls back to the CPU otherwise).
device = 'cuda' if torch.cuda.is_available() else 'cpu'
win = Window(512, 'cosine_window', 1.0, dtype=dtype, device=device)
data = torch.linspace(1, len(win), len(win), dtype=win.dtype, device=win.device)
data = torch.sin(2*numpy.pi*0.12*data)
data = torch.stack([data]*512)
data = torch.stack([data]*64)
print(data.shape, data.device, sep='\n')

torch.Size([64, 512, 512])
cuda:0


In [16]:
%%timeit
u, s, v = torch.linalg.svd(data)

367 ms ± 361 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [17]:
# Clear
# Drop the benchmark tensors, then release the cached GPU memory.
del win, data
# The data cell falls back to the CPU when CUDA is missing; guard the CUDA
# calls the same way, since torch.cuda.synchronize() raises without a device.
if torch.cuda.is_available():
    torch.cuda.synchronize()
    torch.cuda.empty_cache()

In [18]:
# PV: load data from EPICS PVs with Data.from_epics

In [19]:
# Window of length 4096 (float64, CPU) handed to Data.from_epics in the timing cell
win = Window(4096, 'cosine_window', 1.0, dtype=torch.float64, device='cpu')
# PV names passed to Data.from_epics, in the order they are read
pv_list = [
    "HARMONICA:STP2:turns_x-I", "HARMONICA:STP4:turns_x-I", "HARMONICA:SRP1:turns_x-I",
    "HARMONICA:SRP2:turns_x-I", "HARMONICA:SRP3:turns_x-I", "HARMONICA:SRP4:turns_x-I",
    "HARMONICA:SRP5:turns_x-I", "HARMONICA:SRP6:turns_x-I", "HARMONICA:SRP7:turns_x-I",
    "HARMONICA:SRP8:turns_x-I", "HARMONICA:SRP9:turns_x-I", "HARMONICA:SIP1:turns_x-I",
    "HARMONICA:SIP2:turns_x-I", "HARMONICA:SRP10:turns_x-I", "HARMONICA:SRP11:turns_x-I",
    "HARMONICA:SRP12:turns_x-I", "HARMONICA:SRP13:turns_x-I", "HARMONICA:SRP14:turns_x-I",
    "HARMONICA:SRP15:turns_x-I", "HARMONICA:SRP16:turns_x-I", "HARMONICA:SRP17:turns_x-I",
    "HARMONICA:SEP5:turns_x-I", "HARMONICA:SEP4:turns_x-I", "HARMONICA:SEP3:turns_x-I",
    "HARMONICA:SEP1:turns_x-I", "HARMONICA:SEP0:turns_x-I", "HARMONICA:NEP0:turns_x-I",
    "HARMONICA:NEP1:turns_x-I", "HARMONICA:NEP3:turns_x-I", "HARMONICA:NEP4:turns_x-I",
    "HARMONICA:NEP5:turns_x-I", "HARMONICA:NRP17:turns_x-I", "HARMONICA:NRP16:turns_x-I",
    "HARMONICA:NRP15:turns_x-I", "HARMONICA:NRP14:turns_x-I", "HARMONICA:NRP13:turns_x-I",
    "HARMONICA:NRP12:turns_x-I", "HARMONICA:NRP11:turns_x-I", "HARMONICA:NRP10:turns_x-I",
    "HARMONICA:NIP3:turns_x-I", "HARMONICA:NIP1:turns_x-I", "HARMONICA:NRP9:turns_x-I",
    "HARMONICA:NRP8:turns_x-I", "HARMONICA:NRP7:turns_x-I", "HARMONICA:NRP6:turns_x-I",
    "HARMONICA:NRP5:turns_x-I", "HARMONICA:NRP4:turns_x-I", "HARMONICA:NRP3:turns_x-I",
    "HARMONICA:NRP2:turns_x-I", "HARMONICA:NRP1:turns_x-I", "HARMONICA:NTP4:turns_x-I",
    "HARMONICA:NTP2:turns_x-I", "HARMONICA:NTP0:turns_x-I", "HARMONICA:STP0:turns_x-I",
]
# One zero entry per PV name
pv_rise = [0] * len(pv_list)

In [20]:
%%time
# Time a single Data.from_epics call for the PV names in pv_list.
# NOTE(review): 54 equals len(pv_list); presumably it is the number of signals
# to load — confirm, and consider passing len(pv_list) instead of the literal.
tbt = Data.from_epics(54, win, pv_list, pv_rise)

CPU times: user 618 ms, sys: 107 ms, total: 725 ms
Wall time: 128 ms


In [21]:
# Show the GPU model, driver/CUDA version and current memory usage
!nvidia-smi

Fri Nov 26 10:26:23 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.19.01    Driver Version: 465.19.01    CUDA Version: 11.3     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  On   | 00000000:03:00.0  On |                  N/A |
| 34%   48C    P2    48W / 151W |   1538MiB /  8116MiB |      9%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [22]:
# Report the CUDA driver version and the properties of the CUDA device
!nvaccelinfo


CUDA Driver Version:           11030
NVRM version:                  NVIDIA UNIX x86_64 Kernel Module  465.19.01  Fri Mar 19 07:44:41 UTC 2021

Device Number:                 0
Device Name:                   NVIDIA GeForce GTX 1070
Device Revision Number:        6.1
Global Memory Size:            8510701568
Number of Multiprocessors:     15
Concurrent Copy and Execution: Yes
Total Constant Memory:         65536
Total Shared Memory per Block: 49152
Registers per Block:           65536
Warp Size:                     32
Maximum Threads per Block:     1024
Maximum Block Dimensions:      1024, 1024, 64
Maximum Grid Dimensions:       2147483647 x 65535 x 65535
Maximum Memory Pitch:          2147483647B
Texture Alignment:             512B
Clock Rate:                    1771 MHz
Execution Timeout:             Yes
Integrated Device:             No
Can Map Host Memory:           Yes
Compute Mode:                  default
Concurrent Kernels:            Yes
ECC Enabled:                   No
Memory