In [92]:
# Enable IPython's autoreload extension in mode 2, so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Interpolation
Test interpolation of all particles and of a subset of the particles. The question is how the interpolation time and memory usage scale with the number of particles and the size of the grid.

In [42]:
%%writefile tmptest.py
import time
from utils_folding import SimulationParticles
import numpy as np
from memory_profiler import profile

@profile
def test(snapshot='/appalachia/d5/DISK/from_pleiades/snapshots/gmcs0_wind4_gmc9/snapshot_550.hdf5',
         stride=16, nsize=1024):
  """Time the interpolation of a strided particle subset onto a field.

  The defaults reproduce the original hard-coded benchmark; the parameters
  exist so the scaling with particle count and grid size can be explored
  without editing the function body.

  Args:
    snapshot: Path of the snapshot file handed to
      ``SimulationParticles.load_snapshot``.
    stride: Step of the ``[::stride]`` slice applied to the loaded particles
      before interpolation.
    nsize: Value forwarded as ``Nsize`` to ``interp_to_field``.

  Prints the elapsed wall-clock time of the interpolation; returns nothing.
  """
  simParticles = SimulationParticles.load_snapshot(snapshot)

  # Only the slicing + interpolation is timed; snapshot loading is excluded.
  t0 = time.perf_counter()
  # The result stays bound to a local so it remains alive until the function returns.
  simField3D = simParticles[::stride].interp_to_field(Nsize=nsize) # [::n] of the particles
  t = time.perf_counter() - t0
  print("Time for interpolation: {} s".format(t))

if __name__ == '__main__':
  test()

Overwriting tmptest.py


In [43]:
import sys
# Run tmptest.py under memory_profiler's mprof with the current kernel's Python
# interpreter; the script's stdout is redirected to tmptest.log.
!{sys.executable} -m mprof run --multiprocess --include-children tmptest.py > tmptest.log
# Render the most recent mprof recording to tmptest.png.
!{sys.executable} -m mprof plot -f -o tmptest.png
# Open the plot and the log with the `code` command
# (presumably the VS Code CLI -- confirm it is available on PATH).
!code tmptest.png
!code tmptest.log

OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.
root_add_process : started ...
workers_process : started ...
workers using 35 threads.
Workers processed 107181154 numbers, of which were
	0.000e+00 exact calculations (R<Rmin),
	5.896e+07 exact calculations (R>Rmax),
	4.822e+07 trivial calculations,
	0.000e+00 interpolations.

workers_process : ended.
Root processed 107181154 numbers in 3287 CPU-chunks.
root_add_process : ended.
voxelize_gpu function took 27.5132 seconds
In the end, 0 in cpu_queue
Using last profile data.


In [1]:
from utils_folding import SimulationParticles
# Snapshot file used for this interactive check.
SNAPSHOT = '/appalachia/d5/DISK/from_pleiades/snapshots/gmcs0_wind4_gmc9/snapshot_550.hdf5'
# Load the snapshot through the project's SimulationParticles helper.
simParticles = SimulationParticles.load_snapshot(SNAPSHOT)
# Interpolate a [::16] slice of the particles onto a field, requesting Nsize=1024.
# NOTE(review): the recorded output of this cell reports "Nsize:  530" --
# confirm how the requested Nsize is actually used by interp_to_field.
simField3D = simParticles[::16].interp_to_field(Nsize=1024) # [::n] of the particles

Only the CPU-only flavour of Voxelize is available!
Interpolating velocity field...
Padding:  0.017721710844865554 Lbox:  1.035443421689731 Nsize:  530
Auto padding done. Time elapsed: 0.27 s


OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.
root_add_process : started ...
workers_process : started ...
workers using 35 threads.
Workers processed 39342258 numbers, of which were
	0.000e+00 exact calculations (R<Rmin),
	3.109e+07 exact calculations (R>Rmax),
	8.254e+06 trivial calculations,
	0.000e+00 interpolations.

workers_process : ended.
Root processed 39342258 numbers in 1218 CPU-chunks.
root_add_process : ended.
voxelize_gpu function took 7.3999 seconds
In the end, 0 in cpu_queue


First stamp: 0.52 s
Second stamp: 1.57 s
Third stamp: 2.45 s
Fourth stamp: 2.45 s
Interpolation done. Time elapsed: 10.23 s
Interpolation to Field3D done. Time elapsed: 10.23 s


In [3]:
import numpy as np
# True if any element of simField3D.vx is NaN. Reducing the mask with .any()
# states the intent directly; bool() keeps the displayed result a plain Python bool.
bool(np.isnan(simField3D.vx).any())

False

## FFT

In [44]:
%%writefile tmptest.py
# Transforming three components at the same time
import pyfftw
import time
import numpy as np
from memory_profiler import profile
NSIZE = 1024

@profile
def run(nsize=NSIZE, threads=32, repeats=5):
  """Time repeated 3D complex64 FFTs with pyFFTW on a random cubic field.

  Args:
    nsize: Number of grid points along each axis of the cube.
    threads: Thread count passed to ``pyfftw.FFTW``.
    repeats: Number of timed transforms to run.

  Prints the elapsed wall-clock time of each stage; returns nothing.
  """
  shape = (nsize, nsize, nsize)

  t0 = time.perf_counter()
  # Write the float64 samples straight into the real and imaginary parts of a
  # preallocated complex64 array, so no full-size complex128 temporary is
  # allocated and the memory profile is not dominated by field creation.
  arr = np.empty(shape, dtype='complex64')
  arr.real = np.random.rand(*shape)
  arr.imag = np.random.rand(*shape)
  t = time.perf_counter() - t0
  print("Random field creation: {} s".format(t))

  # Create FFTW object (the same aligned buffer is passed as input and output)
  t0 = time.perf_counter()
  a = pyfftw.empty_aligned(shape, dtype='complex64')
  fft_object = pyfftw.FFTW(a, a, axes=(0, 1, 2), threads=threads)
  t = time.perf_counter() - t0
  print("Time for FFTW object creation: {} s".format(t))

  # Perform FFT
  for _ in range(repeats):
    t0 = time.perf_counter()
    # Each pass feeds the array returned by the previous call back in.
    arr = fft_object(arr)
    t = time.perf_counter() - t0
    print("Time for FFTW: {} s".format(t))

if __name__ == '__main__':
  run()

Overwriting tmptest.py


In [45]:
import sys
# Run tmptest.py under memory_profiler's mprof with the current kernel's Python
# interpreter; the script's stdout is redirected to tmptest.log.
!{sys.executable} -m mprof run --multiprocess --include-children tmptest.py > tmptest.log
# Render the most recent mprof recording to tmptest.png.
!{sys.executable} -m mprof plot -f -o tmptest.png
# Open the plot and the log with the `code` command
# (presumably the VS Code CLI -- confirm it is available on PATH).
!code tmptest.png
!code tmptest.log

Using last profile data.


In [38]:
%%writefile tmptest.py
# Transforming each component separately
import pyfftw
import time
import numpy as np
from memory_profiler import profile

NSIZE = 512

@profile
def run(nsize=NSIZE, threads=32, repeats=3):
  """Time repeated 3D complex128 FFTs with pyFFTW on a random cubic field.

  Args:
    nsize: Number of grid points along each axis of the cube.
    threads: Thread count passed to ``pyfftw.FFTW``.
    repeats: Number of timed transforms to run.

  Prints the elapsed wall-clock time of each stage; returns nothing.
  """
  shape = (nsize, nsize, nsize)

  # Build the complex128 field by filling the real and imaginary parts of a
  # preallocated array. The array already has the target dtype, so no astype()
  # call (which would allocate a full-size copy) is needed.
  arr = np.empty(shape, dtype='complex128')
  arr.real = np.random.rand(*shape)
  arr.imag = np.random.rand(*shape)

  # Create FFTW object (the same aligned buffer is passed as input and output)
  t0 = time.perf_counter()
  a = pyfftw.empty_aligned(shape, dtype='complex128')
  fft_object = pyfftw.FFTW(a, a, axes=(0, 1, 2), threads=threads)
  t = time.perf_counter() - t0
  print("Time for FFTW object creation: {} s".format(t))

  # Perform FFT
  for _ in range(repeats):
    t0 = time.perf_counter()
    # Each pass feeds the array returned by the previous call back in.
    arr = fft_object(arr)
    t = time.perf_counter() - t0
    print("Time for FFTW: {} s".format(t))

if __name__ == '__main__':
  run()

Overwriting tmptest.py
