Skip to content

Commit

Permalink
wrote a CI-like testing bash script that sets up conda env, but no CU…
Browse files Browse the repository at this point in the history
…DA install (that would be crazy). Also removed mark_cuda_test instances (I guess they aren't necessary...) and grouped tests into classes (except for PDM). I removed the alignment from the pinned CPU arrays in the CUDA transfer functions in LS and NFFT... not sure how important that is, but I wanted to make sure that wasn't the source of the errors I was getting during the unit testing.
  • Loading branch information
johnh2o2 committed Sep 27, 2017
1 parent e528ea3 commit 3f5be72
Show file tree
Hide file tree
Showing 8 changed files with 749 additions and 705 deletions.
1 change: 0 additions & 1 deletion cuvarbase/ce.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,6 @@ def transfer_freqs_to_gpu(self, **kwargs):
self.freqs_g.set_async(freqs, stream=self.stream)

def transfer_ce_to_cpu(self, **kwargs):
    """
    Asynchronously copy the conditional-entropy result from the GPU
    array (``self.ce_g``) into the pinned CPU array (``self.ce_c``),
    on this container's CUDA stream.
    """
    # Dead commented-out memcpy_dtoh_async call removed; get_async is
    # the supported GPUArray API for async device-to-host transfer.
    self.ce_g.get_async(stream=self.stream, ary=self.ce_c)

def compute_mag_bin_fracs(self, y, **kwargs):
Expand Down
27 changes: 24 additions & 3 deletions cuvarbase/cunfft.py
Original file line number Diff line number Diff line change
def allocate_pinned_cpu(self, **kwargs):
    """
    Allocate page-locked (pinned) CPU memory for the NFFT result so it
    can be transferred asynchronously from the GPU.

    Parameters
    ----------
    nf: int, optional
        Number of frequencies; defaults to ``self.nf``.

    Returns
    -------
    self
    """
    self.nf = kwargs.get('nf', self.nf)

    assert(self.nf is not None)
    # Dead commented-out aligned-allocation code removed.
    # NOTE(review): page-size alignment of the pinned buffer was
    # deliberately dropped here; plain zeros are registered as
    # page-locked memory instead.
    self.ghat_c = np.zeros(self.nf, dtype=self.complex_type)
    self.ghat_c = cuda.register_host_memory(self.ghat_c)

    return self
Expand Down Expand Up @@ -370,6 +372,12 @@ def __init__(self, *args, **kwargs):
self.allocated_memory = []

def m_from_C(self, C, sigma):
    """
    Estimate the NFFT filter radius ``m`` from an error constant ``C``,
    where ``C`` is something like ``err_tolerance/N_freq``.

    Pulled from <https://github.com/jakevdp/nfft>_

    Parameters
    ----------
    C: float
        Error constant (e.g. tolerance divided by number of frequencies).
    sigma: float
        NFFT oversampling factor.

    Returns
    -------
    m: int
        Estimated filter radius (in grid points).
    """
    decay_rate = np.pi * (1. - 1. / (2. * sigma - 1.))
    m_estimate = -np.log(0.25 * C) / decay_rate
    return int(np.ceil(m_estimate))

Expand Down Expand Up @@ -400,6 +408,19 @@ def estimate_m(self, N):
return self.m_from_C(self.m_tol / N, self.sigma)

def get_m(self, N=None):
"""
Returns the ``m`` value for ``N`` frequencies.
Parameters
----------
N: int
Number of frequencies, only needed if ``autoset_m`` is ``False``.
Returns
-------
m: int
The filter radius (in grid points)
"""
if self.autoset_m:
return self.estimate_m(N)
else:
Expand Down
46 changes: 40 additions & 6 deletions cuvarbase/lombscargle.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import resource

import numpy as np
import pycuda.driver as cuda
from scipy.special import gamma, gammaln

import pycuda.driver as cuda
import pycuda.gpuarray as gpuarray
from pycuda.compiler import SourceModule
import resource

from .core import GPUAsyncProcess
from .utils import weights, find_kernel, _module_reader
from .utils import autofrequency as utils_autofreq
Expand All @@ -22,6 +25,19 @@ def check_k0(freqs, k0=None, rtol=1E-2, atol=1E-7):


class LombScargleMemory(object):
"""
Container class for allocating memory and transferring
data between the GPU and CPU for Lomb-Scargle computations
Parameters
----------
sigma: int
The ``sigma`` parameter for the NFFT
stream: :class:`pycuda.driver.Stream` instance
The CUDA stream used for calculations/data transfer
m: int
The ``m`` parameter for the NFFT
"""
def __init__(self, sigma, stream, m, **kwargs):

self.sigma = sigma
Expand Down Expand Up @@ -102,6 +118,7 @@ def __init__(self, sigma, stream, m, **kwargs):
self.w = kwargs.get('w', None)

def allocate_data(self, **kwargs):
""" Allocates memory for lightcurve """
n0 = kwargs.get('n0', self.n0)
if self.buffered_transfer:
n0 = kwargs.get('n0_buffer', self.n0_buffer)
Expand All @@ -124,6 +141,10 @@ def allocate_data(self, **kwargs):
return self

def allocate_grids(self, **kwargs):
"""
Allocates memory for NFFT grids, NFFT precomputation vectors,
and the GPU vector for the Lomb-Scargle power
"""
k0 = kwargs.get('k0', self.k0)
n0 = kwargs.get('n0', self.n0)
if self.buffered_transfer:
Expand Down Expand Up @@ -151,19 +172,27 @@ def allocate_grids(self, **kwargs):
return self

def allocate_pinned_cpu(self, **kwargs):
    """
    Allocate page-locked (pinned) CPU memory for asynchronous transfer
    of the Lomb-Scargle power from the GPU.

    Parameters
    ----------
    nf: int, optional
        Number of frequencies; defaults to ``self.nf``.

    Returns
    -------
    self
    """
    nf = kwargs.get('nf', self.nf)
    assert(nf is not None)

    # Dead commented-out aligned-allocation code removed.
    # NOTE(review): page-size alignment of the pinned buffer was
    # deliberately dropped here; plain zeros are registered as
    # page-locked memory instead.
    self.lsp_c = np.zeros(nf, dtype=self.real_type)
    self.lsp_c = cuda.register_host_memory(self.lsp_c)

    return self

def is_ready(self):
    """Not implemented -- do not call this; it always raises NotImplementedError."""
    raise NotImplementedError()

def allocate_buffered_data_arrays(self, **kwargs):
"""
Allocates pinned memory for lightcurves if we're reusing
this container
"""
n0 = kwargs.get('n0', self.n0)
if self.buffered_transfer:
n0 = kwargs.get('n0_buffer', self.n0_buffer)
Expand All @@ -186,6 +215,7 @@ def allocate_buffered_data_arrays(self, **kwargs):
return self

def allocate(self, **kwargs):
""" Allocate all memory necessary """
self.nf = kwargs.get('nf', self.nf)
assert(self.nf is not None)

Expand All @@ -199,6 +229,7 @@ def allocate(self, **kwargs):
return self

def setdata(self, **kwargs):
""" Sets the value of the data arrays. """
t = kwargs.get('t', self.t)
yw = kwargs.get('yw', self.yw)
w = kwargs.get('w', self.w)
Expand Down Expand Up @@ -258,6 +289,7 @@ def setdata(self, **kwargs):
return self

def transfer_data_to_gpu(self, **kwargs):
""" Transfers the lightcurve to the GPU """
t, yw, w = self.t, self.yw, self.w

assert(not any([arr is None for arr in [t, yw, w]]))
Expand All @@ -268,10 +300,11 @@ def transfer_data_to_gpu(self, **kwargs):
self.w_g.set_async(w, stream=self.stream)

def transfer_lsp_to_cpu(self, **kwargs):
    """
    Asynchronously copy the Lomb-Scargle power from the GPU array
    (``self.lsp_g``) into the pinned CPU array (``self.lsp_c``),
    on this container's CUDA stream.
    """
    # Superseded raw memcpy_dtoh_async lines removed; get_async is the
    # supported GPUArray API for async device-to-host transfer.
    self.lsp_g.get_async(ary=self.lsp_c, stream=self.stream)

def fromdata(self, **kwargs):
""" Sets and (optionally) allocates memory for data """
self.setdata(**kwargs)

if kwargs.get('allocate', True):
Expand All @@ -280,6 +313,7 @@ def fromdata(self, **kwargs):
return self

def set_gpu_arrays_to_zero(self, **kwargs):
""" Sets all gpu arrays to zero """
for x in [self.t_g, self.yw_g, self.w_g]:
if x is not None:
x.fill(self.real_type(0), stream=self.stream)
Expand Down

0 comments on commit 3f5be72

Please sign in to comment.