Skip to content

Commit

Permalink
wrote a CI-like testing bash script that sets up conda env, but no CU…
Browse files Browse the repository at this point in the history
…DA install (that would be crazy). Also removed mark_cuda_test instances (I guess they aren't necessary...) and grouped tests into classes (except for PDM). I removed the alignment from the pinned CPU arrays in the CUDA transfer functions in LS and NFFT... not sure how important that is, but I wanted to make sure that wasn't the source of the errors I was getting during the unit testing.
  • Loading branch information
johnh2o2 committed Sep 27, 2017
1 parent e528ea3 commit 3f5be72
Show file tree
Hide file tree
Showing 8 changed files with 749 additions and 705 deletions.
1 change: 0 additions & 1 deletion cuvarbase/ce.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,6 @@ def transfer_freqs_to_gpu(self, **kwargs):
self.freqs_g.set_async(freqs, stream=self.stream)

def transfer_ce_to_cpu(self, **kwargs):
    """
    Asynchronously copy the conditional-entropy result from the GPU
    array (``self.ce_g``) into the pinned CPU array (``self.ce_c``),
    on this container's CUDA stream.
    """
    # Dead commented-out memcpy_dtoh_async call removed; get_async is
    # the supported GPUArray API for async device-to-host transfer.
    self.ce_g.get_async(stream=self.stream, ary=self.ce_c)

def compute_mag_bin_fracs(self, y, **kwargs):
Expand Down
27 changes: 24 additions & 3 deletions cuvarbase/cunfft.py
Original file line number Diff line number Diff line change
def allocate_pinned_cpu(self, **kwargs):
    """
    Allocate page-locked (pinned) CPU memory for the NFFT result so it
    can be transferred asynchronously from the GPU.

    Parameters
    ----------
    nf: int, optional
        Number of frequencies; defaults to ``self.nf``.

    Returns
    -------
    self
    """
    self.nf = kwargs.get('nf', self.nf)

    assert(self.nf is not None)
    # Dead commented-out aligned-allocation code removed.
    # NOTE(review): page-size alignment of the pinned buffer was
    # deliberately dropped here; plain zeros are registered as
    # page-locked memory instead.
    self.ghat_c = np.zeros(self.nf, dtype=self.complex_type)
    self.ghat_c = cuda.register_host_memory(self.ghat_c)

    return self
Expand Down Expand Up @@ -370,6 +372,12 @@ def __init__(self, *args, **kwargs):
self.allocated_memory = []

def m_from_C(self, C, sigma):
    """
    Estimate the NFFT filter radius ``m`` from an error constant ``C``,
    where ``C`` is something like ``err_tolerance/N_freq``.

    Pulled from <https://github.com/jakevdp/nfft>_

    Parameters
    ----------
    C: float
        Error constant (e.g. tolerance divided by number of frequencies).
    sigma: float
        NFFT oversampling factor.

    Returns
    -------
    m: int
        Estimated filter radius (in grid points).
    """
    decay_rate = np.pi * (1. - 1. / (2. * sigma - 1.))
    m_estimate = -np.log(0.25 * C) / decay_rate
    return int(np.ceil(m_estimate))

Expand Down Expand Up @@ -400,6 +408,19 @@ def estimate_m(self, N):
return self.m_from_C(self.m_tol / N, self.sigma)

def get_m(self, N=None):
"""
Returns the ``m`` value for ``N`` frequencies.
Parameters
----------
N: int
Number of frequencies, only needed if ``autoset_m`` is ``False``.
Returns
-------
m: int
The filter radius (in grid points)
"""
if self.autoset_m:
return self.estimate_m(N)
else:
Expand Down
46 changes: 40 additions & 6 deletions cuvarbase/lombscargle.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import resource

import numpy as np
import pycuda.driver as cuda
from scipy.special import gamma, gammaln

import pycuda.driver as cuda
import pycuda.gpuarray as gpuarray
from pycuda.compiler import SourceModule
import resource

from .core import GPUAsyncProcess
from .utils import weights, find_kernel, _module_reader
from .utils import autofrequency as utils_autofreq
Expand All @@ -22,6 +25,19 @@ def check_k0(freqs, k0=None, rtol=1E-2, atol=1E-7):


class LombScargleMemory(object):
"""
Container class for allocating memory and transferring
data between the GPU and CPU for Lomb-Scargle computations
Parameters
----------
sigma: int
The ``sigma`` parameter for the NFFT
stream: :class:`pycuda.driver.Stream` instance
The CUDA stream used for calculations/data transfer
m: int
The ``m`` parameter for the NFFT
"""
def __init__(self, sigma, stream, m, **kwargs):

self.sigma = sigma
Expand Down Expand Up @@ -102,6 +118,7 @@ def __init__(self, sigma, stream, m, **kwargs):
self.w = kwargs.get('w', None)

def allocate_data(self, **kwargs):
""" Allocates memory for lightcurve """
n0 = kwargs.get('n0', self.n0)
if self.buffered_transfer:
n0 = kwargs.get('n0_buffer', self.n0_buffer)
Expand All @@ -124,6 +141,10 @@ def allocate_data(self, **kwargs):
return self

def allocate_grids(self, **kwargs):
"""
Allocates memory for NFFT grids, NFFT precomputation vectors,
and the GPU vector for the Lomb-Scargle power
"""
k0 = kwargs.get('k0', self.k0)
n0 = kwargs.get('n0', self.n0)
if self.buffered_transfer:
Expand Down Expand Up @@ -151,19 +172,27 @@ def allocate_grids(self, **kwargs):
return self

def allocate_pinned_cpu(self, **kwargs):
    """
    Allocate page-locked (pinned) CPU memory for asynchronous transfer
    of the Lomb-Scargle power from the GPU.

    Parameters
    ----------
    nf: int, optional
        Number of frequencies; defaults to ``self.nf``.

    Returns
    -------
    self
    """
    nf = kwargs.get('nf', self.nf)
    assert(nf is not None)

    # Dead commented-out aligned-allocation code removed.
    # NOTE(review): page-size alignment of the pinned buffer was
    # deliberately dropped here; plain zeros are registered as
    # page-locked memory instead.
    self.lsp_c = np.zeros(nf, dtype=self.real_type)
    self.lsp_c = cuda.register_host_memory(self.lsp_c)

    return self

def is_ready(self):
    """Not implemented -- do not call this; it always raises NotImplementedError."""
    raise NotImplementedError()

def allocate_buffered_data_arrays(self, **kwargs):
"""
Allocates pinned memory for lightcurves if we're reusing
this container
"""
n0 = kwargs.get('n0', self.n0)
if self.buffered_transfer:
n0 = kwargs.get('n0_buffer', self.n0_buffer)
Expand All @@ -186,6 +215,7 @@ def allocate_buffered_data_arrays(self, **kwargs):
return self

def allocate(self, **kwargs):
""" Allocate all memory necessary """
self.nf = kwargs.get('nf', self.nf)
assert(self.nf is not None)

Expand All @@ -199,6 +229,7 @@ def allocate(self, **kwargs):
return self

def setdata(self, **kwargs):
""" Sets the value of the data arrays. """
t = kwargs.get('t', self.t)
yw = kwargs.get('yw', self.yw)
w = kwargs.get('w', self.w)
Expand Down Expand Up @@ -258,6 +289,7 @@ def setdata(self, **kwargs):
return self

def transfer_data_to_gpu(self, **kwargs):
""" Transfers the lightcurve to the GPU """
t, yw, w = self.t, self.yw, self.w

assert(not any([arr is None for arr in [t, yw, w]]))
Expand All @@ -268,10 +300,11 @@ def transfer_data_to_gpu(self, **kwargs):
self.w_g.set_async(w, stream=self.stream)

def transfer_lsp_to_cpu(self, **kwargs):
    """
    Asynchronously copy the Lomb-Scargle power from the GPU array
    (``self.lsp_g``) into the pinned CPU array (``self.lsp_c``),
    on this container's CUDA stream.
    """
    # Superseded raw memcpy_dtoh_async lines removed; get_async is the
    # supported GPUArray API for async device-to-host transfer.
    self.lsp_g.get_async(ary=self.lsp_c, stream=self.stream)

def fromdata(self, **kwargs):
""" Sets and (optionally) allocates memory for data """
self.setdata(**kwargs)

if kwargs.get('allocate', True):
Expand All @@ -280,6 +313,7 @@ def fromdata(self, **kwargs):
return self

def set_gpu_arrays_to_zero(self, **kwargs):
""" Sets all gpu arrays to zero """
for x in [self.t_g, self.yw_g, self.w_g]:
if x is not None:
x.fill(self.real_type(0), stream=self.stream)
Expand Down

0 comments on commit 3f5be72

Please sign in to comment.