diff --git a/py/redrock/rebin.py b/py/redrock/rebin.py
index 0543bfa0..e13db182 100644
--- a/py/redrock/rebin.py
+++ b/py/redrock/rebin.py
@@ -17,7 +17,7 @@
 # cuda_source contains raw CUDA kernels to be loaded as CUPY module
 cuda_source = r'''
         extern "C" {
-            __global__ void batch_trapz_rebin(const double* x, const double* y, const double* edges, const double* myz, const int* idx, double* result, int nz, int nbin, int nbasis, int nt) {
+            __global__ void batch_trapz_rebin(const double* x, const double* y, const double* edges, const double* myz, const long* idx, double* result, int nz, int nbin, int nbasis, int nt) {
                 // This kernel performs a trapezoidal rebinning for all
                 // redshifts and all bases of a template with either evenly
                 // or unevenly spaced input wavelength grid (QSOs).
@@ -209,7 +209,7 @@ def _trapz_rebin_batch(x, y, edges, myz, results, redshifted_x):
     return
 
 
-def trapz_rebin(x, y, xnew=None, edges=None, myz=None, use_gpu=False):
+def trapz_rebin(x, y, xnew=None, edges=None, myz=None, use_gpu=False, xmin=None, xmax=None):
     """Rebin y(x) flux density using trapezoidal integration between bin edges
     Optionally use GPU helper method to rebin in batch, see trapz_rebin_batch_gpu
     Note - current return array shape is (nz, nbins, nbasis).  Changing to
@@ -258,6 +258,14 @@ def trapz_rebin(x, y, xnew=None, edges=None, myz=None, use_gpu=False):
         myz (array): (optional) redshift array to rebin in batch,
             applying redshifts on-the-fly to x
         use_gpu (boolean): whether or not to use GPU algorithm 
+        xmin (float): (optional) minimum x-value - x[0] will be used if
+            omitted - this is useful to avoid GPU to CPU copying in the case
+            where x is a CuPy array and providing the scalar value results
+            in a large speed gain
+        xmax (float) (optional) maximum x-value - x[-1] will be used if
+            omitted - this is useful to avoid GPU to CPU copying in the case
+            where x is a CuPy array and providing the scalar value results
+            in a large speed gain
 
     Returns:
         array: integrated results with len(results) = len(edges)-1
@@ -280,6 +288,10 @@ def trapz_rebin(x, y, xnew=None, edges=None, myz=None, use_gpu=False):
     else:
         edges = np.asarray(edges)
     nbins = len(edges)-1
+    if xmin is None:
+        xmin = x[0]
+    if xmax is None:
+        xmax = x[-1]
 
     #Use these booleans to determine output array shape based on input
     scalar_z = False
@@ -298,9 +310,11 @@ def trapz_rebin(x, y, xnew=None, edges=None, myz=None, use_gpu=False):
         return np.zeros((0,nbins, 1), dtype=np.float64)
 
     #Must multiply x by 1+z for comparison, only need to look at max/min cases
-    if (edges[0] < x[0]*(1+myz.max()) or edges[-1] > x[-1]*(1+myz.min())):
+    #if (edges[0] < x[0]*(1+myz.max()) or edges[-1] > x[-1]*(1+myz.min())):
+    if (edges[0] < xmin*(1+myz.max()) or edges[-1] > xmax*(1+myz.min())):
         raise ValueError('edges must be within input x range')
 
+
     if (not use_gpu and scalar_z and len(y.shape) == 1):
         #Special case, call _trapz_rebin_1d directly
         result = np.zeros(nbins, dtype=np.float64)
@@ -387,7 +401,7 @@ def _trapz_rebin_batch_gpu(x, y, edges, myz, result_shape):
     #of input wavelengths at each boundary in edges and
     #use serachsorted to find index for each boundary
     e2d = cp.asarray(edges/(1+myz[:,None]))
-    idx = cp.searchsorted(x, e2d, side='right').astype(np.int32)
+    idx = cp.searchsorted(x, e2d, side='right')#.astype(cp.int32)
 
     # Load CUDA kernel
     cp_module = cp.RawModule(code=cuda_source)
@@ -435,6 +449,6 @@ def rebin_template(template, myz, dwave, use_gpu=False):
     #rebin all z and all bases in batch in parallel
     #and return dict of 3-d numpy / cupy arrays 
     for hs, wave in dwave.items():
-        result[hs] = trapz_rebin(template.wave, template.flux, xnew=wave, myz=myz, use_gpu=use_gpu)
+        result[hs] = trapz_rebin(template.wave, template.flux, xnew=wave, myz=myz, use_gpu=use_gpu, xmin=template.minwave, xmax=template.maxwave)
 
     return result
diff --git a/py/redrock/templates.py b/py/redrock/templates.py
index 6b083e14..ae91bf2d 100644
--- a/py/redrock/templates.py
+++ b/py/redrock/templates.py
@@ -110,6 +110,8 @@ def __init__(self, filename=None, spectype=None, redshifts=None,
 
         self._nbasis = self.flux.shape[0]
         self._nwave = self.flux.shape[1]
+        self.minwave = self.wave[0]
+        self.maxwave = self.wave[-1]
 
 
     @property
diff --git a/py/redrock/utils.py b/py/redrock/utils.py
index 21699382..91c1a111 100644
--- a/py/redrock/utils.py
+++ b/py/redrock/utils.py
@@ -252,6 +252,7 @@ def transmission_Lyman(zObj,lObs, use_gpu=False):
         asarray = np.asarray
 
     Lyman_series = constants.Lyman_series
+    min_wave = 0
     if (np.isscalar(zObj)):
         #zObj is a float
         lRF = lObs/(1.+zObj)
@@ -260,7 +261,9 @@ def transmission_Lyman(zObj,lObs, use_gpu=False):
             #Empty z array
             return np.ones((0, len(lObs)), dtype=np.float64)
         #This is an array of float
-        if (lObs.min()/(1+zObj.max()) > Lyman_series['Lya']['line']):
+        min_wave = lObs.min()/(1+zObj.max())
+        #if (lObs.min()/(1+zObj.max()) > Lyman_series['Lya']['line']):
+        if (min_wave > Lyman_series['Lya']['line']):
             #Return None if wavelength range doesn't overlap with Lyman series
             #No need to perform any calculations in this case
             return None
@@ -271,13 +274,14 @@ def transmission_Lyman(zObj,lObs, use_gpu=False):
         lRF = lObs/(1.+asarray(zObj)[:,None])
     T = np.ones_like(lRF)
     for l in list(Lyman_series.keys()):
+        if (min_wave > Lyman_series[l]['line']):
+            continue
         w      = lRF<Lyman_series[l]['line']
         zpix   = lObs[w]/Lyman_series[l]['line']-1.
         tauEff = Lyman_series[l]['A']*(1.+zpix)**Lyman_series[l]['B']
         T[w]  *= np.exp(-tauEff)
     if (np.isscalar(zObj) and use_gpu):
         T = asarray(T)
-
     return T
 
 def getGPUCountMPI(comm):