implemented some suggestions by Josef and Ralf

gpanterov · Jul 11, 2012 · b66e698 · b66e698
1 parent 292c0a3
commit b66e698
Show file tree

Hide file tree

Showing 2 changed files with 35 additions and 112 deletions.
diff --git a/statsmodels/nonparametric/KernelFunctions.py b/statsmodels/nonparametric/KernelFunctions.py
@@ -15,6 +15,27 @@
 from scipy.special import erf
 
 
+def _get_shape_and_transform(h, Xi, x = None):
+    """
+    Returns the transformed arrays and shape parameters
+    To be used with the kernel functions in KernelFunctions.py
+    """
+    x = np.asarray(x)
+    h = np.asarray(h, dtype=float)
+    Xi = np.asarray(Xi)
+    if Xi.ndim > 1: # More than one variable with more than one observations
+        K = np.shape(Xi)[1]
+        N = np.shape(Xi)[0]
+    elif Xi.ndim == 1: # One variable with many observations
+        K = 1
+        N = np.shape(Xi)[0]
+    else:  # ndim ==0 so Xi is a single point (number)
+        K = 1
+        N = 1
+    assert N >= K  # Need more observations than variables
+    Xi = Xi.reshape([N, K])
+    return h, Xi, x, N, K
+
 def AitchisonAitken(h, Xi, x, num_levels=False):
     """
     Returns the value of the Aitchison and Aitken's (1976) Kernel. Used for
@@ -60,23 +81,12 @@ def AitchisonAitken(h, Xi, x, num_levels=False):
     ----------
     Nonparametric econometrics : theory and practice / Qi Li and Jeffrey Scott Racine.
     """
+    h, Xi, x, N, K = _get_shape_and_transform(h,Xi,x)
     Xi = np.abs(np.asarray(Xi, dtype=int))
     x = np.abs(np.asarray(x, dtype=int))
 
-    if Xi.ndim > 1:
-        K = np.shape(Xi)[1]
-        N = np.shape(Xi)[0]
-    elif Xi.ndim == 1:
-        K = 1
-        N = np.shape(Xi)[0]
-    else:  # ndim ==0 so Xi is a single point (number)
-        K = 1
-        N = 1    
     if K == 0:
         return Xi
-
-    h = np.asarray(h, dtype=float)
-    Xi = Xi.reshape([N, K])
     c = np.asarray([len(np.unique(Xi[:, i])) for i in range(K)],
                    dtype=int)
     if num_levels:
@@ -127,21 +137,11 @@ def WangRyzin(h, Xi, x):
     Nonparametric econometrics : theory and practice / Qi Li and Jeffrey Scott Racine.
     
     """
+    h, Xi, x, N, K = _get_shape_and_transform(h,Xi,x)
     Xi = np.abs(np.asarray(Xi, dtype=int))
     x = np.abs(np.asarray(x, dtype=int))
-    if Xi.ndim > 1:
-        K = np.shape(Xi)[1]
-        N = np.shape(Xi)[0]
-    elif Xi.ndim == 1:
-        K = 1
-        N = np.shape(Xi)[0]
-    else:  # ndim ==0 so Xi is a single point (number)
-        K = 1
-        N = 1
     if K == 0:
         return Xi
-    h = np.asarray(h, dtype=float)
-    Xi = Xi.reshape([N, K])
     kernel_value = (0.5 * (1 - h) * (h ** abs(Xi - x)))
     kernel_value = kernel_value.reshape([N, K])
     inDom = (Xi == x) * (1 - h)
@@ -171,14 +171,7 @@ def Epanechnikov(h, Xi, x):
     ----------
     Racine, J. "Nonparametric econometrics: A Primer" : Foundations and Trends in Econometrics.
     """    
-    Xi = np.asarray(Xi)
-    x = np.asarray(x)
-    h = np.asarray(h, dtype=float)
-    N = np.shape(Xi)[0]
-    if Xi.ndim > 1:
-        K = np.shape(Xi)[1]
-    else:
-        K = 1
+    h, Xi, x, N, K = _get_shape_and_transform(h,Xi,x)
     if K == 0:
         return Xi
     z = (Xi - x) / h
@@ -193,14 +186,7 @@ def Epanechnikov(h, Xi, x):
 
 
 def Gaussian(h, Xi, x):
-    Xi = np.asarray(Xi)
-    x = np.asarray(x)
-    h = np.asarray(h, dtype=float)
-    N = np.shape(Xi)[0]
-    if Xi.ndim > 1:
-        K = np.shape(Xi)[1]
-    else:
-        K = 1
+    h, Xi, x, N, K = _get_shape_and_transform(h,Xi,x)
     if K == 0:
         return Xi
     z = (Xi - x) / h
@@ -213,14 +199,7 @@ def Gaussian_Convolution(h, Xi, x):
     """
     Calculates the Gaussian Convolution Kernel
     """
-    Xi = np.asarray(Xi)
-    x = np.asarray(x)
-    h = np.asarray(h, dtype=float)
-    N = np.shape(Xi)[0]
-    if Xi.ndim > 1:
-        K = np.shape(Xi)[1]
-    else:
-        K = 1
+    h, Xi, x, N, K = _get_shape_and_transform(h,Xi,x)
     if K == 0:
         return Xi
     z = (Xi - x) / h
@@ -233,18 +212,9 @@ def WangRyzin_Convolution(h, Xi, Xj):
     # This is the equivalent of the convolution case with the Gaussian Kernel
     # However it is not exactly convolution. Think of a better name
     # References http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=4&ved=0CGUQFjAD&url=http%3A%2F%2Feconweb.tamu.edu%2Fli%2FUncond1.pdf&ei=2THUT5i7IIOu8QSvmrXlAw&usg=AFQjCNH4aGzQbKT22sLBbZqHtPOyeFXNIQ
+    h, Xi, x, N, K = _get_shape_and_transform(h,Xi)
     Xi = np.abs(np.asarray(Xi, dtype=int))
     Xj = np.abs(np.asarray(Xj, dtype=int))
-    h = np.asarray(h, dtype=float)
-    if Xi.ndim > 1:
-        K = np.shape(Xi)[1]
-        N = np.shape(Xi)[0]
-    elif Xi.ndim == 1:
-        K = 1
-        N = np.shape(Xi)[0]
-    else:  # ndim ==0 so Xi is a single point (number)
-        K = 1
-        N = 1
     if K == 0:
         return Xi
     Xi = Xi.reshape([N, K])
@@ -263,18 +233,9 @@ def WangRyzin_Convolution(h, Xi, Xj):
 
 
 def AitchisonAitken_Convolution(h, Xi, Xj):
+    h, Xi, x, N, K = _get_shape_and_transform(h,Xi)
     Xi = np.abs(np.asarray(Xi, dtype=int))
     Xj = np.abs(np.asarray(Xj, dtype=int))
-    h = np.asarray(h, dtype=float)
-    if Xi.ndim > 1:
-        K = np.shape(Xi)[1]
-        N = np.shape(Xi)[0]
-    elif Xi.ndim == 1:
-        K = 1
-        N = np.shape(Xi)[0]
-    else:  # ndim ==0 so Xi is a single point (number)
-        K = 1
-        N = 1   
     if K == 0:
         return Xi
     Xi = Xi.reshape([N, K])
@@ -292,14 +253,7 @@ def AitchisonAitken_Convolution(h, Xi, Xj):
     return Ordered
 
 def Gaussian_cdf(h, Xi, x):
-    Xi = np.asarray(Xi)
-    x = np.asarray(x)
-    h = np.asarray(h, dtype=float)
-    N = np.shape(Xi)[0]
-    if Xi.ndim > 1:
-        K = np.shape(Xi)[1]
-    else:
-        K = 1
+    h, Xi, x, N, K = _get_shape_and_transform(h,Xi,x)
     if K == 0:
         return Xi
     cdf = 0.5 * h * (1 + erf((x - Xi) / (h * np.sqrt(2))))

diff --git a/statsmodels/nonparametric/np_tools.py b/statsmodels/nonparametric/np_tools.py
@@ -16,20 +16,7 @@
 Convolution_Kernels = dict(gauss_convolution=kf.Gaussian_Convolution,
                            wangryzin_convolution=kf.WangRyzin_Convolution)
 
-
-def IntegrateSingle(val, pdf):
-    f1 = lambda x: pdf(edat=np.asarray([x]))
-    return quad(f1, -np.Inf, val)[0]
-
-
-def IntegrateDbl(val, pdf):
-    f2 = lambda y, x: pdf(edat=np.asarray([x, y]))
-    return dblquad(f2, -np.Inf, val[0], lambda x: -np.Inf, lambda x: val[1])[0]
-
-def IntegrateTrpl(val, pdf):
-    f3 = lambda z, y, x: pdf(edat=np.asarray([x,y,z]))
-    return tplquad(f3, -np.Inf, val[0], lambda x: -np.Inf, lambda x: val[1],
-                   lambda x, y: -np.Inf, lambda x, y: val[2])[0]
+
 
 class LeaveOneOut(object):
     # Written by Skipper
@@ -109,26 +96,10 @@ def GPKE(bw, tdat, edat, var_type, ckertype='gaussian',
     isordered = np.where(var_type == 'o')[0]
     isunordered = np.where(var_type == 'u')[0]
     edat = np.asarray(edat)
-    #edat = np.squeeze(edat)
     K = len(var_type)
     edat = np.asarray(edat)
-    #edat = np.squeeze(edat)
-##    if tdat.ndim > 1:
-##        N, K = np.shape(tdat)
-##    else:
-##        K = 1
-##        N = np.shape(tdat)[0]
-##        tdat = tdat.reshape([N, K])
-##
-##    if edat.ndim > 1:
-##        N_edat = np.shape(edat)[0]
-##    else:
-##        N_edat = 1
-##        edat = edat.reshape([N_edat, K])
-
     if tdat.ndim == 1 and K == 1:  # one variable many observations
         N = np.size(tdat)
-        #N_edat = np.size(edat)
     elif tdat.ndim == 1 and K > 1:
         N = 1
 
@@ -163,7 +134,7 @@ def GPKE(bw, tdat, edat, var_type, ckertype='gaussian',
 
 
 def GPKE_cond_cdf(bw, tdat, edat, var_type, ckertype='gaussian',
-         okertype='wangryzin', ukertype='aitchisonaitken'):
+         okertype='wangryzin', ukertype='aitchisonaitken', tosum=False):
     # This function is just for testing the conditional cdf
     # It can be reworked and incorporated in the GPKE. TODO
     # The difference from GPKE is that it doesn't sum the products
@@ -177,7 +148,6 @@ def GPKE_cond_cdf(bw, tdat, edat, var_type, ckertype='gaussian',
     edat = np.asarray(edat)
     if tdat.ndim == 1 and K == 1:  # one variable many observations
         N = np.size(tdat)
-        #N_edat = np.size(edat)
     elif tdat.ndim == 1 and K > 1:
         N = 1
 
@@ -208,7 +178,10 @@ def GPKE_cond_cdf(bw, tdat, edat, var_type, ckertype='gaussian',
         ), axis=1)
 
         dens[:,i] = np.prod(Kval, axis=1) * 1. / (np.prod(bw[iscontinuous]))
-    return dens
+    if tosum:
+        return np.sum(dens, axis=0)
+    else:
+        return dens
 
 
 def PKE(bw, tdat, edat, var_type, ckertype='gaussian',
@@ -252,11 +225,9 @@ def PKE(bw, tdat, edat, var_type, ckertype='gaussian',
     isunordered = np.where(var_type == 'u')[0]
     K = len(var_type)
     edat = np.asarray(edat)
-    #edat = np.squeeze(edat)
 
     if tdat.ndim == 1 and K == 1:  # one variable many observations
         N = np.size(tdat)
-        #N_edat = np.size(edat)
     elif tdat.ndim == 1 and K > 1:
         N = 1
 
@@ -276,7 +247,6 @@ def PKE(bw, tdat, edat, var_type, ckertype='gaussian',
     edat = edat.reshape([N_edat, K])
 
     bw = np.reshape(np.asarray(bw), (K, ))  # must remain 1-D for indexing to work
-    # dens = np.empty([N, 1])
     Kval = np.concatenate((
     kernel_func[ckertype](bw[iscontinuous], tdat[:, iscontinuous], edat[:, iscontinuous]),
     kernel_func[okertype](bw[isordered], tdat[:, isordered], edat[:, isordered]),
@@ -301,7 +271,6 @@ def GPKE_Reg(bw, tdat, edat, var_type, ckertype='gaussian',
 
     if tdat.ndim == 1 and K == 1:  # one variable many observations
         N = np.size(tdat)
-        #N_edat = np.size(edat)
     elif tdat.ndim == 1 and K > 1:
         N = 1