Skip to content

Commit

Permalink
implemented some suggestions by Josef and Ralf
Browse files Browse the repository at this point in the history
  • Loading branch information
gpanterov committed Jul 11, 2012
1 parent 292c0a3 commit b66e698
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 112 deletions.
104 changes: 29 additions & 75 deletions statsmodels/nonparametric/KernelFunctions.py
Expand Up @@ -15,6 +15,27 @@
from scipy.special import erf


def _get_shape_and_transform(h, Xi, x = None):
"""
Returns the transformed arrays and shape parameters
To be used with the kernel functions in KernelFunctions.py
"""
x = np.asarray(x)
h = np.asarray(h, dtype=float)
Xi = np.asarray(Xi)
if Xi.ndim > 1: # More than one variable with more than one observations
K = np.shape(Xi)[1]
N = np.shape(Xi)[0]
elif Xi.ndim == 1: # One variable with many observations
K = 1
N = np.shape(Xi)[0]
else: # ndim ==0 so Xi is a single point (number)
K = 1
N = 1
assert N >= K # Need more observations than variables
Xi = Xi.reshape([N, K])
return h, Xi, x, N, K

def AitchisonAitken(h, Xi, x, num_levels=False):
"""
Returns the value of the Aitchison and Aitken's (1976) Kernel. Used for
Expand Down Expand Up @@ -60,23 +81,12 @@ def AitchisonAitken(h, Xi, x, num_levels=False):
----------
Nonparametric econometrics : theory and practice / Qi Li and Jeffrey Scott Racine.
"""
h, Xi, x, N, K = _get_shape_and_transform(h,Xi,x)
Xi = np.abs(np.asarray(Xi, dtype=int))
x = np.abs(np.asarray(x, dtype=int))

if Xi.ndim > 1:
K = np.shape(Xi)[1]
N = np.shape(Xi)[0]
elif Xi.ndim == 1:
K = 1
N = np.shape(Xi)[0]
else: # ndim ==0 so Xi is a single point (number)
K = 1
N = 1
if K == 0:
return Xi

h = np.asarray(h, dtype=float)
Xi = Xi.reshape([N, K])
c = np.asarray([len(np.unique(Xi[:, i])) for i in range(K)],
dtype=int)
if num_levels:
Expand Down Expand Up @@ -127,21 +137,11 @@ def WangRyzin(h, Xi, x):
Nonparametric econometrics : theory and practice / Qi Li and Jeffrey Scott Racine.
"""
h, Xi, x, N, K = _get_shape_and_transform(h,Xi,x)
Xi = np.abs(np.asarray(Xi, dtype=int))
x = np.abs(np.asarray(x, dtype=int))
if Xi.ndim > 1:
K = np.shape(Xi)[1]
N = np.shape(Xi)[0]
elif Xi.ndim == 1:
K = 1
N = np.shape(Xi)[0]
else: # ndim ==0 so Xi is a single point (number)
K = 1
N = 1
if K == 0:
return Xi
h = np.asarray(h, dtype=float)
Xi = Xi.reshape([N, K])
kernel_value = (0.5 * (1 - h) * (h ** abs(Xi - x)))
kernel_value = kernel_value.reshape([N, K])
inDom = (Xi == x) * (1 - h)
Expand Down Expand Up @@ -171,14 +171,7 @@ def Epanechnikov(h, Xi, x):
----------
Racine, J. "Nonparametric econometrics: A Primer" : Foundations and Trends in Econometrics.
"""
Xi = np.asarray(Xi)
x = np.asarray(x)
h = np.asarray(h, dtype=float)
N = np.shape(Xi)[0]
if Xi.ndim > 1:
K = np.shape(Xi)[1]
else:
K = 1
h, Xi, x, N, K = _get_shape_and_transform(h,Xi,x)
if K == 0:
return Xi
z = (Xi - x) / h
Expand All @@ -193,14 +186,7 @@ def Epanechnikov(h, Xi, x):


def Gaussian(h, Xi, x):
Xi = np.asarray(Xi)
x = np.asarray(x)
h = np.asarray(h, dtype=float)
N = np.shape(Xi)[0]
if Xi.ndim > 1:
K = np.shape(Xi)[1]
else:
K = 1
h, Xi, x, N, K = _get_shape_and_transform(h,Xi,x)
if K == 0:
return Xi
z = (Xi - x) / h
Expand All @@ -213,14 +199,7 @@ def Gaussian_Convolution(h, Xi, x):
"""
Calculates the Gaussian Convolution Kernel
"""
Xi = np.asarray(Xi)
x = np.asarray(x)
h = np.asarray(h, dtype=float)
N = np.shape(Xi)[0]
if Xi.ndim > 1:
K = np.shape(Xi)[1]
else:
K = 1
h, Xi, x, N, K = _get_shape_and_transform(h,Xi,x)
if K == 0:
return Xi
z = (Xi - x) / h
Expand All @@ -233,18 +212,9 @@ def WangRyzin_Convolution(h, Xi, Xj):
# This is the equivalent of the convolution case with the Gaussian Kernel
# However it is not exactly convolution. Think of a better name
# References http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=4&ved=0CGUQFjAD&url=http%3A%2F%2Feconweb.tamu.edu%2Fli%2FUncond1.pdf&ei=2THUT5i7IIOu8QSvmrXlAw&usg=AFQjCNH4aGzQbKT22sLBbZqHtPOyeFXNIQ
h, Xi, x, N, K = _get_shape_and_transform(h,Xi)
Xi = np.abs(np.asarray(Xi, dtype=int))
Xj = np.abs(np.asarray(Xj, dtype=int))
h = np.asarray(h, dtype=float)
if Xi.ndim > 1:
K = np.shape(Xi)[1]
N = np.shape(Xi)[0]
elif Xi.ndim == 1:
K = 1
N = np.shape(Xi)[0]
else: # ndim ==0 so Xi is a single point (number)
K = 1
N = 1
if K == 0:
return Xi
Xi = Xi.reshape([N, K])
Expand All @@ -263,18 +233,9 @@ def WangRyzin_Convolution(h, Xi, Xj):


def AitchisonAitken_Convolution(h, Xi, Xj):
h, Xi, x, N, K = _get_shape_and_transform(h,Xi)
Xi = np.abs(np.asarray(Xi, dtype=int))
Xj = np.abs(np.asarray(Xj, dtype=int))
h = np.asarray(h, dtype=float)
if Xi.ndim > 1:
K = np.shape(Xi)[1]
N = np.shape(Xi)[0]
elif Xi.ndim == 1:
K = 1
N = np.shape(Xi)[0]
else: # ndim ==0 so Xi is a single point (number)
K = 1
N = 1
if K == 0:
return Xi
Xi = Xi.reshape([N, K])
Expand All @@ -292,14 +253,7 @@ def AitchisonAitken_Convolution(h, Xi, Xj):
return Ordered

def Gaussian_cdf(h, Xi, x):
Xi = np.asarray(Xi)
x = np.asarray(x)
h = np.asarray(h, dtype=float)
N = np.shape(Xi)[0]
if Xi.ndim > 1:
K = np.shape(Xi)[1]
else:
K = 1
h, Xi, x, N, K = _get_shape_and_transform(h,Xi,x)
if K == 0:
return Xi
cdf = 0.5 * h * (1 + erf((x - Xi) / (h * np.sqrt(2))))
Expand Down
43 changes: 6 additions & 37 deletions statsmodels/nonparametric/np_tools.py
Expand Up @@ -16,20 +16,7 @@
Convolution_Kernels = dict(gauss_convolution=kf.Gaussian_Convolution,
wangryzin_convolution=kf.WangRyzin_Convolution)


def IntegrateSingle(val, pdf):
f1 = lambda x: pdf(edat=np.asarray([x]))
return quad(f1, -np.Inf, val)[0]


def IntegrateDbl(val, pdf):
f2 = lambda y, x: pdf(edat=np.asarray([x, y]))
return dblquad(f2, -np.Inf, val[0], lambda x: -np.Inf, lambda x: val[1])[0]

def IntegrateTrpl(val, pdf):
f3 = lambda z, y, x: pdf(edat=np.asarray([x,y,z]))
return tplquad(f3, -np.Inf, val[0], lambda x: -np.Inf, lambda x: val[1],
lambda x, y: -np.Inf, lambda x, y: val[2])[0]


class LeaveOneOut(object):
# Written by Skipper
Expand Down Expand Up @@ -109,26 +96,10 @@ def GPKE(bw, tdat, edat, var_type, ckertype='gaussian',
isordered = np.where(var_type == 'o')[0]
isunordered = np.where(var_type == 'u')[0]
edat = np.asarray(edat)
#edat = np.squeeze(edat)
K = len(var_type)
edat = np.asarray(edat)
#edat = np.squeeze(edat)
## if tdat.ndim > 1:
## N, K = np.shape(tdat)
## else:
## K = 1
## N = np.shape(tdat)[0]
## tdat = tdat.reshape([N, K])
##
## if edat.ndim > 1:
## N_edat = np.shape(edat)[0]
## else:
## N_edat = 1
## edat = edat.reshape([N_edat, K])

if tdat.ndim == 1 and K == 1: # one variable many observations
N = np.size(tdat)
#N_edat = np.size(edat)
elif tdat.ndim == 1 and K > 1:
N = 1

Expand Down Expand Up @@ -163,7 +134,7 @@ def GPKE(bw, tdat, edat, var_type, ckertype='gaussian',


def GPKE_cond_cdf(bw, tdat, edat, var_type, ckertype='gaussian',
okertype='wangryzin', ukertype='aitchisonaitken'):
okertype='wangryzin', ukertype='aitchisonaitken', tosum=False):
# This function is just for testing the conditional cdf
# It can be reworked and incorporated in the GPKE. TODO
# The difference from GPKE is that it doesn't sum the products
Expand All @@ -177,7 +148,6 @@ def GPKE_cond_cdf(bw, tdat, edat, var_type, ckertype='gaussian',
edat = np.asarray(edat)
if tdat.ndim == 1 and K == 1: # one variable many observations
N = np.size(tdat)
#N_edat = np.size(edat)
elif tdat.ndim == 1 and K > 1:
N = 1

Expand Down Expand Up @@ -208,7 +178,10 @@ def GPKE_cond_cdf(bw, tdat, edat, var_type, ckertype='gaussian',
), axis=1)

dens[:,i] = np.prod(Kval, axis=1) * 1. / (np.prod(bw[iscontinuous]))
return dens
if tosum:
return np.sum(dens, axis=0)
else:
return dens


def PKE(bw, tdat, edat, var_type, ckertype='gaussian',
Expand Down Expand Up @@ -252,11 +225,9 @@ def PKE(bw, tdat, edat, var_type, ckertype='gaussian',
isunordered = np.where(var_type == 'u')[0]
K = len(var_type)
edat = np.asarray(edat)
#edat = np.squeeze(edat)

if tdat.ndim == 1 and K == 1: # one variable many observations
N = np.size(tdat)
#N_edat = np.size(edat)
elif tdat.ndim == 1 and K > 1:
N = 1

Expand All @@ -276,7 +247,6 @@ def PKE(bw, tdat, edat, var_type, ckertype='gaussian',
edat = edat.reshape([N_edat, K])

bw = np.reshape(np.asarray(bw), (K, )) # must remain 1-D for indexing to work
# dens = np.empty([N, 1])
Kval = np.concatenate((
kernel_func[ckertype](bw[iscontinuous], tdat[:, iscontinuous], edat[:, iscontinuous]),
kernel_func[okertype](bw[isordered], tdat[:, isordered], edat[:, isordered]),
Expand All @@ -301,7 +271,6 @@ def GPKE_Reg(bw, tdat, edat, var_type, ckertype='gaussian',

if tdat.ndim == 1 and K == 1: # one variable many observations
N = np.size(tdat)
#N_edat = np.size(edat)
elif tdat.ndim == 1 and K > 1:
N = 1

Expand Down

0 comments on commit b66e698

Please sign in to comment.