From d6a4bc7d7358444859552a9e9c74ddc63abc6143 Mon Sep 17 00:00:00 2001 From: Brendan Tracey Date: Wed, 26 Oct 2016 00:40:32 -0600 Subject: [PATCH] PR cleanups --- distmv/studentst.go | 76 +++++++++++++++++++++++++-------------------- 1 file changed, 43 insertions(+), 33 deletions(-) diff --git a/distmv/studentst.go b/distmv/studentst.go index a362b22..725e620 100644 --- a/distmv/studentst.go +++ b/distmv/studentst.go @@ -7,14 +7,17 @@ package distmv import ( "math" "math/rand" + "sort" "sync" + "golang.org/x/tools/container/intsets" + "github.com/gonum/floats" "github.com/gonum/matrix/mat64" "github.com/gonum/stat/distuv" ) -// StudentT is a multi-variate StudentT distribution. It is a distribution over +// StudentsT is a multi-variate Student's T distribution. It is a distribution over // ℝ^n with the probability density // p(y) = (Γ((ν+d)/2) / Γ(ν/2)) * (νπ)^(-n/2) * |Ʃ|^(-1/2) * // (1 + 1/ν * (y-μ)^T * Ʃ^-1 * (y-μ))^(-(ν+n)/2) @@ -26,7 +29,8 @@ import ( // the distribution approaches a multi-variate normal distribution. // μ is the mean of the distribution, and the covariance is ν/(ν-2)*Ʃ. // -// See http://users.isy.liu.se/en/rt/roth/student.pdf for more information. +// See https://en.wikipedia.org/wiki/Student%27s_t-distribution and +// http://users.isy.liu.se/en/rt/roth/student.pdf for more information. type StudentsT struct { nu float64 mu []float64 @@ -41,13 +45,12 @@ type StudentsT struct { dim int } -// NewStudentsT creates a new StudentsT with the given scale, mean, and covariance +// NewStudentsT creates a new StudentsT with the given nu, mu, and sigma // parameters. // // NewStudentsT panics if len(mu) == 0, or if len(mu) != sigma.Symmetric(). If -// the covariance matrix is not positive-definite, nil is returned and the -// boolean is false. -func NewStudentsT(nu float64, mu []float64, sigma mat64.Symmetric, src *rand.Rand) (*StudentsT, bool) { +// the covariance matrix is not positive-definite, nil is returned and ok is false. +func NewStudentsT(nu float64, mu []float64, sigma mat64.Symmetric, src *rand.Rand) (dist *StudentsT, ok bool) { if len(mu) == 0 { panic(badZeroDimension) } @@ -64,7 +67,7 @@ func NewStudentsT(nu float64, mu []float64, sigma mat64.Symmetric, src *rand.Ran } copy(s.mu, mu) - ok := s.chol.Factorize(sigma) + ok = s.chol.Factorize(sigma) if !ok { return nil, false } @@ -80,7 +83,8 @@ func NewStudentsT(nu float64, mu []float64, sigma mat64.Symmetric, src *rand.Ran // of dimension 1 is observed, the returned normal represents dimensions {0, 2, ...} // of the original Student's T distribution. // -// ConditionStudentsT returns {nil, false} if there is a failure during the update. +// ok indicates whether there was a failure during the update, and if ok is false +// dist is not usable. // Mathematically this is impossible, but can occur with finite precision arithmetic. func (s *StudentsT) ConditionStudentsT(observed []int, values []float64, src *rand.Rand) (*StudentsT, bool) { if len(observed) == 0 { @@ -107,31 +111,10 @@ func (s *StudentsT) ConditionStudentsT(observed []int, values []float64, src *ra } -// findUnob returns the unobserved variables (the complementary set to observed). -// findUnob panics if any value repeated in observed. -func findUnob(observed []int, dim int) (unobserved []int) { - ob := len(observed) - unob := dim - ob - obMap := make(map[int]struct{}) - for _, v := range observed { - if _, ok := obMap[v]; ok { - panic("distmv: observed dimension occurs twice") - } - obMap[v] = struct{}{} - } - unobserved = make([]int, 0, unob) - for i := 0; i < dim; i++ { - if _, ok := obMap[i]; !ok { - unobserved = append(unobserved, i) - } - } - return unobserved -} - -// studentsTConditional updates a Students T distribution based on the observed samples +// studentsTConditional updates a Student's T distribution based on the observed samples // (see documentation for the public function). The Gaussian conditional update // is treated as a special case when nu == math.Inf(1). -func studentsTConditional(observed []int, values []float64, nu float64, mu []float64, sigma mat64.Symmetric) (newNu float64, newMean []float64, newSigma mat64.Symmetric) { +func studentsTConditional(observed []int, values []float64, nu float64, mu []float64, sigma mat64.Symmetric) (newNu float64, newMean []float64, newSigma *mat64.SymDense) { dim := len(mu) ob := len(observed) @@ -214,6 +197,24 @@ func studentsTConditional(observed []int, values []float64, nu float64, mu []flo return nu + float64(ob), mu1, &sigma11 } +// findUnob returns the unobserved variables (the complementary set to observed). +// findUnob panics if any value repeated in observed. +func findUnob(observed []int, dim int) (unobserved []int) { + setOb := &intsets.Sparse{} + for _, v := range observed { + setOb.Insert(v) + } + setAll := &intsets.Sparse{} + for i := 0; i < dim; i++ { + setAll.Insert(i) + } + setUnob := &intsets.Sparse{} + setUnob.Difference(setAll, setOb) + unobserved = setUnob.AppendTo(nil) + sort.Ints(unobserved) + return unobserved +} + // CovarianceMatrix returns the covariance matrix of the distribution. Upon // return, the value at element {i, j} of the covariance matrix is equal to // the covariance of the i^th and j^th variables. @@ -239,6 +240,7 @@ func (s *StudentsT) Dim() int { return s.dim } +// LogProb computes the log of the pdf of the point x. func (s *StudentsT) LogProb(y []float64) float64 { if len(y) != s.dim { panic(badInputLength) @@ -270,7 +272,11 @@ func (s *StudentsT) LogProb(y []float64) float64 { // p(x_i) = \int_{x_o} p(x_i | x_o) p(x_o) dx_o // where x_i are the dimensions in the input, and x_o are the remaining dimensions. // The input src is passed to the call to NewStudentsT. -func (s *StudentsT) MarginalStudentsT(vars []int, src *rand.Rand) (*StudentsT, bool) { +// +// ok indicates whether there was a failure during the update, and if ok is false +// dist is not usable. +// Mathematically this is impossible, but can occur with finite precision arithmetic. +func (s *StudentsT) MarginalStudentsT(vars []int, src *rand.Rand) (dist *StudentsT, ok bool) { newMean := make([]float64, len(vars)) for i, v := range vars { newMean[i] = s.mu[v] @@ -293,10 +299,14 @@ func (s *StudentsT) Mean(x []float64) []float64 { return x } +// Prob computes the value of the probability density function at x. func (s *StudentsT) Prob(y []float64) float64 { return math.Exp(s.LogProb(y)) } +// Rand generates a random number according to the distributon. +// If the input slice is nil, new memory is allocated, otherwise the result is stored +// in place. func (s *StudentsT) Rand(x []float64) []float64 { // If Y is distributed according to N(0,Sigma), and U is chi^2 with // parameter ν, then @@ -326,7 +336,7 @@ func (s *StudentsT) Rand(x []float64) []float64 { return x } -// setK computes and stores the covariance matrix of the distribution. +// setSigma computes and stores the covariance matrix of the distribution. func (s *StudentsT) setSigma() { s.once.Do(func() { s.sigma = mat64.NewSymDense(s.dim, nil)