/
MixtureOfGaussiansExample.java
231 lines (205 loc) · 9.23 KB
/
MixtureOfGaussiansExample.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
/*
* File: MixtureOfGaussiansExample.java
* Authors: Justin Basilico
* Company: Sandia National Laboratories
* Project: Cognitive Foundry
*
* Copyright September 11, 2009, Sandia Corporation.
* Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
* license for use of this work by or on behalf of the U.S. Government. Export
* of this program may require a license from the United States Government.
* See CopyrightHistory.txt for complete details.
*
*/
package examples;
import gov.sandia.cognition.learning.algorithm.clustering.KMeansClustererWithRemoval;
import gov.sandia.cognition.learning.algorithm.clustering.cluster.GaussianCluster;
import gov.sandia.cognition.learning.algorithm.clustering.cluster.GaussianClusterCreator;
import gov.sandia.cognition.learning.algorithm.clustering.divergence.GaussianClusterDivergenceFunction;
import gov.sandia.cognition.learning.algorithm.clustering.initializer.NeighborhoodGaussianClusterInitializer;
import gov.sandia.cognition.math.matrix.Matrix;
import gov.sandia.cognition.math.matrix.MatrixFactory;
import gov.sandia.cognition.math.matrix.Vector;
import gov.sandia.cognition.math.matrix.VectorFactory;
import gov.sandia.cognition.statistics.distribution.MixtureOfGaussians;
import gov.sandia.cognition.statistics.distribution.MultivariateGaussian;
import java.util.ArrayList;
import java.util.Random;
/**
* This example shows how to learn a MixtureOfGaussians from a set of data. It
* uses a random dataset to provide the training data and then learns a
* MixtureOfGaussians using both a soft learner and a hard learner. The hard
* learner does hard assignment of points to gaussians when learning the
* mixture while the soft learner does soft assignment.
*
* @author Justin Basilico
* @since 3.0
*/
public class MixtureOfGaussiansExample
{

    /**
     * Private constructor to prevent instantiation: this is an example
     * class containing only static methods and is meant to be run via
     * {@link #main}.
     */
    private MixtureOfGaussiansExample()
    {
        super();
    }

    /**
     * Runs the example. It generates a random dataset from a known mixture
     * of gaussians, then fits new mixtures to that data using both a soft
     * (EM) learner and a hard (clustering-based) learner, printing each
     * mixture so the results can be compared to the ground truth.
     *
     * @param args
     *      Command-line arguments (ignored).
     */
    public static void main(
        final String[] args)
    {
        // Part 1: Create some data.
        // To start with we need some data to learn from. To do this, we are
        // going to create a random mixture of gaussians and then generate
        // random samples from it. In a real application you would typically
        // load in some data set of Vectors instead of generating the data.

        // Here are some general parameters for the example.
        final int dimensionality = 2;
        final double range = 10.0;
        final int numSamples = 100;
        final int actualNumGaussians = 2;
        final int guessedNumGaussians = 2;

        // Create a random number generator to use to generate our data.
        final Random random = new Random(47);
        // Note: Using a fixed seed like this means that each time the
        // example is run, the same set of data will be generated.

        // Create a mixture of gaussians as the mechanism to create some
        // random data.
        final MixtureOfGaussians.PDF actualMixture = createRandomMixtureOfGaussians(
            random, actualNumGaussians, dimensionality, range);

        // Print out our actual mixture so we can look at it to compare how
        // the learner does.
        System.out.println("Actual gaussians:");
        printMixture(actualMixture);
        System.out.println();

        // Now we sample from the mixture to create our training dataset.
        final ArrayList<Vector> data = actualMixture.sample(random, numSamples);

        // Part 2: Using a Soft Learner
        // Now that we have some data to use, we create a soft learner to try
        // and fit a mixture of gaussians to the example data that we have.
        // For this example, the only real parameter we are concerned with
        // is telling the soft learner how many gaussians to look for, which
        // is passed to the constructor.
        final MixtureOfGaussians.EMLearner softLearner =
            new MixtureOfGaussians.EMLearner(guessedNumGaussians, random);

        // Now that our learner is configured, we call the learning algorithm
        // by calling the learn method and passing in our dataset. This runs
        // the algorithm and then returns the mixture of gaussians it has
        // learned from that data.
        MixtureOfGaussians.PDF learnedMixture = softLearner.learn(data);
        System.out.println("Soft Learned Gaussians: ");
        printMixture(learnedMixture);
        System.out.println();

        // Part 3: Using a Hard Learner
        // Using the hard learner is a little more complicated than the soft
        // learner because you can pass in the specific clustering algorithm
        // that you want it to use.

        // We are going to try using the K-means algorithm with removal to
        // do hard clustering on the data.
        final int maxIterations = 1000;
        final double removalThreshold = 0.1;
        final KMeansClustererWithRemoval<Vector, GaussianCluster> kmeans =
            new KMeansClustererWithRemoval<Vector, GaussianCluster>(
                guessedNumGaussians,
                maxIterations,
                new NeighborhoodGaussianClusterInitializer(random),
                new GaussianClusterDivergenceFunction(),
                new GaussianClusterCreator(),
                removalThreshold);
        // Note that here we pass in the guessed number of gaussians to the
        // constructor for KMeans so that it knows how many clusters to start
        // with.

        // After we have created our clustering algorithm, we create a new
        // hard learner and pass the clustering algorithm to it.
        final MixtureOfGaussians.Learner hardLearner =
            new MixtureOfGaussians.Learner(kmeans);
        // The hard learner does not have any parameters to tune since it just
        // uses the parameters of the KMeans algorithm.

        // Now we call the learn method on the hard learner and again get out
        // our learned mixture of gaussians.
        learnedMixture = hardLearner.learn(data);
        System.out.println("Hard Learned Gaussians: ");
        printMixture(learnedMixture);
        System.out.println();
    }

    /**
     * Prints a mixture of gaussians to System.out. For each component
     * gaussian in the mixture it prints the (normalized) prior weight,
     * the mean vector, and the covariance matrix.
     *
     * @param mixture
     *      The mixture to print.
     */
    public static void printMixture(
        final MixtureOfGaussians.PDF mixture)
    {
        // Loop through the mixture and print out the component gaussians
        // that make up the mixture.
        for (int i = 0; i < mixture.getDistributionCount(); i++)
        {
            final MultivariateGaussian gaussian =
                mixture.getDistributions().get(i);

            // Get some information about the gaussian. The raw prior
            // weights are not required to sum to one, so divide by the
            // total to report a normalized prior probability.
            final double prior = mixture.getPriorWeights()[i] / mixture.getPriorWeightSum();
            final Vector mean = gaussian.getMean();
            final Matrix covariance = gaussian.getCovariance();

            System.out.println("Gaussian " + (i + 1));
            System.out.println("Prior: " + prior);
            System.out.println("Mean: " + mean);
            System.out.println("Covariance: ");
            System.out.println(covariance);
        }
    }

    /**
     * Creates a random mixture of gaussians containing the requested number
     * of gaussians inside it of the given dimensionality.
     *
     * @param random
     *      The random number generator.
     * @param numGaussians
     *      The number of gaussians to put in the mixture.
     * @param dimensionality
     *      The dimensionality of the multivariate gaussians in the mixture.
     * @param range
     *      The range of values to allow the mixture over.
     * @return
     *      A new random mixture of gaussians.
     */
    public static MixtureOfGaussians.PDF createRandomMixtureOfGaussians(
        final Random random,
        final int numGaussians,
        final int dimensionality,
        final double range)
    {
        // Create a random set of gaussians to form the mixture.
        final ArrayList<MultivariateGaussian.PDF> gaussians =
            new ArrayList<MultivariateGaussian.PDF>(numGaussians);
        for (int i = 0; i < numGaussians; i++)
        {
            final MultivariateGaussian.PDF gaussian =
                createRandomGaussian(random, dimensionality, range);
            gaussians.add(gaussian);
        }
        return new MixtureOfGaussians.PDF(gaussians);
    }

    /**
     * Creates a random multivariate gaussian. The mean is drawn uniformly
     * from [-range, range] in each dimension and the covariance is a scaled
     * identity matrix (0.5 * range on the diagonal).
     *
     * @param random
     *      The random number generator.
     * @param dimensionality
     *      The dimensionality of the multivariate gaussian to create.
     * @param range
     *      The range of values for the gaussian.
     * @return
     *      A new random multivariate gaussian.
     */
    public static MultivariateGaussian.PDF createRandomGaussian(
        final Random random,
        final int dimensionality,
        final double range)
    {
        return new MultivariateGaussian.PDF(
            VectorFactory.getDefault().createUniformRandom(
                dimensionality, -range, range, random),
            MatrixFactory.getDefault().createIdentity(
                dimensionality, dimensionality).scale(0.5 * range));
    }

}