A demo of training, predicting and testing a linear classifier using the centered moments of simulated response data. Here we use a L1 regularizor to select the most expressive moments.


In [1]:
# include Machine learning definitions and functions
using MultipleScattering
using MultipleScatteringLearnMoments

In [2]:
# Load training data
dataname = "bunnytime";
datatestname = "bunnytimetest";
train = load_moments(string(dataname,".mnts")); #bunnytime8  # bunnytime-low.
# Load testing data
test = load_moments(string(datatestname,".mnts")); #bunnytimetest8   # bunnytime-extra  # bunnytimetest-extra # bunnytimetest-low

loading:/home/rgower/.julia/v0.5/MultipleScatteringLearnMoments/test/../data/bunnytime.mnts.jld
loading:/home/rgower/.julia/v0.5/MultipleScatteringLearnMoments/test/../data/bunnytimetest.mnts.jld


In [3]:
## Using Lasso as a feature selection for the moments
## Learning the radius with Lasso and a feature map
momnums = [1,2,3,4,5,6];
optionsr = OptionsML(momnums,[],true) 
#Choose a feature map: fmmean, fmmoments,  fmofm
fmr = fmmoments; 
lambda = 0.0; # 0.0 for default choice of cross-validation grid, "GLMNet" lets GLMNet choose the grid
mlr = setup_ml(train, fmr, "L1",optionsr, "radius");
# fitting the radius model
fit_L1(mlr, optionsr, lambda);
Rsqdr = test_linear(test, datatestname, fmr,  mlr,optionsr, "radius");

R squared error radius= 0.84


In [4]:
## Plotting the weight attributed to eat moment. The plot shows that the 3rd moment is not important
lengthresp = length(train[1].moments[1]);
smon = size(train[1].moments[momnums]);
mom_weights_square_r = reshape( abs.(mlr.parameters[1:end-1]), smon[1], lengthresp);
heatmap(train[1].x_arr,1:smon[1], mom_weights_square_r', xlabel="time", ylabel="moment",fill=(true,cgrad(:blues,[0,0.1,1.0])))#
savefig("../figures/weights-heatmap-$(mlr.name).pdf");

LoadError: UndefVarError: heatmap not defined

In [5]:
## Using Lasso as a feature selection for the moments
## Learning the concentration with Lasso and a feature map
momnums = [1,2,3,4,5,6]; 
optionsv = OptionsML(momnums,[],true) 
#Choose a feature map: fmmean, fmmoments,  fmofm,
fmv = fmmoments; 
lambda = 0.0; # 0.0 for default choice of cross-validation grid, "GLMNet" lets GLMNet choose the grid
# retrieving lambdas that were calculated using  cross validation. See cross_valid.ipynb
mlv = setup_ml(train, fmv, "L1",optionsv, "concen");
# fitting the concentration model
fit_L1(mlv, optionsv, lambda);
Rsqdr = test_linear(test, datatestname, fmv,  mlv,optionsv, "concen");

(#data, #features): (205, 175)
Fitting momentsmapL1-concen
8.3% absolute train error
concen 22.0% absolute test error
R squared error concen= 0.93


In [6]:
## Plotting the weight attributed to eat moment. The plot shows that the 3rd moment is not important
lengthresp = length(train[1].moments[1]);
smon = size(train[1].moments[momnums]);
mom_weights_square_v = reshape( abs.(mlv.parameters[1:end-1]), smon[1], lengthresp);
heatmap(train[1].x_arr,1:smon[1], mom_weights_square_v', xlabel="time", ylabel="moment",fill=(true,cgrad(:blues,[0,0.1,1.0])))#
savefig("../figures/weights-heatmap-$(mlv.name).pdf");

LoadError: UndefVarError: heatmap not defined

In [7]:
## Predict a single particle using the kernal ridgeregression radius and volume models
particlenum = 10;
vl = test[particlenum].label[1]
rd = test[particlenum].label[2]
println("(vl, rd) =",vl,',',rd);
# Make a single prediction on test set
 vpred = predict_linear(test[particlenum], fmv,  mlv, optionsv)
 rpred = predict_linear(test[particlenum], fmr,  mlr, optionsr)
 println("(vpred, rpred) =",floor(vpred[1]*1000)/1000,',',floor(rpred[1]*1000)/1000);

(vl, rd) =0.03,1.11
(vpred, rpred) =0.023,1.062


In [8]:
## Using Lasso as a feature selection for the moments
## Learning the volume fraction with Lasso and a feature map
momnums = [1,2,3,4,5,6];
optionsv = OptionsML(momnums,[],true) 
#Choose a feature map: fmmean, fmmoments,  fmofm,
fmv = fmmoments; 
lambda = 0.0; # 0.0 for default choice of cross-validation grid, "GLMNet" lets GLMNet choose the grid
# retrieving lambdas that were calculated using  cross validation. See cross_valid.ipynb
mlv = setup_ml(train, fmv, "L1",optionsv, "volumefraction");
# fitting the concentration model
fit_L1(mlv, optionsv, lambda);
Rsqdr = test_linear(test, datatestname, fmv,  mlv,optionsv, "volumnefraction");

(#data, #features): (205, 175)
Fitting momentsmapL1-volumefraction
28.9% absolute train error
volumnefraction 84.6% absolute test error
R squared error volumnefraction= -0.02
