A demo of training, predicting, and testing a linear model on the centered moments of simulated response data. Here we use kernel ridge regression to learn the radius and the volume fraction.


In [18]:
# Dependencies
using StatsBase
using LaTeXStrings
using JLD
using GLMNet
using Plots 

In [19]:
# include Machine learning definitions and functions
include("../src/MultiplyScattering_learn_moments.jl");



In [20]:
# Names of the training and testing data sets.
dataname = "bunnytime8";          # alternative: bunnytimetest-low
datatestname = "bunnytimetest8";  # alternatives: bunnytimetest-extra, bunnytimetest-extra8

# Load the training moments (other sets used before: bunnytime8, bunnytime-low).
train = load_moments(dataname * ".mnts");
# Load the testing moments (other sets used before: bunnytimetest8, bunnytime-extra, bunnytimetest-low).
test = load_moments(datatestname * ".mnts");

loading:/home/rgower/Dropbox/Scattering-1.5D/numerics/mom_linear_regression/test/../../Data/bunnytime8.mnts.jld
loading:/home/rgower/Dropbox/Scattering-1.5D/numerics/mom_linear_regression/test/../../Data/bunnytimetest8.mnts.jld


In [36]:
## Learning the radius with kernel ridge regression
# Feature map; choose one of: fmmean, fmmoments, fmofm_full.
fmr = fmmean;
# Kernel; choose one of: "OrnUhlen", "gauss", "rationquad", or "" for no kernel.
# See kernels.jl to add more kernels.
kernelname = "OrnUhlen";
# Options for the radius model. momnums is also reused by the volume-fraction cell.
# NOTE(review): the meaning of the three OptionsML arguments is defined in the
# included source file, not here -- confirm there before changing them.
momnums = [1, 2];
optionsr = OptionsML(momnums, [], true);
# Retrieve the regularization parameter lambda and the kernel parameters that were
# calculated using cross validation. See cross_valid.ipynb.
lambda, kernelparam = get_crossvalid_parameters(fmr, kernelname, "radius", optionsr, dataname);
# Set up the machine learning model for the radius on the training data.
mlr = setup_ml(train, fmr, kernelname, optionsr, "radius");
# Fit the radius model.
fit_L2(mlr, optionsr, kernelname, kernelparam, lambda);
# Test the fitted model on the test data. See plots generated in ./figures.
Rsqdr = test_linear(test, datatestname, fmr, mlr, optionsr, "radius");

found ../data/meanmapOrnUhlen-radius-bunnytime8-crossvalid.jld with lambdabest 64.0 and kernelparam 4.0
(#data, #features): (205, 30)
Fitting meanmapOrnUhlen-radius
52.5% absolute train error
radius 28.3% absolute test error
R squared error radius= 0.53


In [39]:
## Learning the volume fraction with kernel ridge regression
# Feature map; choose one of: fmmean, fmmoments, fmofm, fmofm_full.
fmv = fmmoments;
# Kernel; choose one of: "OrnUhlen", "gauss", "rationquad", or "" for no kernel.
# See kernels.jl to add more kernels.
kernelname = "OrnUhlen";
# Options for the volume-fraction model; momnums comes from the radius cell.
optionsv = OptionsML(momnums, [], true);
# Retrieve the regularization parameter lambda and the kernel parameters that were
# calculated using cross validation. See cross_valid.ipynb.
lambda, kernelparam = get_crossvalid_parameters(fmv, kernelname, "concen", optionsv, dataname);
# Set up the machine learning model for the volume fraction on the training data.
mlv = setup_ml(train, fmv, kernelname, optionsv, "concen");
# Fit the volume-fraction model.
fit_L2(mlv, optionsv, kernelname, kernelparam, lambda);
# Test the fitted model on the test data. See plots generated in ./figures.
Rsqdv = test_linear(test, datatestname, fmv, mlv, optionsv, "concen");

found ../data/momentsmapOrnUhlen-concen-bunnytime8-crossvalid.jld with lambdabest 5.960464477539063e-8 and kernelparam 64.0
(#data, #features): (205, 59)
Fitting momentsmapOrnUhlen-concen
0.0% absolute train error
concen 17.5% absolute test error
R squared error concen= 0.96


In [15]:
## Predict a single particle using the kernel ridge regression radius and volume models
gridnum = 10;  # index into the test set of the sample to predict
# The label stores the concentration first and the radius second.
vl, rd = test[gridnum].label[1], test[gridnum].label[2]
# Values are truncated (floor) to three decimals for display.
println("True (concentration, radius) = (", floor(vl * 1000) / 1000, ", ", floor(rd * 1000) / 1000, ')');
# Make a single prediction on the test set with each fitted model.
vpred = predict_linear(test[gridnum], fmv, mlv, optionsv)
rpred = predict_linear(test[gridnum], fmr, mlr, optionsr)
println("Pred (concentration, radius) = (", floor(vpred[1] * 1000) / 1000, ", ", floor(rpred[1] * 1000) / 1000, ')');

True (concentration, radius) = (0.03, 1.11)
Pred (concentration, radius) = (0.021, 0.989)
