A demo of training, testing and predicting with a linear regression model built on the centered moments of simulated response data. Here we use kernel ridge regression to learn the radius and volume fraction.


In [1]:
# Dependencies
using StatsBase
using JLD
using GLMNet
using Plots 
using LaTeXStrings

In [2]:
# include Machine learning definitions and functions
using MultipleScattering
using MultipleScatteringLearnMoments
#include("../src/MultiplyScattering_learn_moments.jl");

In [3]:
# Names of the training and testing data sets (the ".mnts" suffix is appended below).
# Other training sets that have been used here: bunnytime8, bunnytime-low, bunnytimetest-low
# Other testing sets that have been used here: bunnytimetest8, bunnytimetest-extra,
#   bunnytimetest-extra8, bunnytime-extra, bunnytimetest-low
dataname = "bunnytime";
datatestname = "bunnytimetest";

# Load the training moments
train = load_moments(dataname * ".mnts");
# Load the testing moments
test = load_moments(datatestname * ".mnts");

loading:/home/rgower/.julia/v0.5/MultipleScatteringLearnMoments/test/../data/bunnytime.mnts.jld
loading:/home/rgower/.julia/v0.5/MultipleScatteringLearnMoments/test/../data/bunnytimetest.mnts.jld


In [7]:
## Learning the radius with kernel ridge regression
# momnums is shared with the volume-fraction cell below.
# NOTE(review): presumably the orders of the moments used as features — confirm against OptionsML.
momnums = [1,2];
# Options for the radius model. The meaning of the 2nd and 3rd arguments is not visible
# from this notebook — see the OptionsML definition.
optionsr = OptionsML(momnums,[],true) 
# Choose a feature map: fmmean, fmmoments, fmofm_full
fmr = fmmoments; 
# Choose a kernel: "OrnUhlen", "gauss", "rationquad", or "" for no kernel.
# See kernels.jl to add more kernels.
kernelname = "OrnUhlen";
# Retrieve the regularization parameter lambda and the kernel parameter that were
# calculated using cross validation for the training data set `dataname`. See cross_valid.ipynb
lambda, kernelparam = get_crossvalid_parameters(fmr,kernelname,"radius",optionsr,dataname);
# Set up the machine learning model for the "radius" target from the training data.
mlr = setup_ml(train, fmr,  kernelname,optionsr, "radius");
# Fit the radius model. The return value is discarded;
# presumably fit_L2 stores the fit inside mlr — TODO confirm.
fit_L2(mlr, optionsr,  kernelname, kernelparam, lambda);
# Test the fitted model on the test data. See plots generated in ./figures
Rsqdr = test_linear(test,datatestname,fmr,  mlr,optionsr, "radius");

found ../data/momentsmapOrnUhlen-radius-bunnytime-crossvalid.jld with lambdabest 5.960464477539063e-8 and kernelparam 16.0
(#data, #features): (205, 59)
Fitting momentsmapOrnUhlen-radius
0.0% absolute train error
radius 11.9% absolute test error
R squared error radius= 0.93


In [8]:
## Learning the volume fraction ("concen" target) with kernel ridge regression
# Options for the volume-fraction model; reuses momnums from the radius cell.
optionsv = OptionsML(momnums,[],true) 
# Choose a feature map.
fmv = fmmoments;  # alternatives: fmmean, fmmoments, fmofm, fmofm_full
# Choose a kernel from: "OrnUhlen", "gauss", "rationquad", or "" for no kernel.
# See kernels.jl to add more kernels.
kernelname = "OrnUhlen";  # e.g. "OrnUhlen", "gauss"
# Retrieve the regularization parameter lambda and the kernel parameter that were
# calculated using cross validation. See cross_valid.ipynb
# Note: this overwrites the kernelname, lambda and kernelparam set in the radius cell.
lambda, kernelparam = get_crossvalid_parameters(fmv,kernelname,"concen",optionsv,dataname);
# Set up the machine learning model for the "concen" target from the training data.
mlv = setup_ml(train, fmv,  kernelname,optionsv, "concen");
# Fit the volume model. The return value is discarded;
# presumably fit_L2 stores the fit inside mlv — TODO confirm.
fit_L2(mlv, optionsv,  kernelname, kernelparam, lambda);
# Test the fitted model on the test data. See plots generated in ./figures
Rsqdv = test_linear(test,datatestname,fmv,  mlv,optionsv, "concen");

found ../data/momentsmapOrnUhlen-concen-bunnytime-crossvalid.jld with lambdabest 0.25 and kernelparam 2.0
(#data, #features): (205, 59)
Fitting momentsmapOrnUhlen-concen
2.9% absolute train error
concen 16.6% absolute test error
R squared error concen= 0.96


In [13]:
## Predict a single test sample using the kernel ridge regression radius and volume models
gridnum = 80;
# The label of a sample holds the volume fraction first and the radius second.
vl, rd = test[gridnum].label[1], test[gridnum].label[2]
# The concentration shown for the true sample is computed as vl/rd^2.
println(string("True:      (concentration, radius) = (", round(vl/rd^2,3), ", ", round(rd,3), ')'));
# Make a single prediction on the test set with each fitted model;
# only the first entry of each prediction is printed.
vpred = predict_linear(test[gridnum], fmv,  mlv, optionsv)
rpred = predict_linear(test[gridnum], fmr,  mlr, optionsr)
println(string("Predicted: (concentration, radius) = (", round(vpred[1],3), ", ", round(rpred[1],3), ')'));

True:      (concentration, radius) = (0.121, 1.93)
Predicted: (concentration, radius) = (0.145, 1.092)
