In this notebook, I compare the output of `SwissFit` against the output of Peter Lepage's [lsqfit](https://github.com/gplepage/lsqfit) on an already-optimized set of starting parameters for a curve collapse analysis of the Ising model. The goal of this comparison is to pick out any differences in the estimates of important statistical quantities from both codes. First, let's load up the appropriate modules and grab both the starting parameters and fit data.

In [1]:
""" External modules """
import gvar as gv # Peter Lepage's GVar module
import lsqfit as lsqfit # Peter Lepage's lsqfit module
import numpy as np # NumPy for number crunching

""" Local modules """
import example_tools # For getting example data

""" SwissFit modules """
from swissfit import fit # SwissFit fitter module
from swissfit.optimizers import scipy_least_squares # Trust region reflective local optimizer
from swissfit.machine_learning import radial_basis # Module for radial basis function network

""" Grab data """
# Lower and upper couplings handed to the data loader
# K = J/T, where T is the standard Ising temp.
Kl = 1. / 2.3
Kh = 1. / 2.22

# Ns values, passed to the loader as strings
volumes = [str(ns) for ns in (64, 96, 128, 256)]

# Load the example data together with its starting parameters
data, starting_parameters = example_tools.potts2_data('u', Kl, Kh, volumes)

# The fits below start from the mean values of the starting parameters
p0 = gv.mean(starting_parameters)

Now let's set up the neural network and fit function as done in the `examples/potts2_example.ipynb` notebook. 

In [14]:
# Radial basis function network topology, assembled one layer at a time
network_topology = {}

# Hidden layer: 1 input, 2 outputs, exponential activation
network_topology['lyr1'] = {
    'in': 1,
    'out': 2,
    'activation': 'exp',
}

# Output layer: 2 inputs, 1 output, linear activation
network_topology['lyr2'] = {
    'in': 2,
    'out': 1,
    'activation': 'linear',
}

# Instantiate the radial basis function network from the topology
neural_network = radial_basis.RadialBasisNeuralNetwork(network_topology)

# Define fit function for SwissFit
def fit_fcn_swissfit(b, l, p):
    """Evaluate the network on the rescaled variable and flatten the result.

    The network input is (b * p['c'][0] - 1) * l**p['c'][1]; the full
    parameter dictionary p is also handed to the network.

    b, l : the two independent variables (fit_fcn_lsqfit passes the columns
           of data['x'] here)
    p    : parameter dictionary holding the two-component entry 'c' along
           with whatever neural_network.out reads from it
    """
    c_scale, c_power = p['c'][0], p['c'][1]
    network_input = (b * c_scale - 1.) * l**c_power
    network_output = neural_network.out(network_input, p)
    return np.ravel(network_output)

# Define fit function for lsqfit
def fit_fcn_lsqfit(p):
    """Wrap fit_fcn_swissfit so that it only takes the parameters p.

    The independent variables are read from the notebook-level data['x'];
    its transpose is unpacked into the leading positional arguments of
    fit_fcn_swissfit.
    """
    x_columns = np.transpose(data['x'])
    return fit_fcn_swissfit(*x_columns, p)

Finally, let's go ahead and grab priors for both codes.

In [3]:
# Define function that grabs the fit parameters
def get_priors(code, lmbda_weight, lmbda_center, lmbda_bandwidth, lmbda_bias):
    """Build the prior dictionary for one of the two fitting codes.

    Parameters
    ----------
    code : str
        'swissfit' stores the prior on the critical parameters under the key
        'c'; 'lsqfit' stores the log of the same prior under 'log(c)'.
    lmbda_weight : float
        Standard deviation of the ridge-regression prior on the 'lyr2' weights.
    lmbda_center : float
        Standard deviation of the ridge-regression prior on the 'lyr1' centers.
    lmbda_bandwidth : float
        Standard deviation of the ridge-regression prior on the 'lyr1'
        bandwidths.
    lmbda_bias : float
        Standard deviation of the ridge-regression prior on the 'lyr2' biases.

    Returns
    -------
    The result of neural_network.network_priors, which is handed the
    critical-parameter prior so that the network priors are added to it.

    Raises
    ------
    ValueError
        If code is neither 'swissfit' nor 'lsqfit'. Without this check an
        unrecognized code would silently yield a prior with no entry for
        the critical parameters.
    """
    # Validate first so that a typo in `code` fails loudly
    if code not in ('swissfit', 'lsqfit'):
        raise ValueError(
            "code must be 'swissfit' or 'lsqfit', got " + repr(code)
        )

    def ridge(layer, width):
        # Zero-mean ridge-regression prior of the given width on one layer
        return {
            layer: {
                'prior_type': 'ridge_regression', # Type of prior
                'mean': 0., # Mean of zero
                'standard_deviation': width # Width of lambda
            }
        }

    # Prior on critical parameters K_c & 1/nu
    critical = [gv.gvar('2.0(2.0)'), gv.gvar('1.0(1.0)')]
    if code == 'swissfit':
        prior = {'c': critical}
    else:
        # lsqfit receives the prior on log(c) directly
        prior = {'log(c)': gv.log(critical)}

    # Priors on the RBFN parameters, added on top of the dictionary above
    return neural_network.network_priors(
        prior_choice_center = ridge('lyr1', lmbda_center), # Hidden-layer centers
        prior_choice_bandwidth = ridge('lyr1', lmbda_bandwidth), # Hidden-layer bandwidths
        prior_choice_weight = ridge('lyr2', lmbda_weight), # Output-layer weights
        prior_choice_bias = ridge('lyr2', lmbda_bias), # Output-layer biases
        prior = prior # Take in already-specified prior dictionary and modify it
    )

# Widths of the ridge-regression priors, shared by both codes
lmbda_weight = 1.8
lmbda_center = 10.
lmbda_bandwidth = 5.
lmbda_bias = 5.

# Prior for SwissFit (critical parameters stored under 'c')
prior_swissfit = get_priors(
    'swissfit',
    lmbda_weight = lmbda_weight,
    lmbda_center = lmbda_center,
    lmbda_bandwidth = lmbda_bandwidth,
    lmbda_bias = lmbda_bias,
)

# Prior for lsqfit (critical parameters stored under 'log(c)')
prior_lsqfit = get_priors(
    'lsqfit',
    lmbda_weight = lmbda_weight,
    lmbda_center = lmbda_center,
    lmbda_bandwidth = lmbda_bandwidth,
    lmbda_bias = lmbda_bias,
)

Now I fit with `SwissFit`...

In [4]:
# Define a function that transforms the priors into log priors to force positivity on critical parameters
# The dictionary is keyed by parameter name; here only 'c' is mapped, through gv.log
log_priors = {'c': lambda x: gv.log(x)}

# Create SwissFit fit object
fitter = fit.SwissFit(
    udata = data, # Fit data; "data = data" is also acceptable - "udata" means "uncorrelated"
    uprior = prior_swissfit, # Priors from get_priors('swissfit', ...); "prior = prior" is also acceptable - "uprior" means "uncorrelated"
    p0 = p0, # Starting values for parameters (means of the starting parameters loaded with the data)
    fit_fcn = fit_fcn_swissfit, # Fit function
    prior_transformation_fcn = log_priors # Transformation of prior "c" to "log(c)"
)

# Create trust region reflective local optimizer from SciPy
local_optimizer = scipy_least_squares.SciPyLeastSquares(fitter = fitter)

# Do fit: calling the fit object with the optimizer runs it; printing shows the summary
fitter(local_optimizer)
print(fitter)


SwissFit: 🧀
   chi2/dof [dof] = 0.75 [29]   Q = 0.83   (Bayes) 
   chi2/dof [dof] = 0.98 [20]   Q = 0.48   (freq.) 
   AIC [k] = 39.8 [9]   logML = 132.733*

Parameters*:
     c
             1               2.269213(38)   [2.0(2.0)]
             2                 1.0005(27)   [1.0(1.0)]
     lyr1.center
             1                 -2.020(96)   [   0(10)]
             2                  -7.13(76)   [   0(10)]
     lyr1.bandwidth
             1                  0.511(44)   [0.0(5.0)]
             2                  0.074(10)   [0.0(5.0)]
     lyr2.weight
             1                 -0.235(33)   [0.0(1.8)]
             2                  -2.25(57)   [0.0(1.8)]
     lyr2.bias
             1                0.99720(20)   [0.0(5.0)]

Estimator:
   SwissFit optimizer object
*Laplace approximation



And the same fit with [lsqfit](https://github.com/gplepage/lsqfit)...

In [17]:
# Specify starting value for log(c)
# prior_lsqfit is keyed on 'log(c)', so p0 gets a matching entry; the existing 'c' entry is left in place
p0['log(c)'] = gv.log(p0['c'])

# Do fit & print result
# NOTE: this rebinds `fitter`, replacing the SwissFit fit object created earlier
fitter = lsqfit.nonlinear_fit(
    data = data['y'], # Only the 'y' entry is passed; fit_fcn_lsqfit reads data['x'] itself
    fcn = fit_fcn_lsqfit, # Single-argument wrapper around fit_fcn_swissfit
    prior = prior_lsqfit, # Priors from get_priors('lsqfit', ...)
    p0 = p0, # Starting values, now including 'log(c)'
)
print(fitter)

Least Square Fit:
  chi2/dof [dof] = 0.75 [29]    Q = 0.83    logGBF = 131.91

Parameters:
        log(c) 0   0.819433 (17)      [  0.7 (1.0) ]  
               1     0.0005 (27)      [  0.0 (1.0) ]  
   lyr1.center 0     -2.020 (96)      [     0 (10) ]  
               1      -7.13 (76)      [     0 (10) ]  
lyr1.bandwidth 0      0.511 (44)      [  0.0 (5.0) ]  
               1      0.074 (10)      [  0.0 (5.0) ]  
   lyr2.weight 0     -0.235 (33)      [  0.0 (1.8) ]  
               1      -2.25 (57)      [  0.0 (1.8) ]  *
     lyr2.bias 0    0.99720 (20)      [  0.0 (5.0) ]  
----------------------------------------------------
             c 0   2.269213 (38)      [  2.0 (2.0) ]  
               1     1.0005 (27)      [  1.0 (1.0) ]  

Settings:
  svdcut/n = 1e-12/0    tol = (1e-08,1e-10,1e-10*)    (itns/time = 8/0.0)
  fitter = scipy_least_squares    method = trf



You may notice, depending on the version of [lsqfit](https://github.com/gplepage/lsqfit), that the marginal likelihoods are slightly different. This difference stems from subtleties in how the covariance of the fit parameters is included in the marginal likelihood. In `SwissFit`, it enters the marginal likelihood through the Hessian of $\chi^2_{\mathrm{aug.}}$ and a negative sign is included in the log determinant to account for the fact that the Laplace-estimated covariance is the inverse of the Hessian of $\chi^2_{\mathrm{aug.}}$. This avoids any potential complications that can arise from taking the log determinant of the inverse, which I find can affect the estimate of the marginal likelihood. On the other hand, [lsqfit](https://github.com/gplepage/lsqfit) incorporates the covariance of the fit parameters directly. The slight difference can only be due to the inverse, and I have indeed checked that this is the case. Now let's do the same thing, but without any priors.

In [21]:
""" Remove log(c) from p0 """
# Drop the 'log(c)' starting value that was added to p0 for the lsqfit fit with priors,
# so both fits below start from 'c'. This cell expects that entry to be present.
del p0['log(c)']

""" SwissFit """
# Create SwissFit fit object
# No uprior is passed this time, so the fit runs without priors
fitter = fit.SwissFit(
    udata = data, # Fit data; "data = data" is also acceptable - "udata" means "uncorrelated"
    p0 = p0, # Starting values for parameters (same p0 as the fit with priors)
    fit_fcn = fit_fcn_swissfit, # Fit function
    prior_transformation_fcn = log_priors # Transformation of prior "c" to "log(c)"; still passed although no prior is given
)

# Create trust region reflective local optimizer from SciPy
local_optimizer = scipy_least_squares.SciPyLeastSquares(fitter = fitter)

# Do fit
fitter(local_optimizer)
print(fitter)

""" lsqfit """
# Do fit & print result
# NOTE: `fitter` is rebound again; the SwissFit result above has already been printed
fitter = lsqfit.nonlinear_fit(
    udata = data['y'], # Passed as "udata" here, whereas the fit with priors used "data"
    fcn = fit_fcn_lsqfit, # Single-argument wrapper around fit_fcn_swissfit
    p0 = p0 # No prior argument: the fit is driven by p0 alone
)
print(fitter)


SwissFit: 🧀
   chi2/dof [dof] = 0.97 [20]   Q = 0.5   (Bayes) 
   chi2/dof [dof] = 0.97 [20]   Q = 0.5   (freq.) 
   AIC [k] = 37.37 [9]   logML = 153.131*

Parameters*:
     c
             1               2.269213(38)   [n/a]
             2                 1.0009(27)   [n/a]
     lyr1.center
             1                 -2.041(98)   [n/a]
             2                  -7.53(96)   [n/a]
     lyr1.bandwidth
             1                  0.499(43)   [n/a]
             2                  0.069(11)   [n/a]
     lyr2.weight
             1                 -0.244(35)   [n/a]
             2                  -2.59(85)   [n/a]
     lyr2.bias
             1                0.99729(22)   [n/a]

Estimator:
   SwissFit optimizer object
*Laplace approximation

Least Square Fit (no prior):
  chi2/dof [dof] = 0.97 [20]    Q = 0.5    

Parameters:
             c 0   2.269213 (38)      [   2.26921 +- inf ]  
               1     1.0009 (27)      [   1.00055 +- inf ]  
   lyr1.center 0     -2.041 (9