In [1]:
# import internal files
from historymatch import emulators
from historymatch import sample
from historymatch import historymatch
from historymatch import plot
from historymatch import utils


# import external modules
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import os
import pandas as pd
#import importlib
from matplotlib.patches import Rectangle

import sklearn.linear_model as skl

plt.rcParams.update({'font.size': 10})

np.random.seed(4)

In [2]:
# import data

with open("data/MassEval2016.dat",'r') as infile:
    Masses = pd.read_fwf(infile, usecols=(2,3,4,6,11,12),
              names=('N', 'Z', 'A', 'Element', 'Ebinding', 'E_unc'),
              widths=(1,3,5,5,5,1,3,4,1,13,11,11,9,1,2,11,9,1,3,1,12,11,1),
              header=64,
              index_col=False)
    
# Extrapolated values are indicated by '#' in place of the decimal place, so
# the Ebinding column won't be numeric. Coerce to float and drop these entries.
Masses['Ebinding'] = pd.to_numeric(Masses['Ebinding'], errors='coerce')
Masses = Masses.dropna()
Masses['E_unc'] = pd.to_numeric(Masses['E_unc'], errors='coerce')
Masses = Masses.dropna()
# Convert from keV to MeV.
Masses['Ebinding'] /= 1000
Masses['E_unc'] /= 1000

# Group the DataFrame by nucleon number, A.
Masses = Masses.groupby('A')
# Find the rows of the grouped DataFrame with the maximum binding energy.
Masses = Masses.apply(lambda t: t[t.Ebinding==t.Ebinding.max()])

A = Masses['A'].to_numpy()
Z = Masses['Z'].to_numpy()
N = Masses['N'].to_numpy()
Element = Masses['Element'].to_numpy()
Energies = Masses['Ebinding'].to_numpy()

#Energies_unc = Masses['E_unc'].to_numpy()
Energies_unc = Masses['E_unc'].to_numpy()

In [3]:
# define parameter space

#theta_0_bound = np.array([-20, 20]).reshape(1,-1)
#theta_1_bound = np.array([-0.3, -0.1]).reshape(1,-1)
#theta_2_bound = np.array([0, 3]).reshape(1,-1)
#theta_3_bound = np.array([0, 0.01]).reshape(1,-1) # keep
#theta_4_bound = np.array([-0.3, 0.2]).reshape(1,-1)

theta_0_bound = np.array([3, 6]).reshape(1,-1)
theta_1_bound = np.array([-0.21, -0.19]).reshape(1,-1)
theta_2_bound = np.array([0.8, 1.1]).reshape(1,-1)
theta_3_bound = np.array([0.005, 0.010]).reshape(1,-1) # keep
theta_4_bound = np.array([0.31, 0.33]).reshape(1,-1)


parameter_bounds = np.concatenate((theta_0_bound, theta_1_bound, \
                                   theta_2_bound, theta_3_bound, theta_4_bound), axis=0)

#parameter_bounds = np.concatenate((theta_0_bound, theta_1_bound, theta_2_bound), axis=0)



theta_0_vals = np.linspace(parameter_bounds[0,0], parameter_bounds[0,1], 100)
theta_1_vals = np.linspace(parameter_bounds[1,0], parameter_bounds[1,1], 100)
theta_2_vals = np.linspace(parameter_bounds[2,0], parameter_bounds[2,1], 100)
theta_3_vals = np.linspace(parameter_bounds[3,0], parameter_bounds[3,1], 100)
theta_4_vals = np.linspace(parameter_bounds[4,0], parameter_bounds[4,1], 100)


theta_vals = np.concatenate((theta_0_vals.reshape(1,-1), theta_1_vals.reshape(1,-1), theta_2_vals.reshape(1,-1), theta_3_vals.reshape(1,-1), theta_4_vals.reshape(1,-1)), axis=0)


In [4]:
def LiquidDropModel(a0, a1, a2, a3, a4, A, Z, N):
    
    return a0 + a1*A + a2*(A**(2.0/3.0)) + a3*Z*(Z-1)*(A**(-1.0/3.0)) \
                + a4*((N-Z)**2)/A




In [5]:
# generate observational data with some uncertainty

noutputs = 10

start = 100
step = 20

#variables = np.concatenate((A[-noutputs:].reshape(-1,1),Z[-noutputs:].reshape(-1,1),\
                            #N[-noutputs:].reshape(-1,1)), axis=1)


variables = np.concatenate((A[start::step].reshape(-1,1),Z[start::step].reshape(-1,1),\
                            N[start::step].reshape(-1,1)), axis=1)


In [6]:
# Now we set up the design matrix X
X2 = np.zeros((len(A[-1:]),5))
X2[:,0] = np.ones_like(A[-1:])
X2[:,1] = A[-1:]
X2[:,2] = A[-1:]**(2.0/3.0)
X2[:,3] = Z[-1:]*(Z[-1:]-1) * A[-1:]**(-1.0/3.0)
X2[:,4] = (N[-1:]-Z[-1:])**2 * A[-1:]**(-1.0)


# Now we set up the design matrix X
X = np.zeros((len(A),5))
X[:,0] = np.ones_like(A)
X[:,1] = A
X[:,2] = A**(2.0/3.0)
X[:,3] = Z*(Z-1) * A**(-1.0/3.0)
X[:,4] = (N-Z)**2 * A**(-1.0)

In [7]:
clf = skl.LinearRegression(fit_intercept=False).fit(X, Energies)

print(clf.coef_)

preds = clf.predict(X)

err = np.square(Energies-preds)
pred_sigma = np.sqrt(np.var(err))

print(pred_sigma)


[ 4.74783699 -0.20173376  0.94655788  0.00783107  0.32286627]
0.04119868982872722


In [8]:
# generate observational data (for testing)

sigma_obs = Energies_unc

#https://www.int.washington.edu/users/bertsch/articles/369.pdf
sigma_model = pred_sigma

obs_data = Energies

In [9]:
nwaves = 5
ndim = 5
volshape = 'ellipsoid'

In [10]:
#importlib.reload(historymatch)

# initialise history matching class
HM = historymatch.HistoryMatch(ndim, 'GP', volshape)

In [11]:


ToyModel = historymatch.Simulator(HM)
ToyModel.set_simulator(LiquidDropModel)

HM.set_observations(obs_data[start::step], variables=variables, sigma_obs=sigma_obs, sigma_model=sigma_model)
HM.initialize_volume(parameter_bounds[:,0], parameter_bounds[:,1])

results = HM.run(nwaves=nwaves)

(5, 2)
(5, 2)
Running wave 1


  0%|          | 0/8 [00:00<?, ?it/s]

ein: 
mean em sd : 8.763686474721434
min em sd : 0.443364218626076
max em sd : 19.13411683873309


 12%|█▎        | 1/8 [00:00<00:00,  7.30it/s]

ein: 
mean em sd : 5.478609069415637
min em sd : 0.28879343791974205
max em sd : 12.219360909379256


 38%|███▊      | 3/8 [00:00<00:00,  9.64it/s]

ein: 
mean em sd : 5.471850891809
min em sd : 0.14757099957057798
max em sd : 12.760725559024882
ein: 
mean em sd : 14.863055478802293
min em sd : 0.8013953875842799
max em sd : 24.24324604344342
ein: 
mean em sd : 10.017096258451577
min em sd : 0.21847248961707708
max em sd : 22.75618967977593


 75%|███████▌  | 6/8 [00:00<00:00,  9.38it/s]

ein: 
mean em sd : 5.9143774981928985
min em sd : 0.05438594158050857
max em sd : 13.120047172000401
ein: 
mean em sd : 11.507987381043796
min em sd : 3.898452154376607
max em sd : 16.508233074353374


100%|██████████| 8/8 [00:00<00:00,  9.19it/s]

ein: 
mean em sd : 20.946649697891853
min em sd : 0.41885795309455814
max em sd : 49.96963506393842
(129, 6)
Convergence : False





Relative nonimplausible volume remaining: 0.953
Running wave 2


  0%|          | 0/8 [00:00<?, ?it/s]

ein: 
mean em sd : 5.990073795638871
min em sd : 0.19342339407411202
max em sd : 13.414514685673884


 12%|█▎        | 1/8 [00:00<00:00,  8.59it/s]

ein: 
mean em sd : 13.09084031471404
min em sd : 0.3623611995733786
max em sd : 27.053747383121376


 38%|███▊      | 3/8 [00:00<00:00,  9.12it/s]

ein: 
mean em sd : 20.69281207761253
min em sd : 0.1585243337299145
max em sd : 41.695342869050265
ein: 
mean em sd : 8.248863410949605
min em sd : 0.10537618449036805
max em sd : 15.014797371804628
ein: 
mean em sd : 15.224480455936384
min em sd : 0.26568180653032647
max em sd : 31.372864071772458


 62%|██████▎   | 5/8 [00:00<00:00, 10.43it/s]

ein: 
mean em sd : 17.88380493085932
min em sd : 0.5480269857457013
max em sd : 40.66547948566


 88%|████████▊ | 7/8 [00:00<00:00, 11.62it/s]

ein: 
mean em sd : 10.397828541886723
min em sd : 0.41932890669250544
max em sd : 20.98845741748093
ein: 
mean em sd : 13.029391432775878
min em sd : 0.6616411702685658
max em sd : 26.159857774159864


100%|██████████| 8/8 [00:00<00:00, 10.83it/s]

(30, 6)
Convergence : False





Relative nonimplausible volume remaining: 2.809
Running wave 3


  0%|          | 0/8 [00:00<?, ?it/s]

ein: 
mean em sd : 8.269429632361433
min em sd : 0.4699182737014311
max em sd : 18.575022246506272
ein: 


 25%|██▌       | 2/8 [00:00<00:00, 10.61it/s]

mean em sd : 3.9756015541719454
min em sd : 0.17766300680899658
max em sd : 9.193024104843317
ein: 
mean em sd : 8.73128123497255
min em sd : 0.40809782891402674
max em sd : 14.336054177743982
ein: 
mean em sd : 16.363999828270263
min em sd : 0.4192278881880324
max em sd : 42.105272237456646


 50%|█████     | 4/8 [00:00<00:00, 10.64it/s]

ein: 
mean em sd : 6.498614299197559
min em sd : 0.09229132622413036
max em sd : 15.55540317784766
ein: 
mean em sd : 7.558554628246117
min em sd : 0.13449892419360898
max em sd : 15.808087565325703


 75%|███████▌  | 6/8 [00:00<00:00, 10.48it/s]

ein: 
mean em sd : 98.04590949216524
min em sd : 12.296853894200426
max em sd : 161.27654311566766


100%|██████████| 8/8 [00:00<00:00, 10.55it/s]


ein: 
mean em sd : 17.9039888315745
min em sd : 0.16222613513302744
max em sd : 42.591918635377155
(0, 6)


ValueError: zero-size array to reduction operation minimum which has no identity

In [None]:
plot_wave = 1

print(results.nonimp_bounds)


In [None]:
#importlib.reload(plot)

#fig, axes = plt.subplots(ndim,ndim,figsize=(10,10))

#plot.plotcorner(results.I_samples[4], sample_bounds, ndim, bins=20, Fig=(fig,axes), labels=('a0', 'a1', 'a2', 'a3', 'a4'))
#axes[1,0].scatter(clf.coef_[0],clf.coef_[1], color='red')
#axes[2,0].scatter(clf.coef_[0],clf.coef_[2], color='red')
#axes[2,1].scatter(clf.coef_[1],clf.coef_[2], color='red')
#axes[3,1].scatter(clf.coef_[1],clf.coef_[3], color='red')
#axes[2,3].scatter(clf.coef_[3],clf.coef_[2], color='red')

#for i in range(ndim):
    #axes[i,i].axvline(clf.coef_[i], color='red')

In [12]:
#importlib.reload(plot)

colors = ['turquoise', 'cornflowerblue', 'mediumpurple', 'plum', 'lightpink', ]

sample_bounds = utils.locate_boundaries(results.samples[-1], ndim)
tempbounds = np.concatenate((sample_bounds[-1].reshape(-1,1),sample_bounds[-1].reshape(-1,1)),axis=1).T

true = [4.74783699, -0.20173376,  0.94655788,  0.00783107,  0.32286627]

fig, axes = plt.subplots(ndim, ndim, figsize=(15,15))
if volshape == 'ellipsoid':
    for k in range(nwaves):
        mean = np.mean(results.nonimplausible[k][:,:-1].T, axis=1)
        cov = np.cov(results.nonimplausible[k][:,:-1].T)
        for i in range(ndim):
            for j in range(ndim):
                ax = axes[j,i]
                if i < j:
                    cov_matrix = np.array([[cov[i,i], cov[i,j]],[cov[j,i], cov[j,j]]])
                    plot.get_cov_ellipse(cov_matrix, [mean[i],mean[j]], 3, 5.991, ax, colors[k])
                    #ax.set_xlim([parameter_bounds[i][0],parameter_bounds[i][1]])
                    #ax.set_ylim([parameter_bounds[j][0],parameter_bounds[j][1]])
                    ax.scatter(true[i],true[j])
                elif i == j:
                    #ax.plot(theta_vals[i], stats.norm.pdf(theta_vals[i], mean[i], np.sqrt(cov[i,i])), color='plum')
                    ax.set_xlim([sample_bounds[i][0],sample_bounds[i][1]])
                    #ax.set_title(str(theta_names[i]) + '=' + str(round(theta_best[i], 2)), fontsize=14)
                else:
                    ax.axis('off')
elif volshape == 'hypercube':
    for k in range(nwaves):
        for i in range(ndim):
            for j in range(ndim):
                ax = axes[j,i]
                if i < j:
                    ax.add_patch(Rectangle((results.nonimp_bounds[k][i,0], results.nonimp_bounds[k][j,0]),\
                                           (results.nonimp_bounds[k][i,1]-results.nonimp_bounds[k][i,0]), \
                                           (results.nonimp_bounds[k][j,1]-results.nonimp_bounds[k][j,0]),\
                        color=colors[k],alpha=0.7,label='Nonimp. Hypercube'))
                    ax.set_xlim([parameter_bounds[i][0],parameter_bounds[i][1]])
                    ax.set_ylim([parameter_bounds[j][0],parameter_bounds[j][1]])
                    ax.scatter(true[i],true[j])
                else:
                    ax.axis('off')


NameError: name 'results' is not defined

In [None]:
fig, ax = plt.subplots(figsize=(15,15))

col = np.where(results.I_samples[0][:,-1]<3,'g','r')
#col2 = np.where(results.I_samples[1][:,-1]<3,'g','r')
col = 'red'

#print(np.where(results.I_samples[0][:,-1]<3))
#ax.scatter(results.I_samples[0][:,0], results.I_samples[0][:,1], s=40, color=col, edgecolor='black')

ax.scatter(results.samples[1][:,1], results.samples[1][:,2], s=40, color='gold', edgecolor='black')
ax.scatter(results.nonimplausible[0][:,1], results.nonimplausible[0][:,2], s=40, color='red', edgecolor='black')

#ax.scatter(results.I_samples[1][:,0], results.I_samples[1][:,1], s=40, color=col2, edgecolor='black')

#ax.scatter(results.training_pts[0][:,0], results.training_pts[0][:,1], s=40, color=col, edgecolor='black')

print(results.I_samples[0][19453])
#ax.set_xlim([1,5])
#ax.set_ylim([1,5])
print(true)
