# PESNet

This notebook contains a schematic neural network architecture for producing potential energy surfaces for a given nucleus. The input to the example network is [A,Z,Q20,Q30], with the output being the energy. 

The philosophy behind this structure is to produce a network that can quickly produce full surfaces for nuclei, perhaps as a guiding light for a HFB calculation. The network isn't expected to work well for extrapolation beyond the training set of nuclei, where the PES may have exotic behavior.

A secondary method of training is to actively exclude nuclei to benchmark the performance on whole surfaces. This can be done for a few nuclei, though you should probably disable the test split if you're excluding several whole nuclei.

An alternate method (and thus a good one for a final project!) to produce surfaces would be to train a network that, instead, takes a few points as input (say 5%-10% of a surface) to then supersample the rest of the surface.

In [None]:
import os
import tensorflow as tf
from tensorflow.keras import backend as K
import numpy as np
import h5py
import pandas as pd
import sklearn.linear_model as skl
import sklearn as sk
from pylab import plt, mpl
from pickle import dump, load
#from sklearn.externals import joblib
from sklearn.preprocessing import MinMaxScaler,RobustScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.metrics import confusion_matrix, classification_report

# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8"
# and newer releases no longer ship the old name, so use the new name when
# the installed Matplotlib offers it and fall back to the legacy one otherwise.
_seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(_seaborn_style)
mpl.rcParams['font.family'] = 'serif'

# Query the GPU device name once and reuse it for both the test and the message.
_gpu_device_name = tf.test.gpu_device_name()
if _gpu_device_name:
    print('Default GPU Device: {}'.format(_gpu_device_name))
else:
    print("Please install GPU version of TF for faster training")

print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
def MakePlot(x,y, styles, labels, axlabels):
    """Draw several curves on one new 10x6 figure and add a legend.

    x, y     -- sequences of per-curve data; x[i] is plotted against y[i]
    styles   -- per-curve format argument passed to plt.plot
    labels   -- per-curve legend label
    axlabels -- pair whose items 0 and 1 label the x- and y-axis
    """
    plt.figure(figsize=(10,6))
    for i, x_values in enumerate(x):
        plt.plot(x_values, y[i], styles[i], label = labels[i])
    # The axis labels do not depend on the curve, so set them once after the
    # loop; this also labels the axes when no curves are supplied.
    plt.xlabel(axlabels[0])
    plt.ylabel(axlabels[1])
    plt.legend(loc=0)
    
# R2 metric for tensorflow from https://jmlb.github.io/ml/2017/03/20/CoeffDetermination_CustomMetric4Keras/
def R2(y_true, y_pred):
    """Coefficient of determination built from Keras backend operations.

    Returns 1 - SS_res / (SS_tot + K.epsilon()), where SS_res is the summed
    squared difference between y_true and y_pred and SS_tot is the summed
    squared deviation of y_true from its mean. The epsilon keeps the
    denominator non-zero when y_true is constant.
    """
    residual = y_true - y_pred
    deviation = y_true - K.mean(y_true)
    ss_res = K.sum(K.square(residual))
    ss_tot = K.sum(K.square(deviation))
    return 1 - ss_res / (ss_tot + K.epsilon())

def PESpredict(X, model, scaler):
    """Scale the inputs, run the model and return the squeezed predictions.

    X      -- sequence of per-feature arrays; it is converted to an array and
              transposed before scaling, so each feature becomes a column
    model  -- object providing predict(array)
    scaler -- object providing transform(array)

    The model output is transposed and squeezed, dropping length-one axes.
    """
    features = np.asarray(X).T
    scaled = scaler.transform(features)
    prediction = model.predict(scaled)
    return prediction.T.squeeze()


def NucError(A,Z,df,model,scaler):
    """Print the mean squared error of the predicted energies for one nucleus.

    A, Z   -- values matched against the 'A' and 'Z' columns of df
    df     -- table with columns 'A', 'Z', 'Q20', 'Q30' and 'HFB_cubic'
    model  -- forwarded to PESpredict
    scaler -- forwarded to PESpredict

    Nothing is returned; the nucleus and its error are written to stdout.
    """
    selected = df[(df['A'] == A) & (df['Z'] == Z)]

    print("Nucleus: A =",int(A)," Z =",int(Z))

    # Feature columns in the order A, Z, Q20, Q30, as passed to PESpredict.
    features = tuple(selected[column].to_numpy() for column in ('A', 'Z', 'Q20', 'Q30'))
    reference = np.asarray(selected['HFB_cubic'])

    pes = PESpredict(features, model, scaler)

    print("Chosen nucleus error: ",sum((pes - reference)**2)/pes.size)
    
def PlotSurface(A,Z,df,model,scaler, grid_shape=(126,21), extent=(0, 250, 0, 60)):
    """Show the tabulated and the predicted energy surface of one nucleus side by side.

    A, Z       -- values matched against the 'A' and 'Z' columns of df
    df         -- table with columns 'A', 'Z', 'Q20', 'Q30' and 'HFB_cubic'
    model      -- forwarded to PESpredict
    scaler     -- forwarded to PESpredict
    grid_shape -- 2-D shape the flat columns of the selected nucleus are
                  reshaped to; the default is the previously hard-coded (126, 21)
    extent     -- extent handed to imshow; the default is the previously
                  hard-coded 0..250 by 0..60

    Raises ValueError when the number of rows selected for the nucleus does
    not match grid_shape.
    """
    PES_sub = df[(df['A'] == A) & (df['Z'] == Z)]

    # Fail early with a readable message instead of a bare reshape error.
    expected_rows = grid_shape[0] * grid_shape[1]
    if len(PES_sub) != expected_rows:
        raise ValueError(
            "expected {} rows for grid_shape {}, found {}".format(
                expected_rows, grid_shape, len(PES_sub)))

    q20_flat = PES_sub['Q20'].to_numpy()
    q30_flat = PES_sub['Q30'].to_numpy()
    energy_flat = PES_sub['HFB_cubic'].to_numpy()

    xx = (PES_sub['A'].to_numpy(), PES_sub['Z'].to_numpy(), q20_flat, q30_flat)

    pes = PESpredict(xx, model, scaler)

    # Reshape every quantity once and reuse the grids for both panels.
    q20_grid = np.reshape(q20_flat, grid_shape)
    q30_grid = np.reshape(q30_flat, grid_shape)
    panels = (
        ("From Data", np.reshape(energy_flat, grid_shape)),
        ("From NN", np.reshape(pes, grid_shape)),
    )

    fig, axs = plt.subplots(1,2,figsize=(20,8), gridspec_kw={'width_ratios': [0.8, 1]})

    # Plot the surface.
    cmp = 'coolwarm'

    for ax, (title, surface) in zip(axs, panels):
        ax.contour(q20_grid, q30_grid, surface)
        ax.set_title(title)
        img = ax.imshow(surface.T, extent=extent, origin='lower',
               cmap=cmp,interpolation='bilinear',aspect="auto")
    # As before, the colorbar is built from the last image drawn ("From NN").
    fig.colorbar(img)

    plt.show()

# Where to save the figures and data files
PROJECT_ROOT_DIR = "Results"
FIGURE_ID = "Results/FigureFiles"
DATA_ID = "Data/"

# makedirs(exist_ok=True) creates any missing parents and does nothing for a
# directory that already exists, so no separate exists() check is needed.
for _directory in (PROJECT_ROOT_DIR, FIGURE_ID, DATA_ID):
    os.makedirs(_directory, exist_ok=True)

def image_path(fig_id):
    """Return fig_id joined onto the figure directory FIGURE_ID."""
    figure_file = os.path.join(FIGURE_ID, fig_id)
    return figure_file

def data_path(dat_id):
    """Return dat_id joined onto the data directory DATA_ID."""
    data_file = os.path.join(DATA_ID, dat_id)
    return data_file

def save_fig(fig_id):
    """Save the current matplotlib figure as <figure directory>/<fig_id>.png."""
    target = image_path(fig_id) + ".png"
    plt.savefig(target, format='png')

The following cell can be run once you have a trained model and a saved scaler, to spot-check the performance of the network for certain nuclei.

In [None]:
def model_check():
    """Reload the saved model and input scaler and spot-check a set of nuclei.

    Reads 'saved_model/PESNet.model', 'pickles/xscaler.pkl' and the
    whitespace-delimited table All.dat from the data directory, prints the
    error for each benchmark nucleus via NucError, and plots the first two
    via PlotSurface. Returns None.
    """
    model = tf.keras.models.load_model('saved_model/PESNet.model',custom_objects={'R2':R2})

    # NOTE: unpickling can execute arbitrary code; only load scaler files
    # that you created yourself.
    with open('pickles/xscaler.pkl', "rb") as scaler_file:
        xscaler = load(scaler_file)

    # sep=r'\s+' is the documented replacement for the deprecated
    # delim_whitespace=True; the context manager closes the file afterwards.
    with open(data_path("All.dat"),'r') as infile:
        PESfull = pd.read_csv(infile, sep=r'\s+', low_memory=False)

    # (A, Z) pairs to check, in the order they are reported.
    benchmark_nuclei = [
        (304, 100),
        (232, 88),
        (264, 94),
        (282, 94),
        (360, 110),
        (238, 94),
        (256, 98),
        (324, 108),
    ]

    for nuc_a, nuc_z in benchmark_nuclei:
        NucError(nuc_a, nuc_z, PESfull, model, xscaler)

    # Only the first two benchmark nuclei are plotted.
    for nuc_a, nuc_z in benchmark_nuclei[:2]:
        PlotSurface(nuc_a, nuc_z, PESfull, model, xscaler)

    return

### Wrangling our data

First, let's load in the data. I've shoved all the potential energy surfaces into a file called `All.dat`, so we'll load that into a dataframe and get started

In [None]:
# Read the whitespace-delimited table of all surfaces. The context manager
# closes the file once pandas has consumed it, and sep=r'\s+' is the
# documented replacement for the deprecated delim_whitespace=True.
with open(data_path("All.dat"),'r') as infile:
    PES = pd.read_csv(infile, sep=r'\s+', low_memory=False)

# Column views used to assemble the network inputs and targets.
A = PES['A']
Z = PES['Z']
Q20 = PES['Q20']
Q30 = PES['Q30']
E = PES['HFB_cubic']

Now we will put together the input numpy arrays for tensorflow. I'll also do the train/test splitting here as well as some scaling -- this would be a good place to explore changing things around to squeeze some additional performance out of your model!

In [None]:
# Build input array: one row per sample with the columns A, Z, Q20, Q30
xx = np.asarray((A.to_numpy(),Z.to_numpy(),Q20.to_numpy(),Q30.to_numpy())).T

# Targets as a single-column array
yy = np.asarray(E).reshape(-1, 1)

# Get a test set for later, then shuffle the training part with a fixed seed
x_train, x_test, y_train, y_test = train_test_split(
    xx, yy, test_size=0.25, random_state=42
)
x_train, y_train = sk.utils.shuffle(x_train, y_train, random_state=42)

# Scale input
# Try out MinMaxScaler() or other ones!
# NOTE(review): the scaler is fitted on the full input array (test rows
# included), not on x_train only -- confirm this is intended.
xscaler = RobustScaler(unit_variance=True)
xscaler.fit(xx)

xs_train, xs_test, xs_full = (
    xscaler.transform(_inputs) for _inputs in (x_train, x_test, xx)
)

# Scale output with the following lines.
# I don't scale anything, but give it a try and see how it affects your results!
# To enable it, fit the scaler and replace the pass-through assignments below
# with yscaler.transform(y_train), yscaler.transform(y_test), yscaler.transform(yy).

#yscaler = MinMaxScaler()
#yscaler.fit(y_train)

ys_train, ys_test, ys_full = y_train, y_test, yy





### Building the network

Now we actually build our neural network! I have some skeleton code for the input and output layers here, but I don't want to bias you too much -- play with keras layer types, node configurations, activation functions, input information, etc. 

For information on the keras API, check the documentation: https://keras.io/api/

In [None]:
nodes = # This is a handy helper variable for programmatic node counts
activation = # I recommend you pick something nice from tf.keras.layers

model=tf.keras.Sequential() #Define the model object

# Input layer assuming we pass in A, Z, Q20, Q30
model.add(tf.keras.layers.Dense(nodes,input_shape=(4,),activation=activation))

# Hidden layers
model.add()


# Here's an output assuming you just want to output the energy
model.add(tf.keras.layers.Dense(1))

model.compile(tf.keras.optimizers.Adam(lr=0.001),loss='mean_squared_error',metrics=[R2]) #Adam optimizer and mean squared error loss

# Try different optimizers too! It will help you get a feel for what each one actually does

#model.compile(tf.keras.optimizers.Adadelta(),loss='mean_squared_error',metrics=[R2])


### Training the model and checking the performance

With our bespoke neural network in hand, let's train! I have some reasonable defaults here, but try new things! Break the optimizer! Crash your computer! The possibilities are endless.

I also have some handy plotting features and loss computation down below, but by all means plot different quantities to see if they reveal something intriguing.

In [None]:
# Train on the scaled training set; validation_split asks Keras to hold out a
# quarter of the supplied samples for the validation loss.
results = model.fit(
    xs_train,
    ys_train,
    epochs=100,
    batch_size=512,
    validation_split=0.25,
    verbose=1,
    shuffle=True,
)

In [None]:
# Per-epoch values recorded by model.fit
history = results.history
fig, ax = plt.subplots(figsize=(10, 10))

# Training and validation loss on the axes created above
for _key, _label in (("loss", "training loss"), ("val_loss", "validation loss")):
    ax.plot(history[_key], label=_label)

ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.legend()
# Logarithmic loss axis; non-positive values are clipped
ax.set_yscale("log", nonpositive='clip')
#plt.ylim(0,1)
plt.show()

In [None]:
# test loss calculation

#[test_loss,test_R2]=model.evaluate(xs_test, ys_test, verbose=1)

#print('Test Loss: {:.04}'.format(test_loss))
#print('Test R2: {:.04}'.format(test_R2))

# Predict on the test, training and full inputs and flatten each result.
# If output scaling is enabled, apply yscaler.inverse_transform(...) to the
# raw prediction before the .T.squeeze().
testen_out = model.predict(xs_test).T.squeeze()
trainen_out = model.predict(xs_train).T.squeeze()
fullen_out = model.predict(xs_full).T.squeeze()

# Shift back for plotting
x_train, x_test, x_full = (
    xscaler.inverse_transform(_scaled) for _scaled in (xs_train, xs_test, xs_full)
)

# Row-wise sum over all four input columns.
# NOTE(review): the a_* names suggest a mass number, but this adds
# A + Z + Q20 + Q30 -- confirm the intent.
a_test, a_train, a_full = (
    np.sum(_unscaled, axis=1) for _unscaled in (x_test, x_train, x_full)
)

# Reference energies as flat arrays
en_test = np.asarray(y_test).squeeze()
trainen_test = np.asarray(y_train).squeeze()
fullen_test = np.asarray(yy).squeeze()

# Residuals (reference minus prediction)
diff = np.asarray((en_test - testen_out).T).squeeze()
fulldiff = np.asarray((fullen_test - fullen_out).T).squeeze()
traindiff = np.asarray((trainen_test - trainen_out).T).squeeze()

# Rows whose first two columns (A and Z) both round to even integers;
# each mask is computed once and reused for the inputs and the residuals.
test_is_even = (np.rint(x_test[:,0])%2==0) & (np.rint(x_test[:,1])%2==0)
train_is_even = (np.rint(x_train[:,0])%2==0) & (np.rint(x_train[:,1])%2==0)

x_test_even = x_test[test_is_even]
diff_even = diff[test_is_even]
x_train_even = x_train[train_is_even]
traindiff_even = traindiff[train_is_even]

The following cell in particular computes and plots the results for a chosen nucleus. I recommend doing this sort of investigation if you're trying to improve performance in certain regions of the nuclear chart and have some standard benchmark to compare to.

In [None]:
model_mse = model
PESfull = PES

# Nucleus to inspect
A, Z = 360.0, 110.0

PES_sub = PESfull[(PESfull['A'] == A) & (PESfull['Z'] == Z)]

print("Nucleus: A =",int(A)," Z =",int(Z))

# From here on A, Z, Q20, Q30 and E refer to the columns of the selected
# nucleus only, replacing the full-table columns defined earlier.
A, Z, Q20, Q30, E = (
    PES_sub[_column] for _column in ('A', 'Z', 'Q20', 'Q30', 'HFB_cubic')
)

# Inputs (one row per sample) and single-column targets for this nucleus
xx = np.asarray((A.to_numpy(),Z.to_numpy(),Q20.to_numpy(),Q30.to_numpy())).T
xs_full = xscaler.transform(xx)
yy = np.asarray(E).reshape(-1, 1)

#ys_full = yscaler.transform(yy)

pes = model_mse.predict(xs_full)

# Shift back for plotting
x_full = xscaler.inverse_transform(xs_full)

# With output scaling enabled this would be yscaler.inverse_transform(pes).T.squeeze()
fullen_out = pes.T.squeeze()

print("Chosen nucleus error: ",sum((fullen_out - np.asarray(E))**2)/fullen_out.size)

# traindiff and diff are the residual arrays computed in the evaluation cell above
t2 = traindiff**2
print("Training error: ",sum(t2)/traindiff.size)

t2 = diff**2
print("Test set error: ",sum(t2)/diff.size)


fig, axs = plt.subplots(1,2,figsize=(20,8), gridspec_kw={'width_ratios': [0.8, 1]})

# Plot the surface.

cmp = 'coolwarm'

# The flat columns are reshaped to a (126, 21) grid once and reused below.
grid_shape = (126, 21)
q20_grid = np.reshape(Q20.to_numpy(), grid_shape)
q30_grid = np.reshape(Q30.to_numpy(), grid_shape)
panel_surfaces = (
    ("From Data", np.reshape(E.to_numpy(), grid_shape)),
    ("From NN", np.reshape(fullen_out, grid_shape)),
)

for panel_ax, (panel_title, panel_surface) in zip(axs, panel_surfaces):
    surf = panel_ax.contour(q20_grid, q30_grid, panel_surface)
    panel_ax.set_title(panel_title)
    img = panel_ax.imshow(panel_surface.T, extent=[0, 250, 0, 60], origin='lower',
               cmap=cmp,interpolation='bilinear',aspect="auto")

# The colorbar is built from the last image drawn ("From NN").
fig.colorbar(img)

plt.show()


### Saving our model

The nice thing about a trained neural network is that it can then be deployed relatively easily for the purposes of inference. To enable that, we'll save our model and scaler so we can use it later. If you're making a lot of networks and want to pit them against each other in a battle royale be sure to save them with reasonably informative names. Consider being systematic with your tests as well -- you may be able to draw some interesting conclusions from these investigations.

In [None]:
# os.makedirs(..., exist_ok=True) does the job of `mkdir -p` without relying
# on a shell, so the cell also works where that command is unavailable.
os.makedirs('saved_model', exist_ok=True)
model.save('saved_model/PESNet.model')

# Saved model with standard test/training.val splits:
# Training error:  XXXX
# Test set error:  XXXX

os.makedirs('pickles', exist_ok=True)
# save the scaler; the context manager flushes and closes the file
with open('pickles/xscaler.pkl', 'wb') as scaler_file:
    dump(xscaler, scaler_file)

model.summary()

### Trying new things!

The above methodology is really the simplest/most straightforward way to do this stuff, but there are many many other things that you can try to improve your performance globally or to test the ability of the network to extrapolate to new nuclei it has never seen before.

In your remaining time I recommend you start throwing things at the wall and see what sticks! In addition to trying different network architectures as mentioned above, you can also change what gets fed into your network during training and inference. One example would be to feed in the ground state energy of the nucleus from the liquid drop model. Will it be useful to the network? Who knows!

To test your model's performance in extrapolation, you can explicitly exclude certain nuclei's PES from the training data set. This gives you a nice set of benchmarks that should be entirely foreign to the trained model. The following cell shows you how to prepare such a dataset and you can copy it up above or write the rest below.

In [None]:
# Read the full table. The context manager closes the file, and sep=r'\s+'
# is the documented replacement for the deprecated delim_whitespace=True.
with open(data_path("All.dat"),'r') as infile:
    PESfull = pd.read_csv(infile, sep=r'\s+', low_memory=False)

# (A, Z) pairs whose complete surfaces are removed from the training table.
excluded_nuclei = [
    (304, 100),
    (232, 88),
    (264, 94),
    (282, 94),
    (360, 110),
    (238, 94),
    (256, 98),
    (324, 108),
]

# Drop every row that matches one of the excluded (A, Z) pairs;
# PESfull itself keeps all nuclei for later benchmarking.
PES = PESfull
for _excluded_a, _excluded_z in excluded_nuclei:
    PES = PES[~((PES['A'] == _excluded_a) & (PES['Z'] == _excluded_z))]

A = PES['A']
Z = PES['Z']
Q20 = PES['Q20']
Q30 = PES['Q30']
E = PES['HFB_cubic']

Evaluation and plotting for one of the excluded nuclei

In [None]:
# Excluded nucleus to inspect
A, Z = 256.0, 98.0

# Select from PESfull, which still contains the excluded nuclei
PES_sub = PESfull[(PESfull['A'] == A) & (PESfull['Z'] == Z)]

print("Nucleus: A =",int(A)," Z =",int(Z))

# From here on A, Z, Q20, Q30 and E refer to the columns of the selected
# nucleus only, replacing the training-table columns defined earlier.
A, Z, Q20, Q30, E = (
    PES_sub[_column] for _column in ('A', 'Z', 'Q20', 'Q30', 'HFB_cubic')
)

# Inputs (one row per sample) and single-column targets for this nucleus
xx = np.asarray((A.to_numpy(),Z.to_numpy(),Q20.to_numpy(),Q30.to_numpy())).T
xs_full = xscaler.transform(xx)
yy = np.asarray(E).reshape(-1, 1)

pes = model.predict(xs_full)

# Shift back for plotting
x_full = xscaler.inverse_transform(xs_full)

# With output scaling enabled this would be yscaler.inverse_transform(pes).T.squeeze()
fullen_out = pes.T.squeeze()

print("Chosen nucleus error: ",sum((fullen_out - np.asarray(E))**2)/fullen_out.size)

# NOTE(review): trainen_test and trainen_out are not recomputed here; they
# presumably come from the earlier evaluation cell and may be stale if the
# model was retrained since -- verify before trusting this number.
traindiff = trainen_test-trainen_out.T.squeeze()

t2 = traindiff**2
print("Training error: ",sum(t2)/traindiff.size)

# The test-set error is intentionally not reported in this cell:
#t2 = diff**2
#print("Test set error: ",sum(t2)/diff.size)


fig, axs = plt.subplots(1,2,figsize=(20,8), gridspec_kw={'width_ratios': [0.8, 1]})

# Plot the surface.

cmp = 'coolwarm'

# The flat columns are reshaped to a (126, 21) grid once and reused below.
grid_shape = (126, 21)
q20_grid = np.reshape(Q20.to_numpy(), grid_shape)
q30_grid = np.reshape(Q30.to_numpy(), grid_shape)
panel_surfaces = (
    ("From Data", np.reshape(E.to_numpy(), grid_shape)),
    ("From NN", np.reshape(fullen_out, grid_shape)),
)

for panel_ax, (panel_title, panel_surface) in zip(axs, panel_surfaces):
    surf = panel_ax.contour(q20_grid, q30_grid, panel_surface)
    panel_ax.set_title(panel_title)
    img = panel_ax.imshow(panel_surface.T, extent=[0, 250, 0, 60], origin='lower',
               cmap=cmp,interpolation='bilinear',aspect="auto")

# The colorbar is built from the last image drawn ("From NN").
fig.colorbar(img)

plt.show()
