In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import neuralEOS
from neuralEOS.output import plot, readfiles
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Shared settings object for the whole notebook: the cells below read its path
# attributes (pressure_path, fig_path, main_path, intermediate_data_path,
# processed_data_path) and pass it to neuralEOS.Testing.
params = neuralEOS.Parameters()

First, we analyse the correlations between the different features and the reference pressure (`P_ref`).

In [None]:
# Training split 1 of the AA pressure data set.
train_csv = params.pressure_path + "pressure_aa_v6_train_1.csv"

# Columns excluded from the correlation analysis that follows.
excluded_columns = [
    "Unnamed: 0",
    "Unnamed: 0.1",
    "P_class",
    "P_aa_fd_i",
    "P_aa_st_rr_i",
    "P_aa_st_tr_i",
    "P_aa_ideal_i",
    "P_aa_vir_corr_i",
    "P_aa_vir_nocorr_i",
]

df_aa_p = pd.read_csv(train_csv).drop(columns=excluded_columns)
df_aa_p.head()

First let's print the feature correlations. This gives Table 1 in the paper.

In [None]:
# Kendall rank correlation between every pair of columns.
correlations = df_aa_p.corr(method="kendall")

# Rank the features by the magnitude of their correlation with P_ref.
# P_ref itself is removed by label rather than by slicing off the first sorted
# entry: with a positional slice, any feature that ties with P_ref's
# self-correlation could be sorted first and P_ref would stay in the table.
feature_importances = (
    correlations["P_ref"]
    .drop("P_ref")
    .abs()
    .sort_values(ascending=False)
)
print(feature_importances)

Now we plot Fig. 1, which shows some example correlations of features to the target.

In [None]:
# Fig. 1: the P_ref column together with the dv_dR and MIS feature columns.
corr_fig_path = params.fig_path + "corr_plot_08_09.png"
plot.plot_correlations(
    df_aa_p["P_ref"],
    df_aa_p["dv_dR"],
    df_aa_p["MIS"],
    latex=True,
    setsize=True,
    savefig=True,
    figname=corr_fig_path,
)

Next, we plot the percentage errors of the neural network models for the inner CV loop, as a function of the number of features (Fig. 2), and summarize the results in Table III.

In [None]:
# One cv_summary.txt per inner-CV iteration directory (iterations 1 to 5).
cv_iter_prefix = params.intermediate_data_path + "inner_cv_output/aa_14_09_cv_iter_"
cv_summary_list = [
    cv_iter_prefix + str(cv_iter) + "/cv_summary.txt" for cv_iter in range(1, 6)
]
df_cv_summary = readfiles.load_inner_cv(cv_summary_list, sep=", ")

In [None]:
# Range of feature counts present in the CV summary (upper bound exclusive).
min_features = int(df_cv_summary.n_features.min())
max_features = int(df_cv_summary.n_features.max() + 1)
feature_range = max_features - min_features
x_features = list(range(min_features, max_features))

# Row 0 holds the mean MAPE and row 1 the mean MAE; one column per feature count.
avg_scores = np.zeros((2, feature_range))
for column, n_feat in enumerate(x_features):
    rows = df_cv_summary[df_cv_summary.n_features == n_feat]
    avg_scores[0, column] = rows.MAPE.mean()
    avg_scores[1, column] = rows.MAE.mean()

# Print "<n_features> <mean MAPE> <mean MAE>" for every feature count.
for n_feat, mean_mape, mean_mae in zip(x_features, avg_scores[0], avg_scores[1]):
    print(n_feat, round(mean_mape, 3), round(mean_mae, 5))

In [None]:
# Fig. 2: inner-CV errors against the number of features.
features_fig_path = params.main_path + "figs/features_err.png"
plot.plot_feature_errs(
    df_cv_summary,
    latex=True,
    setsize=True,
    savefig=True,
    figname=features_fig_path,
)

Next, we plot the percentage errors as a function of temperature and density, for the raw average atom data (Fig. 3).

In [None]:
# Raw average-atom pressures. The file is located through params.pressure_path,
# exactly as the other cells that read pressure_aa_v6.csv do, instead of a
# user-specific absolute path that only exists on one machine.
# NOTE(review): assumes params.pressure_path points at the
# data/processed/raw_pressure/ directory - confirm.
df_aa = pd.read_csv(params.pressure_path + "pressure_aa_v6.csv")

# Fig. 3: errors of the raw AA data as a function of temperature and density.
plot.plot_error_x_temp_aa(
    df_aa,
    size="preprint",
    pretty=True,
    save=True,
    filename=params.fig_path + "error_temp_aa_v6.png",
)

Next up, we plot the lowest temperature pressures against the reference (Fig. 4).

In [None]:
# Order the rows by temperature, then density (ascending is the default).
df_aa = df_aa.sort_values(["temp", "rho"])

low_T_fig_path = params.fig_path + "He_low_T_errs.png"
plot.plot_low_T_errs(df_aa, save=True, filename=low_T_fig_path)

Now we plot the pressures and their errors on a logarithmic scale (Fig. 5).

In [None]:
# Fig. 5: AA pressures and their errors on a logarithmic scale.
log_fig_path = params.fig_path + "aa_log_big_v6.png"
plot.plot_log_error_aa(
    df_aa,
    pretty=True,
    save=True,
    filename=log_fig_path,
)

Here we plot the AA data points that atoMEC wasn't able to calculate.

In [None]:
# The two CSV files handed to plot_missing_aa, in the order it expects them.
elements_csv = params.pressure_path + "pressure_elements.csv"
aa_csv = params.pressure_path + "pressure_aa_v6.csv"

plot.plot_missing_aa(
    elements_csv,
    aa_csv,
    save=True,
    filename=params.fig_path + "aa_missing.png",
)

Now we make Table 5, which is the aggregate error metrics for the AA results over the full temperature range.

In [None]:
# Table 5: aggregate error metrics of each raw AA pressure over the full
# temperature range.
tester = neuralEOS.Testing(params)
aa_csv_list = [params.pressure_path + "pressure_aa_v6.csv"]

aa_pressure_types = [
    "P_aa_fd",
    "P_aa_st_tr",
    "P_aa_st_rr",
    "P_aa_vir_corr",
    "P_aa_vir_nocorr",
    "P_aa_ideal",
]

# "P_pred" is not in the list, so the former `else` branch for it could never
# run; every entry is evaluated with P_ion="P_ion".
for ptype in aa_pressure_types:
    results = tester.evaluate(aa_csv_list, ptype, P_ion="P_ion")
    print("Results for " + ptype)
    print(results)

Next we make Table 6, showing the average error metrics for the AA results, when only temperatures above 10 eV are included.

In [None]:
# Table 6: the same AA error metrics, evaluated with temp_thresh=10 (the text
# above describes this as keeping only temperatures above 10 eV).
tester = neuralEOS.Testing(params)
aa_csv_list = [params.pressure_path + "pressure_aa_v6.csv"]

aa_pressure_types = [
    "P_aa_fd",
    "P_aa_st_tr",
    "P_aa_st_rr",
    "P_aa_vir_corr",
    "P_aa_vir_nocorr",
    "P_aa_ideal",
]

# "P_pred" is not in the list, so the former `else` branch (which also lacked
# temp_thresh) was unreachable; every entry gets P_ion and the threshold.
for ptype in aa_pressure_types:
    results = tester.evaluate(aa_csv_list, ptype, P_ion="P_ion", temp_thresh=10)
    print("Results for " + ptype)
    print(results)

Next come the neural network results for the FPEOS database. We first have to load the AA and AA-free network predictions.

In [None]:
# the aa results: one prediction file per test split (1 to 5)
filelist_nn_aa = [
    params.pressure_path + "pressure_aa_v6_test_" + str(split) + "_pred.csv"
    for split in range(1, 6)
]
df_aa = readfiles.load_inner_cv(filelist_nn_aa, sep=",")

# the aa free results: same splits, files without the "aa_" tag
filelist_nn_no_aa = [
    params.pressure_path + "pressure_v6_test_" + str(split) + "_pred.csv"
    for split in range(1, 6)
]
df_no_aa = readfiles.load_inner_cv(filelist_nn_no_aa, sep=",")

We plot the mean absolute percentage errors as a function of temperature and density (Fig. 7).

In [None]:
# Fig. 7: errors of the AA and AA-free network predictions.
nn_temp_fig_path = params.fig_path + "error_temp_nn.png"
plot.plot_error_x_temp_nn(
    df_aa,
    df_no_aa,
    pretty=True,
    save=True,
    filename=nn_temp_fig_path,
)

Now we plot the logarithmic errors (Fig. 8).

In [None]:
# Fig. 8: logarithmic errors of both network models.
nn_log_fig_path = params.fig_path + "nn_log_errs.png"
plot.plot_log_error_nn(
    df_aa,
    df_no_aa,
    save=True,
    filename=nn_log_fig_path,
)

Next, we show the comparison between the AA and neural network results (Fig. 9).

In [None]:
# Fig. 9: AA errors compared with the neural network errors.
comp_fig_path = params.fig_path + "aa_nn_errs_comp.png"
plot.plot_nn_aa_errs(
    df_aa,
    df_no_aa,
    pretty=True,
    save=True,
    filename=comp_fig_path,
)

Next, we create Table 8, which compares the aggregate error metrics for the AA and AA-free neural network models, as well as the finite-difference raw AA pressure.

In [None]:
tester = neuralEOS.Testing(params)

# Extra keyword arguments per pressure type: the finite-difference AA pressure
# is evaluated with P_ion="P_ion", the network prediction without it.
evaluate_kwargs = {
    "P_aa_fd": {"P_ion": "P_ion"},
    "P_pred": {},
}

for ptype, extra in evaluate_kwargs.items():
    results = tester.evaluate(filelist_nn_aa, ptype, **extra)
    print("Results for " + ptype)
    print(results)

In [None]:
# Table 8, AA-free network: only the network prediction is evaluated here.
# The former loop iterated over the single entry "P_pred", so its
# `ptype != "P_pred"` branch could never run and has been removed.
tester = neuralEOS.Testing(params)

ptype = "P_pred"
results = tester.evaluate(filelist_nn_no_aa, ptype)
print("Results for " + ptype)
print(results)

In the following, we show the results for the FP-Be database, starting with the raw AA results. First, we compare how the FPEOS and beryllium data are distributed in density–temperature space (Fig. 10).

In [None]:
df_Be = pd.read_csv(params.pressure_path + "Be_aa_pressure_v2.csv")

# NOTE(review): df_aa is whatever an earlier cell assigned last; when the
# notebook is run top to bottom that is the frame loaded from filelist_nn_aa,
# not the raw pressure_aa_v6.csv frame - confirm this is the intended input.
# The figure is written under params.fig_path, like the other figures, rather
# than to a user-specific absolute path.
# NOTE(review): assumes params.fig_path is the project's figs/ directory - confirm.
plot.plot_dist_comps(
    df_aa,
    df_Be,
    save=True,
    filename=params.fig_path + "Be_FPEOS_dist_comp.png",
)

Now, we compare the MAPEs for the different AA methods (Fig. 11).

In [None]:
# Fig. 11: errors of the different AA methods for the Be data.
be_temp_fig_path = params.fig_path + "error_temp_aa_Be.png"
plot.plot_error_x_temp_aa(
    df_Be,
    size="preprint",
    pretty=True,
    save=True,
    filename=be_temp_fig_path,
)

Next, we show the MAPEs for the AA and AA-free neural network results.

In [None]:
# Network predictions for the Be data: AA-based model and AA-free model.
df_Be_nn_aa = pd.read_csv(params.pressure_path + "Be_aa_pressure_v2_pred.csv")
df_Be_nn_no_aa = pd.read_csv(params.pressure_path + "Be_pressure_pred_v2.csv")

# The figure is written under params.fig_path, like the other figures, rather
# than to a user-specific absolute path.
# NOTE(review): assumes params.fig_path is the project's figs/ directory - confirm.
plot.plot_error_x_temp_nn(
    df_Be_nn_aa,
    df_Be_nn_no_aa,
    pretty=True,
    save=True,
    filename=params.fig_path + "error_temp_nn_Be.png",
)

We now compare both network predictions with the (uncorrected) virial AA predictions.

In [None]:
# waiting for final Be nn no AA results
# Arguments: the reference pressure, both network predictions, and the
# uncorrected virial AA pressure with the P_ion column added to it.
# The figure is written under params.fig_path, like the other figures, rather
# than to a user-specific absolute path.
# NOTE(review): assumes params.fig_path is the project's figs/ directory - confirm.
plot.plot_log_pressure_nn_aa(
    df_Be.P_ref,
    df_Be_nn_aa.P_pred,
    df_Be_nn_no_aa.P_pred,
    df_Be.P_aa_vir_nocorr + df_Be.P_ion,
    save=True,
    filename=params.fig_path + "Be_log_nn_aa.png",
)

In [None]:
# AA versus network errors for the Be data. The figure is written under
# params.fig_path, like the other figures, rather than to a user-specific
# absolute path.
# NOTE(review): assumes params.fig_path is the project's figs/ directory - confirm.
plot.plot_nn_aa_errs(
    df_Be_nn_aa,
    df_Be_nn_no_aa,
    pretty=True,
    save=True,
    filename=params.fig_path + "aa_nn_errs_comp_Be.png",
)

In [None]:
# Aggregate error metrics for the Be data: every raw AA pressure plus the
# AA-based network prediction, all read from the same prediction file.
tester = neuralEOS.Testing(params)
aa_csv_list = [params.pressure_path + "Be_aa_pressure_v2_pred.csv"]

be_pressure_types = [
    "P_aa_fd",
    "P_aa_st_rr",
    "P_aa_st_tr",
    "P_aa_vir_nocorr",
    "P_aa_vir_corr",
    "P_aa_ideal",
    "P_pred",
]

for ptype in be_pressure_types:
    # P_ion is passed for everything except the network prediction.
    ion_kwargs = {} if ptype == "P_pred" else {"P_ion": "P_ion"}
    results = tester.evaluate(aa_csv_list, ptype, **ion_kwargs)
    print("Results for " + ptype)
    print(results)

In [None]:
# Aggregate error metrics for the AA-free network prediction on the Be data.
# The former loop iterated over the single entry "P_pred", so its
# `ptype != "P_pred"` branch could never run and has been removed.
tester = neuralEOS.Testing(params)
aa_csv_list = [params.pressure_path + "Be_pressure_pred_v2.csv"]

ptype = "P_pred"
results = tester.evaluate(aa_csv_list, ptype)
print("Results for " + ptype)
print(results)

In [None]:
# Same Be metrics as the full-range table, but evaluated with temp_thresh=10.
tester = neuralEOS.Testing(params)
aa_csv_list = [params.pressure_path + "Be_aa_pressure_v2_pred.csv"]

be_pressure_types = [
    "P_aa_fd",
    "P_aa_st_rr",
    "P_aa_st_tr",
    "P_aa_vir_nocorr",
    "P_aa_vir_corr",
    "P_aa_ideal",
    "P_pred",
]

for ptype in be_pressure_types:
    # P_ion is passed for everything except the network prediction.
    ion_kwargs = {} if ptype == "P_pred" else {"P_ion": "P_ion"}
    results = tester.evaluate(aa_csv_list, ptype, temp_thresh=10, **ion_kwargs)
    print("Results for " + ptype)
    print(results)

In [None]:
# AA-free network metrics on the Be data, evaluated with temp_thresh=10.
# The former loop iterated over the single entry "P_pred", so its
# `ptype != "P_pred"` branch could never run and has been removed.
tester = neuralEOS.Testing(params)
aa_csv_list = [params.pressure_path + "Be_pressure_pred_v2.csv"]

ptype = "P_pred"
results = tester.evaluate(aa_csv_list, ptype, temp_thresh=10)
print("Results for " + ptype)
print(results)

Next, we analyze the timings for the AA method.

In [None]:
# Timing data for the AA calculations.
timings_csv = params.processed_data_path + "aa_timings.csv"
df_timings = pd.read_csv(timings_csv)

In [None]:
# Quartiles of scf_time (0.25, 0.5 and 0.75 quantiles); the 0.5 entry is the
# median time.
df_timings.scf_time.quantile([0.25, 0.5, 0.75])

In [None]:
# Figure of the AA timings.
timings_fig_path = params.fig_path + "AA_timings.png"
plot.plot_timings(
    df_timings,
    pretty=True,
    save=True,
    filename=timings_fig_path,
)

Here is the analysis of the FPEOS data. This will go in an appendix most likely.

In [None]:
# analysis of FPEOS data ("x" interpolation files 0 to 4)
tester = neuralEOS.Testing(params)
fpeos_csv_list_x = [
    params.pressure_path + "FPEOS_interp_x_" + str(i) + ".csv" for i in range(5)
]

# The former if/else had two identical branches over the single entry
# "P_pred"; it is reduced to the one call that was actually executed.
ptype = "P_pred"
results = tester.evaluate(fpeos_csv_list_x, ptype)
print("Results for " + ptype)
print(results)

In [None]:
# analysis of FPEOS data ("y" interpolation files 0 to 4)
tester = neuralEOS.Testing(params)
fpeos_csv_list_y = [
    params.pressure_path + "FPEOS_interp_y_" + str(i) + ".csv" for i in range(5)
]

# The former if/else had two identical branches over the single entry
# "P_pred"; it is reduced to the one call that was actually executed.
ptype = "P_pred"
results = tester.evaluate(fpeos_csv_list_y, ptype)
print("Results for " + ptype)
print(results)

In [None]:
# Load the "x" and "y" FPEOS interpolation files into one frame each.
df_fpeos_x, df_fpeos_y = (
    readfiles.load_inner_cv(csv_list, sep=",")
    for csv_list in (fpeos_csv_list_x, fpeos_csv_list_y)
)

In [None]:
# Figure of the FPEOS interpolation errors for the "x" and "y" frames.
fpeos_fig_path = params.fig_path + "FPEOS_interp_plot.png"
plot.plot_fpeos_errs(
    df_fpeos_x,
    df_fpeos_y,
    pretty=True,
    save=True,
    filename=fpeos_fig_path,
)