In [1]:
from gibbsml.ellingham.fingerprint import Fingerprint
from catlearn.regression import GaussianProcess
import numpy as np
import matplotlib.pyplot as plt

USER_API_KEY = ''  # Type your own Materials Project API key. See https://materialsproject.org/open

# Set up model parameters.



In [2]:
reg = 1e-3  # Regularization parameter
reg_bounds = (1e-5, 1e-1)
kernel = [
          {'type': 'gaussian', 'width': 1., 'scaling': 1.},
          {'type': 'linear', 'scaling': 1., 'constant': 1.},
          ]

# Load training set.

In [3]:

fp = Fingerprint(USER_API_KEY=USER_API_KEY)
fp.load_set(filename='../gibbsml/ellingham/trainingset_ellingham_08June2020.json')

# Get training data and target features (train_y).
train_x = list(fp.get_features_values())
train_y = list(fp.get_target_features_values(target_feature='dS0_expt'))
train_labels = fp.get_labels()


# Run one-leave-out.

In [4]:

total_predictions = []
total_uncertainties = []

for n_seed in range(0, len(train_x)):
    test_x = []
    test_y = []
    test_labels = []

    train_x_loop = train_x.copy()
    train_y_loop = train_y.copy()
    train_labels_loop = train_labels.copy()

    test_x.append(train_x_loop.pop(n_seed))
    test_y.append(train_y_loop.pop(n_seed))
    test_labels.append(train_labels_loop.pop(n_seed))

    # Reshape.
    shape_train_x_loop = np.shape(train_x_loop)
    train_x_loop = np.reshape(train_x_loop, shape_train_x_loop)
    train_y_loop = np.array(train_y_loop)

    shape_test_x = np.shape(test_x)
    test_x = np.reshape(test_x, shape_test_x)
    test_y = np.array(test_y)

    # Train the GP model.
    gp = GaussianProcess(kernel_list=kernel, regularization=reg,
                         regularization_bounds=reg_bounds,
                         train_fp=train_x_loop, train_target=train_y_loop,
                         optimize_hyperparameters=False,
                         scale_data=True)
    gp.optimize_hyperparameters(global_opt=False,
                                #algomin='L-BFGS-B',
                                algomin='TNC',
                                eval_jac=True)

    # Get the predictions.
    prediction = gp.predict(test_fp=test_x, uncertainty=True)
    pred = prediction['prediction']
    unc = prediction['uncertainty'] * 2

    total_predictions.append(pred[0][0])
    total_uncertainties.append(unc[0])

    print('Species:', train_labels[n_seed])
    print('Experimental slope:', train_y[n_seed])
    print('Predicted slope:', pred)
    print('Error exp. vs calc. (%)',
          ((train_y[n_seed] - pred[0][0]) / train_y[n_seed]) * 100)


Species: Mg2SiO4_mp-2895
Experimental slope: [0.22336061]
Predicted slope: [[0.21523936]]
Error exp. vs calc. (%) [3.63593797]
Species: MgTiO3_mp-3771
Experimental slope: [0.21723006]
Predicted slope: [[0.20012113]]
Error exp. vs calc. (%) [7.87594892]
Species: MgTi2O5_mp-28232
Experimental slope: [0.197454]
Predicted slope: [[0.19554007]]
Error exp. vs calc. (%) [0.96930751]
Species: MgAl2O4_mp-3536
Experimental slope: [0.2265789]
Predicted slope: [[0.23134625]]
Error exp. vs calc. (%) [-2.10405965]
Species: LiAlO2_mp-3427
Experimental slope: [0.22856241]
Predicted slope: [[0.22096077]]
Error exp. vs calc. (%) [3.32584682]
Species: NaAlO2_mp-9212
Experimental slope: [0.22164659]
Predicted slope: [[0.22882645]]
Error exp. vs calc. (%) [-3.23932555]
Species: Al2SiO5_mp-4753
Experimental slope: [0.18705777]
Predicted slope: [[0.18989162]]
Error exp. vs calc. (%) [-1.51495641]
Species: Li2SiO3_mp-5012
Experimental slope: [0.20627495]
Predicted slope: [[0.21082446]]
Error exp. vs calc. (%)

# Plots

In [None]:
plt.figure(figsize=(12,12))
# Fit values to line:
x = np.linspace(0.10, 0.3, 100)
plt.plot(x, x, color='red', linestyle='--')

from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
regr = linear_model.LinearRegression()

reg_train_x = train_y
shape_reg_train_x = np.shape(reg_train_x)
reg_train_x = np.reshape(reg_train_x, shape_reg_train_x)


reg_train_y = [total_predictions]
reg_train_y = np.reshape(reg_train_y, -1)

# Fit Model:
regr.fit(reg_train_x, reg_train_y)

linear_fit_line = []
for x_test in x:
    x_test = np.reshape(x_test, (-1, 1))
    linear_fit_line.append(regr.predict(x_test))

# Get coefficients:
print('Coefficients: \n', regr.coef_)
plt.plot(x, linear_fit_line, color='blue', linewidth=1)


mse_of_process = np.sqrt(mean_squared_error(x, linear_fit_line))
print('Mean squared error: %.4f'
      % np.sqrt(mean_squared_error(x, linear_fit_line)))

from sklearn.metrics import mean_absolute_error
y_true = train_y
y_pred = total_predictions
mae_of_process = mean_absolute_error(y_true, y_pred)

print('MAE:', mae_of_process)

# Plot predicted values:
for i in range(0, len(total_predictions)):
    x = train_y[i][0]
    y = total_predictions[i]
    u = total_uncertainties[i]
    plt.scatter(x, y, label=train_labels[i], s=100)
    plt.errorbar(x=x, y=y, yerr=u, color='black')
    plt.errorbar(x=x, y=y, yerr=mae_of_process, color='red')
    plt.annotate(train_labels[i], (x + 0.001, y + 0.001))

# Plot general tags:
plt.xlabel('Experimental slopes')
plt.ylabel('Predicted slopes')
# plt.xlim(0, 3000)
# plt.ylim(0, 3000)
plt.show()


Coefficients: 
 [0.88677551]
Mean squared error: 0.0066
MAE: 0.01012411214376011
