## AutoKeras

In [None]:
import os

import autokeras as ak
import h5py
import matplotlib.pyplot as plt
import numpy as np
import sklearn
import tensorflow as tf

In [None]:
from sklearn.model_selection import train_test_split, GridSearchCV

# Path to the HDF5 file holding the "embeddings" and "labels" datasets.
# NOTE(review): the value below looks like a placeholder -- set it to the real path.
file_save_h5 = 'SAVED H5PY FILE'

# Read both datasets fully into memory before the file is closed.
with h5py.File(str(file_save_h5), "r") as hf:
    X = hf["embeddings"][:]
    y = hf["labels"][:]

# Separate into train and test sets (80/20, fixed seed for reproducibility).
# A previous 90/10 split (random_state=22) used to run just before this one,
# but its result was immediately overwritten, so it has been removed; the
# split below is the one every later cell has always been using.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

In [None]:
# Functional AutoKeras graph: input -> LSTM RNN block -> dense block ->
# regression head.
# NOTE(review): `automodel` is not referenced by the later cells visible in
# this file (evaluation and export use `search`) -- confirm it is still needed.
input_layer = ak.Input()
rnn_layer = ak.RNNBlock(layer_type="lstm")(input_layer)
dense_layer = ak.DenseBlock()(rnn_layer)
output_layer = ak.RegressionHead()(dense_layer)

# overwrite=True discards any earlier project stored under the same name.
automodel = ak.AutoModel(
    inputs=input_layer,
    outputs=output_layer,
    project_name='autokeras_lstm',
    overwrite=True,
)
automodel.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=100,
    validation_split=0.1,
)

In [None]:
# Set up the architecture search: up to 100 candidate models, each scored
# with a mean-squared-error loss.
search = ak.StructuredDataRegressor(
    project_name='property_X',
    max_trials=100,
    loss='mean_squared_error',
)

# Run the search on the training split, holding out 10% of it for validation.
# NOTE(review): artifacts saved later in this file are named
# "..._100epochs_100trials_..." while epochs=200 here -- confirm which is intended.
search.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=200,
    validation_split=0.1,
    verbose=1,
)

In [None]:
# Evaluate the best model found by the search on the held-out test split.
# The first returned value is the loss, which the search was configured to
# compute as mean squared error, so it is named and reported as MSE (it was
# previously stored in a variable misleadingly called `mae`).
mse, _ = search.evaluate(X_test, y_test, verbose=0)
print(f'MSE: {mse:.3f}')

In [None]:
# Import the metric explicitly: a bare `import sklearn` does not guarantee
# that the `sklearn.metrics` submodule has been loaded.
from sklearn.metrics import r2_score

# R2 of the searched model on both splits; the prediction arrays are kept
# because later cells reuse them.
train_predictions = search.predict(X_train)
print("Train R2 score:", r2_score(y_train, train_predictions))
test_predictions = search.predict(X_test)
print("Test R2 score:", r2_score(y_test, test_predictions))

In [None]:
# Keep only the first element of each prediction row so the results become
# flat lists (presumably one scalar per sample -- confirm the model has a
# single output).
train_predictions_list = [row[0] for row in train_predictions]
test_predictions_list = [row[0] for row in test_predictions]

In [None]:
# np.corrcoef returns the full correlation-coefficient matrix; the
# off-diagonal entries are the prediction-vs-target Pearson correlation.
corr_train = np.corrcoef(train_predictions_list, y_train)
print(f'Pearsons correlation for training set: {corr_train}')
corr_test = np.corrcoef(test_predictions_list, y_test)
# Label fixed: this line reports the test split (it used to say "training set").
print(f'Pearsons correlation for test set: {corr_test}')

In [None]:
# Predicted-vs-true scatter, one series per data split.
for predicted, actual, split_label, colour in (
    (train_predictions, y_train, "Train", '#00BDE3'),
    (test_predictions, y_test, "Test", '#b734eb'),
):
    plt.scatter(predicted, actual, label=split_label, c=colour)

plt.xlabel("Predicted property")
plt.ylabel("True property")
plt.legend()

# Optional tweaks, currently disabled: a reference line and fixed axis limits.
#plt.plot([1.5, 5.0], [0.5, 6.0], c='k', zorder=0)
#plt.xlim([2.6, 4.0])
#plt.ylim([0, 6.0])

plt.tight_layout()
# Write the figure to disk before showing it.
plt.savefig(
    'property_autoKeras_best_model_100epochs_100trials_ESM_MSE.png',
    dpi=300,
)
plt.show()

## Export and save model

In [None]:
# Export the model selected by the search so it can be inspected and saved,
# then print its layer-by-layer summary.
model = search.export_model()
model.summary()

In [None]:
# Base name shared by both on-disk formats.
export_path = "property_autoKeras_best_model_100epochs_100trials_ESM_MSE"

try:
    # Preferred: TensorFlow SavedModel format.
    model.save(export_path, save_format="tf")
except Exception as save_error:
    # Best-effort fallback to HDF5. Report why the first attempt failed rather
    # than swallowing it silently, because the file then carries a ".h5"
    # suffix that any later load must use.
    print(
        f"SavedModel export failed ({save_error!r}); "
        f"saving to {export_path}.h5 instead."
    )
    model.save(f"{export_path}.h5")

In [None]:
from tensorflow.keras.models import load_model

# The save step writes either a SavedModel at this path or, when that export
# fails, an HDF5 file at the same path with a ".h5" suffix. Load whichever
# one actually exists instead of assuming the first.
saved_model_path = "property_autoKeras_best_model_100epochs_100trials_ESM_MSE"
if not os.path.exists(saved_model_path):
    saved_model_path += ".h5"

# custom_objects lets Keras rebuild the AutoKeras-specific layers.
loaded_model = load_model(saved_model_path, custom_objects=ak.CUSTOM_OBJECTS)

In [None]:
# Import the metric explicitly: a bare `import sklearn` does not guarantee
# that the `sklearn.metrics` submodule has been loaded.
from sklearn.metrics import r2_score

# Recompute R2 on both splits with the reloaded model to check the
# save/load round trip against the scores printed earlier.
train_predictions = loaded_model.predict(X_train)
print("Train R2 score:", r2_score(y_train, train_predictions))
test_predictions = loaded_model.predict(X_test)
print("Test R2 score:", r2_score(y_test, test_predictions))