# Analyze Models

In [1]:
def process(training_prop, model, experiment, path_to_ml_data):
    """
    Args:
        training_prop (str) - 'Ef' if models trained on formation energies; 'Ed' if decomposition energies
        model (str) - ML model
        experiment (str) - 'allMP', 'LiMnTMO', or 'smact'
        path_to_ml_data (os.PathLike) - path to ml_data directory in .../TestStabilityML/mlstabilitytest/ml_data
    
    Returns:
        Runs all relevant analyses
        Prints a summary
    """
    if (model == 'CGCNN') and (experiment == 'smact'):
        print('CGCNN cannot be applied directly to the SMACT problem because the structures are not known')
        return
    if ('random' in experiment) and (training_prop == 'Ed'):
        print('Random perturbations only apply to models trained on Ef as written')
        return
    if (experiment == 'classifier') and (training_prop == 'Ef'):
        print('Classifier experiment only applies to training on Ed')
        return
    if (model == 'CGCNN') and (training_prop == 'Ed'):
        print('CGCNN not trained on Ed')
        return
    data_dir = os.path.join(path_to_ml_data, training_prop, experiment, model)
    if not os.path.exists(data_dir):
        os.mkdir(data_dir)
        
    data_file = 'ml_input.json'
    finput = os.path.join(data_dir, data_file)
    if 'random' in experiment:
        src = finput.replace(experiment, 'allMP')
        copyfile(src, finput)
    if not os.path.exists(finput):
        print('missing data for %s-%s' % (model, experiment))
        return
    if training_prop == 'Ef':
        nprocs = 'all'
        obj = StabilityAnalysis(data_dir,
                                data_file,
                                experiment,
                                nprocs=nprocs)
    elif training_prop == 'Ed':
        obj = EdAnalysis(data_dir,
                         data_file,
                         experiment)
    else:
        raise NotImplementedError
    obj.results_summary
    print('got results')
    return

In [2]:
import os
from mlstabilitytest.stability.StabilityAnalysis import StabilityAnalysis, EdAnalysis
from shutil import copyfile

here = './mlstabilitytest/'

models = ['ElFrac', 'Meredig', 'Magpie', 'AutoMat', 'ElemNet', 'Roost', 
              'CGCNN']
experiments = ['LiMnTMO', 'allMP', 'smact',
               'random1', 'random2', 'random3',
               'classifier']
training_props = ['Ef', 'Ed']
path_to_ml_data = os.path.join(here, 'ml_data')

for training_prop in training_props:
    print('\n____ models trained on %s ____\n' % training_prop)
    for experiment in experiments:
        print('\n ~~~ %s ~~~\n' % experiment)
        experiment_dir = os.path.join(path_to_ml_data, training_prop, experiment)
        if (('random' not in experiment) and (training_prop == 'Ed')) or ((experiment != 'classifier') and (training_prop == 'Ef')):
            if not os.path.exists(experiment_dir):
                os.mkdir(experiment_dir)
        for model in models:
            print('\n %s ' % model)
            process(training_prop, model, experiment, path_to_ml_data)
            


____ models trained on Ef ____


 ~~~ LiMnTMO ~~~


 ElFrac 

Checking input data...
Data looks good.
Time elapsed = 0 s.

Reading existing results file: ./mlstabilitytest/ml_data/Ef/LiMnTMO/ElFrac/ml_results.json

Summarizing performance...

MAE on formation enthalpy = 0.099 eV/atom
MAE on decomposition enthalpy = 0.064 eV/atom

Classifying stable or unstable:
Precision = 0.062
Recall = 0.222
Accuracy = 0.861
F1 = 0.098
FPR = 0.116

Confusion matrix:
TP | FP
FN | TN = 
2 | 30
7 | 228

Time elapsed = 0 s
got results

 Meredig 

Checking input data...
Data looks good.
Time elapsed = 0 s.

Reading existing results file: ./mlstabilitytest/ml_data/Ef/LiMnTMO/Meredig/ml_results.json

Summarizing performance...

MAE on formation enthalpy = 0.098 eV/atom
MAE on decomposition enthalpy = 0.065 eV/atom

Classifying stable or unstable:
Precision = 0.062
Recall = 0.222
Accuracy = 0.861
F1 = 0.098
FPR = 0.116

Confusion matrix:
TP | FP
FN | TN = 
2 | 30
7 | 228

Time elapsed = 0 s
got results

 Ma

KeyboardInterrupt: 

# Train a Models

In [3]:
from os.path import dirname, abspath, join
from sys import argv
import json
from mlstabilitytest.training.process import problem_dictionary, target_list, model_dictionary

base_path = './'
output_base_path = join(base_path, "mlstabilitytest", "ml_data")

"""
Train models on the MP data, and make predictions in a format suitable for hull analysis

"""
try:
    problem = argv[1]
    target = argv[2]
    model_name = argv[3]
except IndexError:
    print("Arguments should be Problem Target Model")
    exit(1)

try:
    train_func = problem_dictionary[problem]
except KeyError:
    print("Invalid problem selection {}. Valid choices are {}".format(problem,
                                                                      ", ".join(problem_dictionary.keys())))
    exit(1)
try:
    if not target in target_list:
        raise ValueError
except ValueError:
    print("Invalid target selection. Valid choices are {}".format(
        ", ".join(target_list)))
    exit(1)

try:
    model = model_dictionary[model_name](target)
except KeyError:
    print("Invalid model selection. Valid choices are {}".format(
        ", ".join(model_dictionary.keys())))
    exit(1)

output_file = join(output_base_path, target, problem,
                   model_name, 'ml_input.json')

print("Training {} to predict {} using the {} dataset".format(
    model_name, target, problem))

predictions = train_func(model, target)

print("Training complete, saving predictions to {}".format(output_file))
with open(output_file, 'w') as f:
    json.dump(predictions, f)

Arguments should be Problem Target Model
Invalid problem selection -f. Valid choices are allMP, LiMnTMO, smact
Invalid target selection. Valid choices are Ed, Ef


NameError: name 'model_name' is not defined

# Testing script for decomposition only

In [5]:
import os
from mlstabilitytest.stability.StabilityAnalysis import EdAnalysis
from shutil import copyfile

here = './mlstabilitytest/'

# models = ['ElFrac', 'Meredig', 'Magpie', 'AutoMat', 'ElemNet', 'Roost', 
#               'CGCNN']
model = 'Magpie'

experiment = 'LiMnTMO'
training_prop = 'Ed'
path_to_ml_data = os.path.join(here, 'ml_data')


data_dir = os.path.join(path_to_ml_data, training_prop, experiment, model)
data_file = 'ml_input.json'

obj = EdAnalysis(data_dir, data_file, experiment)

obj.results_summary


Checking input data...
Data looks good.
Time elapsed = 0 s.

Reading existing results file: ./mlstabilitytest/ml_data/Ed/LiMnTMO/Magpie/ml_results.json

Summarizing performance...

MAE on decomposition enthalpy = 0.027 eV/atom


Classifying stable or unstable:
Precision = 0.000
Recall = 0.000
Accuracy = 0.966
F1 = 0.000
FPR = 0.000

Confusion matrix:
TP | FP
FN | TN = 
0 | 0
9 | 258

Time elapsed = 0 s
