# Summarize Results of Model Training
Find the best model for each porblem

In [1]:
%matplotlib inline
from matplotlib import pyplot as plt
from sklearn.metrics import mean_absolute_error, r2_score
from pathlib import Path
import pandas as pd
import json

Configuration

In [2]:
prop, level = 'oxidation_potential', 'mopac_pm7-acn-adiabatic'

## Load all records
The last thing our training script does is write `params.json`. Find these files then load in the assoicated data

In [3]:
complete_runs = [x.parent for x in Path('runs').glob('*/test_summary.json')]
print(f'Found {len(complete_runs)} completed runs')

Found 1 completed runs


In [4]:
def load_summary(path: Path) -> dict:
    """Load the summary of the run configuration and parameters
    
    Args:
        path: Path to the run
    Returns:
        Summary of inputs and results
    """
    
    # Load the configuration and summary
    output = {'path': path, 'starting_model': path.parent.name}
    output.update(json.loads((path / 'params.json').read_text()))
    output.update(json.loads((path / 'test_summary.json').read_text()))
    output['level_count'] = len(output['lower_levels'])
    return output

Gather results and sort by the error with no data available about the molecule (most common case in our pipelines)

In [5]:
summary = pd.DataFrame(map(load_summary, complete_runs)).sort_values('level_0_mean_absolute_error')
summary.head()

Unnamed: 0,path,starting_model,seed,data_path,learning_rate,weight_decay,model_kwargs,num_loaders,batch_size,model,...,name,level,data_hash,level_1_mean_absolute_error,level_1_r2_score,level_1_mean_squared_error,level_0_mean_absolute_error,level_0_r2_score,level_0_mean_squared_error,level_count
0,runs/model=EGNN-prop=oxidation_potential_xtb-v...,runs,12156906,../datasets/mdf-mos,0.0005,0.0,{},4,64,EGNN,...,oxidation_potential,xtb-vertical,b499f5d21c60b5930ec8b9a780050c8a,0.105681,0.864417,0.019025,0.105681,0.864417,0.019025,1


## Plot Performance of Best Model
Plot the predicted vs actual and the learning as a function of epoch

In [9]:
best_run = summary.query(f'target_property=="{prop}" and target_method=="{level}"').sort_values('level_0_mean_absolute_error').iloc[0]
best_run

IndexError: single positional indexer is out-of-bounds

Loss vs epoch

In [None]:
log_data = pd.read_csv(best_run.run_path / 'metrics.csv')

In [None]:
fig, ax = plt.subplots(figsize=(3.5, 2.))

ax.plot(log_data['epoch'] + 1, log_data['train_loss_epoch'], 'k--o')
ax.plot(log_data['epoch'] + 1, log_data['val_loss_epoch'], 'r--o')

ax.set_yscale('log')

ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')

Predicted vs Actual

In [None]:
test_data = pd.read_csv(best_run.run_path / 'predictions.csv.gz')

In [None]:
fig, ax = plt.subplots(figsize=(3.5, 3.5))

ax.scatter(test_data['true'], test_data['pred'], s=5)

ax.set_xlim(ax.get_xlim())
ax.set_ylim(ax.get_xlim())

ax.plot(ax.get_xlim(), ax.get_xlim(), 'k--')

ax.set_xlabel('True (V)')
ax.set_ylabel('Pred (V)')


## Store Best Model for Each Property
Save the type and kwargs for each level

In [None]:
best_dir = Path('best_models')
best_dir.mkdir(exist_ok=True)
for (prop, level), group in runs.groupby(['target_property', 'target_method']):
    (best_dir / f'{prop}-{level}.json').write_text(
        json.dumps(group.drop(columns=['run_path']).sort_values('r2').iloc[-1].to_dict(), indent=2)
    )