In [3]:
#!/usr/bin/python
# coding=utf-8
# Copyright 2016-2018 Angelo Ziletti
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import

__author__ = "Angelo Ziletti"
__copyright__ = "Copyright 2016-2018, The NOMAD Project"
__maintainer__ = "Angelo Ziletti"
__email__ = "ziletti@fhi-berlin.mpg.de"
__date__ = "20/04/18"

if __name__ == "__main__":
    import sys
    import os.path

    atomic_data_dir = os.path.normpath('/home/ziletti/nomad/nomad-lab-base/analysis-tools/atomic-data')
    sys.path.insert(0, atomic_data_dir)

    from nomadml.utils.utils_binaries import get_energy_diff_by_spacegroup
    from pint import UnitRegistry
    from ase.spacegroup import get_spacegroup
    from future.utils import viewitems
    from nomadml.utils.utils_config import read_configs
    from nomadml.utils.utils_config import setup_logger
    from nomadml.utils.utils_crystals import create_supercell, substitute_atoms
    from nomadml.utils.utils_data_retrieval import read_ase_db
    from nomadml.utils.utils_crystals import convert_energy_substance
    from nomadml.utils.utils_data_retrieval import write_ase_db
    from nomadml.utils.utils_crystals import get_spacegroup_old
    from nomadml.utils.utils_parsing import nmd_uri_to_ase_atoms_tmp
    from nomadml.wrappers import calc_descriptor
    from nomadml.wrappers import load_descriptor
    import seaborn as sns
    from nomadml.wrappers import calc_model, calc_embedding, plot
    from nomadml.descriptors.atomic_features import AtomicFeatures
    from nomadml.descriptors.atomic_features import get_table_atomic_features
    from nomadml.descriptors.diffraction2d import Diffraction2D
    from nomadcore.local_meta_info import loadJsonFile, InfoKindEl
    from nomadml.wrappers import get_json_list
    from nomadml.utils.utils_parsing import read_data
    import random
    import numpy as np
    import pandas as pd

    # Load the run configuration, then create the logger and the pint unit
    # registry from it.
    config_file = '/home/ziletti/Documents/nomadml_docs/config_default.yml'
    configs = read_configs(config_file)
    logger = setup_logger(configs, level='DEBUG', display_configs=False)

    # The unit-definition file handed to pint is named in the "others" section
    # of the configuration.
    ureg_definitions = os.path.normpath(configs["others"]["ureg_file"])
    ureg = UnitRegistry(ureg_definitions)

    # setup folder and files
    main_folder = '/home/ziletti/Documents/nomadml_docs/'

    def _abs_join(parent, leaf):
        """Return the absolute, normalised path of `leaf` inside `parent`."""
        return os.path.abspath(os.path.normpath(os.path.join(parent, leaf)))

    # working directories
    tmp_folder = _abs_join(main_folder, 'tmp')
    desc_folder = _abs_join(main_folder, 'desc_folder')
    results_folder = _abs_join(main_folder, 'results')

    # individual files (desc_info_file lives inside desc_folder, the rest in main_folder)
    desc_info_file = _abs_join(desc_folder, 'desc_info.json.info')
    table_file = _abs_join(main_folder, 'atomic_features_table.csv')
    figure_file = _abs_join(main_folder, 'atomic_features_plot.png')
    control_file = _abs_join(main_folder, 'control.json')
    lookup_file = _abs_join(main_folder, 'lookup.dat')

    # Units passed to the AtomicFeatures descriptor (and reused further down the script).
    energy_unit = 'eV'
    length_unit = 'angstrom'
    # kwargs = {'feature_order_by': 'atomic_mulliken_electronegativity', 'energy_unit': 'eV', 'length_unit': 'angstrom'}
    kwargs = dict(energy_unit=energy_unit, length_unit=length_unit)

    # JSON collection of atomic data read by AtomicFeatures
    path_to_collection = '/home/ziletti/nomad/nomad-lab-base/analysis-tools/atomic-data/tests/ExtendedBinaries_Dimers_Atoms_new.json'

    # The location of the NOMAD meta-info file comes from the configuration.
    metadata_info_path = configs['metadata']['nomad_meta_info']
    # metadata_info_path = os.path.normpath(os.path.join(os.path.dirname(os.path.abspath(__file__)), __meta_info_path__))
    metadata_info, warns = loadJsonFile(
        filePath=metadata_info_path,
        dependencyLoader=None,
        extraArgsHandling=InfoKindEl.ADD_EXTRA_ARGS,
        uri=None)

    # Descriptor for the 'binaries' materials class
    descriptor = AtomicFeatures(
        path_to_collection=path_to_collection,
        metadata_info=metadata_info,
        materials_class='binaries',
        configs=configs,
        **kwargs)
    # descriptor = Diffraction2D(configs=configs)

    # define operations on structures
    # operations_on_structure_list = [(create_supercell,
    #                                  dict(create_replicas_by='nb_atoms', min_nb_atoms=32, target_nb_atoms=256,
    #                                       random_rotation=False, random_rotation_before=True,
    #                                       cell_type='standard_no_symmetries', optimal_supercell=False))]
    # operations_on_structure_list = [(substitute_atoms,
    #                                  dict(create_replicas_by='nb_atoms', min_nb_atoms=32, target_nb_atoms=128,
    #                                       random_rotation=False, random_rotation_before=True,
    #                                       cell_type='standard_no_symmetries', optimal_supercell=False,
    #                                       target_sub_ratio=0.5, max_n_sub_species=1))]
    # =============================================================================
    # Descriptor calculation
    # =============================================================================

    # desc_file_name = 'desc_calc_trial'

    # NOMAD URIs ('nmd://...') of the first group of calculations.
    # In this script the list is referenced only by the commented-out
    # nmd_uri_to_ase_atoms_tmp(nmd_uris_rs + nmd_uris_zb) call further down,
    # which rebuilds the ASE database; the active code reads that database
    # from disk instead.
    # NOTE(review): 'rs' presumably stands for rock-salt structures - confirm.
    nmd_uris_rs = [u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CudDm0on_-EHhn0SHX20l2vdbSQ1x',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/Cp4wUDDucIEdS9euDT89Y6xQA_JPq',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/Cd5Tx2nPg7dFY-jys9XwKne6OQtKX',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CyukHM_doowQLr1Ipwa8feMxPVmI2',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CHiW0XWZCN8j4FL20b8tZzv7Vz59s',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CnhDURE4i9Q5yUaSUbEmarpPFd-oP',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/Chw2RDlr8RJrjY8nb2PfCE6Bf--N0',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CxJnNtspUIcqGhneVuSJKposdVxH_',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/Cl7aTuAjyxpsJM7vLAOVHYwJm-QE6',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CvkvzEExTn8uE2HYyp39OAr0XeTVs',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/C5q6OPnbkCI9OZnxRMmigkwjECTEe',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/Cq_S1pMWXyVEwLQtS_CRQLruINQc7',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CN0Q_OXA7e5yO6EkDKkOpGHM6hyCj',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/C49a5P12dU5LYRyFCHIIWWy_T06lE',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/Ch7OdXYR4ndMpTcR0zX4mqRoBZpP9',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CAlWHa4oJtvotPEJZkbrlNC_sn0h2',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/C-R6dmrIaT8iFyy2ObACxIHGWnNOy',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CKhAdygZBTTTF8uvQjKv1RdaX-cR-',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CKu0vasdF5E6n3C3QydIjCtGOIla4',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CZpgpSkSltbUhJUCSbPqBKkIvQi8v',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CctpEiY3VdEmVYwH7UjqZpiDWZtpM',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CItWNNwGWlKJ12UjZAOfNetX3xlkd',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CrTcNbJ50u8bqAFWGjPJKqnuuEvY7',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CfFPGDGw6-cK5ARIXtsKl496E6A0G',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CeXgyF2iElVtNWSX9xZhroKK8nJJ4',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/C7kHL_6prXXdx5_MzMVmwsmStNoc0',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CaMvDXJtsbfDgbdvrFdiFSKpXNYyC',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/C9sheCSX6Gol5L-IsCvDlnmT_MEGG',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CSy2hA53Gi7wXfU9SCowqkmFWD2gp',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/C51AKUeSNYXrRBK_-uc8y1-bCfUNg',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CaqU_UGMSvVN6niB3zqMYchjsLHRX',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CcyDh6nCotXyohIHh5k1dx5L5D5X9',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CudM2fYFckG7O5R4BJqg04tK_l1bd',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/C9Yuhn2S6hqpJ0cf9E9uw5G5bJlzV',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CqmzGiDuYJ-Q8j-KfLDlQBvWb2Gjt',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CCeh79N53GyBSPmZQQJ97G0eAHaDT',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CfgDGZGQLelhbTh9ZtsKqWGFxvhJ9',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CB0yzgD_PWA0LKTKGJ8ZZuD33YEUG',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CAy8gmVjhXEQxzCJ8LOWZakyvudO8',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CxHkLq1KNzXy50hbEPelPan0cCrsH',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CWkXOKoiw0iAE585QkElUZNCYOqYI',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CgemHvDiNrY7gsuhCx0VTL8kPb7AM',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/ClIiyctCzbm5lbDOxpwEi3GbORHRD',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CGRvHpDj8bRbzvIL0c9yfOmeZjfah',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CHG50HtPjrXvkxeCITsjzFtq9N4hK',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CZuBrsUzsdX__rAeKn_JQgfX-YGoo',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CshguFKUbvULqOUN80QC-M3xQvrmJ',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CXoBZDFv4BhXvWTO29YBcGBXXu1vS',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CsmYUa8-6qr40jG7XJhUIynL1Ue8b',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/C-M1B6jU_t-kPPKkoFU9kZkEbx332',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/Cq1Tvh6kPeJ-PO77jXvOsp92PMK4P',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CfA960McVCueQzY_t-TVT0wgbZCC5',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/C-eZyP4BB8uo0pdmQIIrat2mhXQBN',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CmenrFglDQWoTWLNvVVobyI3dmkIe',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CoEQrdbxDvcCldS5_cpSOcAS57svB',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CQUYoBu1tULTvysw2jz8XwBnIeewS',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CYOw5h3ttt0tMyUPOvqDPc6yArPTy',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/Czl8jOEFAC45VXxnxMJ7_nf2xS6v2',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CmkSmszyXIzY3yIzTUnkvOCwqNFFg',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/C7JW4GQVa_xQ4YKM88F9LFzyVoXke',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CsL8fyWYvrq6V0pE3zTfoNsWAVUd1',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CbVMALFnpGdoEyabKhI_3DtbUX6W7',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CME2sPwrfVW7U0veuObWai6ryPqou',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CSzD2TkeF0Gg9fnlc1cNvyK7NL24E',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CQB49fu8BN3kua7uLKQLlT5dWdHi0',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CzkPWSKWCQ14F1io7eGkOhK7h0O_Q',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/C_18Q5thpEagvBD-4tUdeTopJwCTV',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CKmJzuNjx3TVhnGoqyqoWSnwEgjGm',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/C7oSkZrV7zs3KG6S8IZyTSE_FmwBQ',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CIwvZmuUIIrtn2HcPLoozMo73I4uz',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/C-70iigSqFlM9BO7d8xReToc2yoJL',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CS3i4SxrlnWE9AUgOmy00D3f5dMgE',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CKSpxMXqdSstTt6Es26kroYBYENnq',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/C_DFu-YobOdcOb1mfdI22vrtaSQAh',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CZQkSWOat-bIQV5IVle0tBtpUg_u-',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/Co4TFqVuLhanZRORPTA7dDA2sdbrg',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CvVLlR_Pq2Ibks3hWK2HOSJ1GgVRY',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CmmsPJ6ouZjFnoIdGfis_3AHs9clP',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CQESlzgesuFywpq09x-vZ0gikcjPf',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CKfJHS4WQGppgde2dACUjMuVoL2sB',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CcEzW227FYLGI2t3jk-gLCgcEZWZe',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/C36pL30yblwhze_vHZYZ_cybqeH4V']

    # NOMAD URIs ('nmd://...') of the second group of calculations.
    # In this script the list is referenced only by the commented-out
    # nmd_uri_to_ase_atoms_tmp(nmd_uris_rs + nmd_uris_zb) call further down,
    # which rebuilds the ASE database; the active code reads that database
    # from disk instead.
    # NOTE(review): 'zb' presumably stands for zinc-blende structures - confirm.
    nmd_uris_zb = [u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CjGykEyzLOFynTPTNDcycF0GYg1PE',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CRzzJL7OHYeejsIvgfG1ph6BAeS_q',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CNavIaZhgwAeZM0-QhWHe_38iUgEF',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CpxTrTc6NkExiq1nzQkhd3cJ1-yRY',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CVYIDqiD6OslrGDcpUvuqvc0bD8Jr',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CiZTl1-v3bdCUDjxt-w2VxKMGW3-6',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/ComJQS9nQ4WsUIr718n6H4YbM0Fi7',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CEC0GOHh7MviqeJkG1qukjk4bALIS',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CNXyczNslCGZT642R9ZFYGvidFvua',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CpyJZtNPyNqX3ofwfNMpvJU_9PEKI',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CA3s37bS9VLUzI5wYL_ntZ6RIM6IJ',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CvEU0OOwFN7eFqiwt9m9S_SmhCJUm',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CsJUIeSLEotoIZk6R54H-G2JWPnQG',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CUT_umVDXGUamLH1R7nkazwKz95dz',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/C_rlThO8Jv0C2YIgYKLbCTM1rvfW-',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CyizrsR40QyxopYKKk2jUtl7nElXJ',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CQhb3_h4Bo9e5xjhhTUBY_8uOEtTM',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/C0tj3NYHfrit7NB0ewfG-fIjRWuJD',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/ClrW3wNN03bq-G8TLkXHpALwSTUUg',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/C3hSpXydSB6z3p79OEIOQK6llto1K',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/Ca7ltCNBQsk7Owlu0bJnsE8iY-Rmw',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CYeYlDJb7j4qJ9ol38GSM_eYJsiSe',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CX39jEfgLeDPddrkPuTvUfVv4_thl',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CWNXU92VwL7KkuoxItglRiuifcOnk',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CoWlbXJuGJ4-22DclM15L_g44LN3P',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CEYkIqgUpWfoq4Tcsy8_bFVUs9mko',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/Cst86UVhV07OfKDhwlp0PNxiMtXki',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CVc9Pn-w-6MEpu41jV4p_keLeM-Yy',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/C81vIYtJtOEx4n865B86z-KvUb6hA',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/C8BMwwn-g_0Xezs6oK3ay6ZRXIRR-',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/C8nQ1bSGP4pyRMOa4i5Uhjpb1Mord',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CCmzvvjhPSzkp_bhkAuDWK375Fs9g',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/ChaEAJ72mzGm65KpjGcnVVlFax_l7',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CvXy3VrpadhZLQAwphJE6GVB_0OUp',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/C3486jK16L4wlXG0B4v1-csMQ6oJ3',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/COL8lsjGueHjkLb9hMpptti6c8Phg',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CULp85M-UCJ52e4UaGcyeLDmUeq2Z',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/Cr6E85ezTMa4WX-GTFoms6w0Rb0hT',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/C0e8gRRxOvcJquPDa7SeYFk2OCiFS',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/Cx8VpfH0LzX99ht06ME-0EsmmrSqe',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CM3KjHYJjTA26va4uYXD8homH7pUm',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/C6GMfPCT_Fa40hXkVKpqnygEBt4PG',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/Cu9uI6ldU3ZVgwfmy-Um0D7IBxTCK',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CN0ekL4w0k3A5OXacRBScDJ9KlGMf',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/C39GmzBY478BzuXrZM-0i2Z-njyb9',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CEhAEeu8aSPA_d3_dHCjTlAi1y09j',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CVshjrYqjAg_8QtgfGW2ABnR-mlIP',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CDjDFM8OWbY0TtHCY-DBYRJmRQ8fO',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CxkyksXrwYuxnJ_wD7qN890rycd67',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CUDiBRLe2pg5Hjvd3kc_20wMbinMI',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CpikjM2BVj1atNlsbkcJzK9TkUIox',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CcYC-NeMnx_goUeYg8PmaNVo0chDc',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CQFhJGR6USZg0MrTttQvXm1IiHIdq',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CGvuYKpzVCcWnf33I4uy8fGyJVxXq',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CV--hzM8rvSS8a6LZBuuW6IPbqvY6',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CHQ9WIyu3N7whYM_ykFZunv5to3l2',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/C2_H4gO4T7T7jknE474jzrc1Y4Tjm',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CFs1SxWG79Zqj0jssdOIqUicZk9aH',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CiOIHShEKCjdganj-Sd0MkJaLglGr',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/Cn-jKNaG1IM7sKsxBh-ekfl3M3hIa',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/C_80H6vlBefw1U3rKFDPPtpJAX1GH',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CdHoVKHCES7XtBpTVk0eihbo0kqmR',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CAzsSjYMU1-CdGulNpG_KzgFlfRrK',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CVlh5JoGn6jHWlE96SKt6eRMYTIVK',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CHfNgOoPEHjzs9iOh900vIUv-GVJl',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CVTUPQyCTvrAWV0DEN_xnPgrBPmM2',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/Cws96oc5f7jIltD9Vvqc3svzL4mcW',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CLJBz0uY-AywnUhGMCXMounM-_Af3',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/Ckole11VWAOiu91qHeq6lOzIM2Y1Y',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/C2R4Ds9DFm8USF_AgHtQnWK1TkQiR',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CxrF4NRKjX9jsmVIocs7uQuLwD_cS',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CMxYGoRCMDXQWrNytWJHc-vUgRKTT',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CK3-3e-av7nkv5AOEwjZyyjkI9Hgy',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CC8N-y0PPPHeAwhkYGyYYI9H1UUHy',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CqWPF7Pn3u9LPGyrxipPfrpfm31zz',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CM_ADyGOaL4e2biSXvxQWrEDM78Z3',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CfGXdJkORwLQ-aX-d9bla7obqtnkt',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CmILc9BsSYjJ9OKH4MkPr0D4LGYGC',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/Cy-J0ezaQ_Fdsh_196hT-XgsYNQAs',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/CUJUPZHk2jrE1KVUS7H13mKBH4oVR',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/COIYfYCEIron9yzowfHWhVea-VEFW',
                   u'nmd://NWApItBGtGUDsfMVlHKqrjUQ4rShT/C_1mfRE8eDZ7zCLQwGT_3n8YC34dE']

    # Base name (without extension) of the descriptor archive.
    desc_file_name = 'atomic_features_binaries'

    # Recipe to rebuild the ASE database from the NOMAD URIs (kept for reference):
    # ase_atoms_list = nmd_uri_to_ase_atoms_tmp(nmd_uris_rs + nmd_uris_zb)
    #
    # ase_db_file = write_ase_db(ase_atoms_list=ase_atoms_list,
    #                            db_name='binaries_ghiringhelli2015', main_folder=main_folder,
    #                            folder_name='db_ase')
    # sys.exit()
    # ase_db_file_zb = '/home/ziletti/Documents/nomadml_docs/db_ase/zb_binaries_ghiringhelli2015.db'
    # ase_db_file_rs = '/home/ziletti/Documents/nomadml_docs/db_ase/rs_binaries_ghiringhelli2015.db'
    # ase_atoms_list = read_ase_db(db_path=ase_db_file_zb) + read_ase_db(db_path=ase_db_file_rs)

    # Read the structures from the already-written ASE database.
    ase_db_file = '/home/ziletti/Documents/nomadml_docs/db_ase/binaries_ghiringhelli2015.db'
    ase_atoms_list = read_ase_db(db_path=ase_db_file)

    # Atomic features requested from the descriptor.
    selected_feature_list = [
        'atomic_ionization_potential',
        'atomic_electron_affinity',
        'atomic_rs_max',
        'atomic_rp_max',
        'atomic_rd_max',
    ]

    # Space-group pairs passed as `equiv_spgroups` when computing the energy differences.
    equiv_spgroups = [(225, 221), (216, 227)]
    dict_delta_e = get_energy_diff_by_spacegroup(ase_atoms_list, target='energy_total',
                                                 equiv_spgroups=equiv_spgroups)

    # convert dict_delta_e in eV:
    # The entries are overwritten in place; iterating over a snapshot of the keys
    # keeps the mapping object (and its type) untouched.
    for formula in list(dict_delta_e):
        dict_delta_e[formula] = convert_energy_substance('J', dict_delta_e[formula], ureg=ureg,
                                                         energy_unit=energy_unit, length_unit=length_unit)

    # Hard-coded table mapping a chemical formula to a floating-point value.
    # It is not referenced anywhere else in the visible script.
    # NOTE(review): presumably the expected ("correct") energy differences, in the
    # unit dict_delta_e has before its conversion to eV, kept to cross-check
    # get_energy_diff_by_spacegroup - confirm purpose and units.
    dict_delta_e_correct = dict(SeZn=4.2159179660687287e-20, InSb=1.2506570609555687e-20, AgCl=-6.8568799955090609e-21,
                                SZn=4.4190167090138643e-20, BN=2.7430550221687302e-19, GaSb=2.4773702178144287e-20,
                                BrRb=-2.6246943074069208e-20, BaTe=-6.0143598233041078e-20, BeSe=7.9298202208796193e-20,
                                MgS=-1.3890792272791661e-20, AsB=1.4018696180162358e-19, AlAs=3.416831556361176e-20,
                                BP=1.632978757533666e-19, TeZn=3.9253535511316122e-20, MgSe=-8.8603255975880209e-21,
                                ClLi=-6.149391549293258e-21, FK=-2.3456843288794608e-20, BrLi=-5.2465217764284492e-21,
                                BSb=9.3062289002001379e-20, ClRb=-2.5715504707788539e-20, GeSn=1.3083912918128912e-20,
                                CsI=-2.6017342042091155e-20, CaTe=-5.6149286826500206e-20, ClK=-2.6349506219210773e-20,
                                Sn2=2.7179163244033424e-21, BrCs=-2.4972695386489382e-20, CsF=-1.734569615637085e-20,
                                BrCu=2.442400384019518e-20, CaSe=-5.7806170659176063e-20, AgF=-2.4634695420313482e-20,
                                MgTe=-7.3560522736479063e-22, FLi=-9.5310792412059186e-21, CuF=-2.7272687327072279e-21,
                                FNa=-2.3357835066436331e-20, C2=4.2114873809101575e-19, BaO=-1.4900011177054134e-20,
                                AgBr=-4.8118839046830307e-21, MgO=-3.721451404088126e-20, FRb=-2.1724838814450727e-20,
                                AlN=1.1687730189874494e-20, Si2=4.4727296163305501e-20, SiSn=2.1646816357014748e-20,
                                OSr=-3.5297012817210187e-20, ClNa=-2.1307665354820141e-20, AsIn=2.14767895373523e-20,
                                OZn=1.633710321777262e-20, CGe=1.3000748379902827e-19, CdO=-1.348413629348854e-20,
                                InP=2.8709930119109753e-20, SSr=-5.9029656118592692e-20, InN=2.4628706405675984e-20,
                                BaSe=-5.5025977545738119e-20, BrK=-2.6624325013472597e-20, BeTe=7.5075740576200973e-20,
                                CdS=1.1643465692630618e-20, CdTe=1.8351256432814928e-20, GeSi=4.217091895225337e-20,
                                GaP=5.5876198809236085e-20, CdSe=1.3389702567051265e-20, INa=-1.8399111846593045e-20,
                                AlP=3.5080994271602511e-20, BeO=1.1084460139894976e-19, AsGa=4.3944144343852047e-20,
                                Ge2=3.218012279776111e-20, SeSr=-6.0003270319365202e-20, CSi=1.071894196156348e-19,
                                BaS=-5.1231589897332471e-20, AgI=5.9161045280535275e-21, GaN=6.9445584247860156e-20,
                                CaS=-5.9141658617526103e-20, AlSb=2.5133142314028706e-20, IK=-2.6762621853286689e-20,
                                ILi=-3.4704646494924847e-21, ClCs=-2.4088110334584613e-20, CaO=-4.2492775596486839e-20,
                                CuI=3.2792483878850995e-20, CSn=7.2664795347721018e-20, BeS=8.1122637897805144e-20,
                                IRb=-2.6788624696652254e-20, BrNa=-2.0256115610545176e-20, SrTe=-6.0769715445352658e-20,
                                ClCu=2.5035406212316063e-20)

    # -------------------------------------------------------------------------
    # 1. Compute the descriptor for every structure.
    # -------------------------------------------------------------------------
    # Archive name handed to calc_descriptor and reused below to locate the file.
    desc_file = str(desc_file_name) + '.tar.gz'
    calc_descriptor(descriptor=descriptor, configs=configs, ase_atoms_list=ase_atoms_list,
                    tmp_folder=tmp_folder, desc_folder=desc_folder, desc_info_file=desc_info_file,
                    desc_file=desc_file, format_geometry='aims',
                    operations_on_structure=None,
                    selected_feature_list=selected_feature_list,  # spacegroup_tuples,
                    nb_jobs=-1)

    # -------------------------------------------------------------------------
    # 2. Load the descriptor back and build the feature table.
    # -------------------------------------------------------------------------
    # The archive location is derived from desc_folder and desc_file instead of being
    # spelled out a second time as an absolute path, so it cannot drift from the
    # arguments given to calc_descriptor above.
    desc_file_path = os.path.join(desc_folder, desc_file)
    target_list, structure_list = load_descriptor(desc_files=desc_file_path, configs=configs)
    df_atomic_features = get_table_atomic_features(structure_list)

    # TO DO: check consistency in the order of atomic species
    # The table's own chemical_formula column is replaced below by the Hill-notation
    # formula taken directly from each structure.
    df_atomic_features.drop('chemical_formula', axis=1, inplace=True)

    # per-structure bookkeeping: total energy, NOMAD URI and Hill formula
    columns = ['energy_total', 'nmd_uri', 'chemical_formula']
    values = [(structure.info['energy_total'],
               structure.info['nmd_uri'],
               structure.get_chemical_formula(mode='hill'))
              for structure in structure_list]
    df_extra_info = pd.DataFrame.from_records(values, columns=columns)
    df = pd.concat([df_atomic_features, df_extra_info], axis=1, join='inner')

    # -------------------------------------------------------------------------
    # 3. Keep, for each chemical formula, the row of the lowest-energy structure.
    # -------------------------------------------------------------------------
    # drop_duplicates keeps whole rows. groupby(...).first() would instead pick the
    # first non-null value of every column independently, which can combine values
    # coming from different structures when a cell is missing.
    df = df.sort_values(by='energy_total').drop_duplicates(subset='chemical_formula', keep='first')
    # one row per formula, ordered by formula, with a fresh 0..n-1 index
    df = df.sort_values(by='chemical_formula').reset_index(drop=True)

    # add target (energy difference) based on dict_delta_e
    df['target'] = df['chemical_formula'].map(dict_delta_e)

    # Series.map leaves NaN for formulas that are not keys of dict_delta_e; report
    # them instead of passing NaN targets on silently.
    missing_formulas = df.loc[df['target'].isnull(), 'chemical_formula'].tolist()
    if missing_formulas:
        logger.warning("No energy difference found in dict_delta_e for: %s. "
                       "The corresponding targets are NaN.", missing_formulas)

    target = np.asarray(df['target'].values.astype(float))

    # features given to the l1-l0 minimization: a copy of df without the target column
    df_features = df.drop('target', axis=1)

    # -------------------------------------------------------------------------
    # 4. Model calculation.
    # -------------------------------------------------------------------------
    # drop columns that are not features
    cols_to_drop = ['nmd_uri', 'energy_total', 'chemical_formula']
    allowed_operations = ['+', '-', '/', '|-|', 'exp', '^2']
    max_dim = 2

    calc_model(method='l1_l0', metadata_info=metadata_info, df_features=df_features, cols_to_drop=cols_to_drop,
               target=target, max_dim=max_dim,
               allowed_operations=allowed_operations, tmp_folder=tmp_folder, results_folder=results_folder,
               lookup_file=lookup_file, control_file=control_file, energy_unit=energy_unit, length_unit=length_unit)

    # Stop here: nothing after this call is executed.
    sys.exit()











    # NOTE: the script calls sys.exit() before reaching this point, so this section
    # does not run unless that call is removed.

    # retrieve delta_e with load_descriptor using target_list
    desc_file_path = '/home/ziletti/Documents/nomadml_docs/desc_folder/atomic_features_binaries_rs.tar.gz'
    target_list, ase_atoms_list = load_descriptor(desc_files=desc_file_path, configs=configs)

    # Build the atomic-feature table and save it as CSV.
    df_atomic_features = get_table_atomic_features(ase_atoms_list)
    df_atomic_features.to_csv(table_file)

    # Draw the table as an annotated seaborn heatmap, one row per chemical formula;
    # cells without a value are masked out.
    sns.set(font_scale=0.3)
    df_atomic_features = df_atomic_features.set_index('chemical_formula')
    missing_cells = df_atomic_features.isnull()
    heatmap_axes = sns.heatmap(df_atomic_features, annot=True, mask=missing_cells,
                               annot_kws=dict(size=3))

    # Save the figure to disk.
    heatmap_figure = heatmap_axes.get_figure()
    heatmap_figure.tight_layout()
    heatmap_figure.savefig(figure_file, dpi=200)

    logger.info("Calculation completed.")


INFO: Metadata for descriptor AtomicFeatures: [u'atomic_features_table']
INFO: Metadata for descriptor AtomicFeatures: [u'atomic_features_table']
INFO: Reading atomic collection from '/home/ziletti/nomad/nomad-lab-base/analysis-tools/atomic-data/tests/ExtendedBinaries_Dimers_Atoms_new.json'
INFO: Reading atomic collection from '/home/ziletti/nomad/nomad-lab-base/analysis-tools/atomic-data/tests/ExtendedBinaries_Dimers_Atoms_new.json'
INFO: Ordering atomic features by 'atomic_mulliken_electronegativity' of the elements
INFO: Ordering atomic features by 'atomic_mulliken_electronegativity' of the elements
INFO: Database length: 164
INFO: Database length: 164


NameError: name 'dict_delta_e' is not defined