# Spacegroup determination: comparing spglib vs neural network based on the diffraction intensity in spherical harmonics (DISH) descriptor

Author: Angelo Ziletti (angelo.ziletti@gmail.com; ziletti@fhi-berlin.mpg.de)

In [40]:
from ai4materials.dataprocessing.preprocessing import load_dataset_from_file
from ai4materials.wrappers import load_descriptor
from ai4materials.utils.utils_config import set_configs
from ai4materials.utils.utils_config import setup_logger
from ai4materials.dataprocessing.preprocessing import load_dataset_from_file
from ai4materials.dataprocessing.preprocessing import prepare_dataset
from ase.spacegroup import get_spacegroup as ase_get_spacegroup
from collections import Counter
import itertools
import matplotlib.pyplot as plt
import numpy as np
import os
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
%matplotlib inline  

# Root folder of this study; the dataset and descriptor folders are
# sub-folders of it.
main_folder = '/home/ziletti/Documents/calc_nomadml/rot_inv_3d/'
# os.path.abspath already normalises the joined path, so a separate
# os.path.normpath call is not needed.
dataset_folder = os.path.abspath(os.path.join(main_folder, 'datasets'))
desc_folder = os.path.abspath(os.path.join(main_folder, 'desc_folder'))

# Build the configuration for this main folder and start an INFO-level logger.
configs = set_configs(main_folder=main_folder)
logger = setup_logger(configs, level='INFO', display_configs=False)

# Point the I/O section of the configuration at the folders defined above;
# the cells below read the descriptor location from configs['io'].
configs['io']['dataset_folder'] = dataset_folder
configs['io']['desc_folder'] = desc_folder

## Pristine

In [41]:
# Load the descriptor archives of the pristine structures for the five
# prototypes and build y_true, the list of true spacegroup numbers.
#
# Each prototype has five archives whose names differ only in the `rotid`
# index (0-4). The relative paths are therefore generated from one template
# per prototype instead of being typed out; the generated lists contain
# exactly the same strings, in the same order, as the hand-written ones.

# hcp - spacegroup 194
filenames_pristine_hcp = [
    'hcp/pristine/A_hP2_194_c_target_nb_atoms128_rotid{}_pristine.tar.gz'.format(rot_id)
    for rot_id in range(5)]

# sc - spacegroup 221
filenames_pristine_sc = [
    'sc/pristine/A_cP1_221_a_target_nb_atoms128_rotid{}_pristine.tar.gz'.format(rot_id)
    for rot_id in range(5)]

# fcc - spacegroup 225
filenames_pristine_fcc = [
    'fcc/pristine/A_cF4_225_a_target_nb_atoms128_rotid{}_pristine.tar.gz'.format(rot_id)
    for rot_id in range(5)]

# diam - spacegroup 227
filenames_pristine_diam = [
    'diam/pristine/A_cF8_227_a_target_nb_atoms128_rotid{}_pristine.tar.gz'.format(rot_id)
    for rot_id in range(5)]

# bcc - spacegroup 229
filenames_pristine_bcc = [
    'bcc/pristine/A_cI2_229_a_target_nb_atoms128_rotid{}_pristine.tar.gz'.format(rot_id)
    for rot_id in range(5)]

# Prefix every relative name with the descriptor folder from the configuration.
desc_files_pristine_hcp = [os.path.join(configs['io']['desc_folder'], item) for item in filenames_pristine_hcp]
desc_files_pristine_sc = [os.path.join(configs['io']['desc_folder'], item) for item in filenames_pristine_sc]
desc_files_pristine_fcc = [os.path.join(configs['io']['desc_folder'], item) for item in filenames_pristine_fcc]
desc_files_pristine_diam = [os.path.join(configs['io']['desc_folder'], item) for item in filenames_pristine_diam]
desc_files_pristine_bcc = [os.path.join(configs['io']['desc_folder'], item) for item in filenames_pristine_bcc]

# load_descriptor returns a (target_list, structure_list) pair for the given
# archives. After each prototype is loaded, one spacegroup label per loaded
# structure is appended, so y_true follows the order hcp, sc, fcc, diam, bcc.
y_true = []
target_list_hcp_pristine, structure_list_hcp_pristine = load_descriptor(desc_files=desc_files_pristine_hcp, configs=configs)
y_true += [194] * len(structure_list_hcp_pristine)

target_list_sc_pristine, structure_list_sc_pristine = load_descriptor(desc_files=desc_files_pristine_sc, configs=configs)
y_true += [221] * len(structure_list_sc_pristine)

target_list_fcc_pristine, structure_list_fcc_pristine = load_descriptor(desc_files=desc_files_pristine_fcc, configs=configs)
y_true += [225] * len(structure_list_fcc_pristine)

target_list_diam_pristine, structure_list_diam_pristine = load_descriptor(desc_files=desc_files_pristine_diam, configs=configs)
y_true += [227] * len(structure_list_diam_pristine)

target_list_bcc_pristine, structure_list_bcc_pristine = load_descriptor(desc_files=desc_files_pristine_bcc, configs=configs)
y_true += [229] * len(structure_list_bcc_pristine)

INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/pristine/A_hP2_194_c_target_nb_atoms128_rotid0_pristine.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/pristine/A_hP2_194_c_target_nb_atoms128_rotid0_pristine.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/pristine/A_hP2_194_c_target_nb_atoms128_rotid0_pristine.tar.gz
INFO: Extracting file 2/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/pristine/A_hP2_194_c_target_nb_atoms128_rotid1_pristine.tar.gz
INFO: Extracting file 2/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/pristine/A_hP2_194_c_target_nb_atoms128_rotid1_pristine.tar.gz
INFO: Extracting file 2/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/pristine/A_hP2_194_c_target_nb_atoms128_rotid1_pristine.tar.gz
INFO: Extracting file 3/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/d

INFO: Extracting file 4/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/diam/pristine/A_cF8_227_a_target_nb_atoms128_rotid3_pristine.tar.gz
INFO: Extracting file 4/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/diam/pristine/A_cF8_227_a_target_nb_atoms128_rotid3_pristine.tar.gz
INFO: Extracting file 4/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/diam/pristine/A_cF8_227_a_target_nb_atoms128_rotid3_pristine.tar.gz
INFO: Extracting file 5/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/diam/pristine/A_cF8_227_a_target_nb_atoms128_rotid4_pristine.tar.gz
INFO: Extracting file 5/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/diam/pristine/A_cF8_227_a_target_nb_atoms128_rotid4_pristine.tar.gz
INFO: Extracting file 5/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/diam/pristine/A_cF8_227_a_target_nb_atoms128_rotid4_pristine.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_in

In [42]:
# Merge the per-prototype lists in the order hcp, sc, fcc, diam, bcc -- the
# same order in which y_true was filled -- so that y_true[i] is the label of
# entry i in both merged lists.
structure_list_pristine = (structure_list_hcp_pristine
                           + structure_list_sc_pristine
                           + structure_list_fcc_pristine
                           + structure_list_diam_pristine
                           + structure_list_bcc_pristine)
target_list_pristine = (target_list_hcp_pristine
                        + target_list_sc_pristine
                        + target_list_fcc_pristine
                        + target_list_diam_pristine
                        + target_list_bcc_pristine)

# Write the true spacegroup number into the first 'data' record of every
# target entry ...
for idx, item in enumerate(target_list_pristine):
    item['data'][0]['target'] = y_true[idx]

# ... and into the info dictionary of every structure.
for idx, structure in enumerate(structure_list_pristine):
    structure.info['target'] = y_true[idx]

# Disabled: spacegroup prediction with ASE's get_spacegroup (symprec=1e-1),
# kept as a reference for the comparison.
#y_pred_pristine = []
#for structure in structure_list_pristine:
#    y_pred_pristine.append(ase_get_spacegroup(structure, symprec=1e-1).no)

In [43]:
# make dataset
# Turn the labelled pristine structures into a dataset on disk.
# prepare_dataset returns the paths of the three files it writes (x, y and a
# summary), which are then read back to inspect the result.
path_to_x, path_to_y, path_to_summary = prepare_dataset(
    structure_list=structure_list_pristine,
    target_list=target_list_pristine,
    desc_metadata='diffraction_3d_sh_spectrum',
    dataset_name='hcp-sc-fcc-diam-bcc_pristine',
    target_name='target',
    target_categorical=True,
    input_dims=(50, 32),
    configs=configs,
    dataset_folder=dataset_folder,
    main_folder=configs['io']['main_folder'],
    desc_folder=configs['io']['desc_folder'],
    tmp_folder=configs['io']['tmp_folder'],
    # The fifth prototype is bcc; the note previously listed "sc" twice.
    notes="Hcp, sc, fcc, diam and bcc structures pristine")

x, y, dataset_info = load_dataset_from_file(
    path_to_x=path_to_x,
    path_to_y=path_to_y,
    path_to_summary=path_to_summary)

# Number of samples per class label; displayed as the cell output.
Counter(y)

INFO: Writing x to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_pristine_x.pkl
INFO: Writing x to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_pristine_x.pkl
INFO: Writing x to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_pristine_x.pkl
INFO: Writing y to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_pristine_y.pkl
INFO: Writing y to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_pristine_y.pkl
INFO: Writing y to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_pristine_y.pkl
INFO: Summary file written in /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_pristine_summary.json.
INFO: Summary file written in /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_pristine_summary.json.
INFO: Summary file written in /home/ziletti/Documents/calc_nomadml/rot_inv

Counter({0: 4500, 1: 900, 2: 900, 3: 900, 4: 900})

## Displacements 0.4%

In [44]:
# Load the descriptor archives of the structures with 0.4% displacements
# (file suffix `disp0004`) for the five prototypes and build y_true, the list
# of true spacegroup numbers.
#
# Each prototype has five archives whose names differ only in the `rotid`
# index (0-4). The relative paths are therefore generated from one template
# per prototype instead of being typed out; the generated lists contain
# exactly the same strings, in the same order, as the hand-written ones.

# hcp - spacegroup 194
filenames_disp04_hcp = [
    'hcp/disp/A_hP2_194_c_target_nb_atoms128_rotid{}_disp0004.tar.gz'.format(rot_id)
    for rot_id in range(5)]

# sc - spacegroup 221
filenames_disp04_sc = [
    'sc/disp/A_cP1_221_a_target_nb_atoms128_rotid{}_disp0004.tar.gz'.format(rot_id)
    for rot_id in range(5)]

# fcc - spacegroup 225
filenames_disp04_fcc = [
    'fcc/disp/A_cF4_225_a_target_nb_atoms128_rotid{}_disp0004.tar.gz'.format(rot_id)
    for rot_id in range(5)]

# diam - spacegroup 227
filenames_disp04_diam = [
    'diam/disp/A_cF8_227_a_target_nb_atoms128_rotid{}_disp0004.tar.gz'.format(rot_id)
    for rot_id in range(5)]

# bcc - spacegroup 229
filenames_disp04_bcc = [
    'bcc/disp/A_cI2_229_a_target_nb_atoms128_rotid{}_disp0004.tar.gz'.format(rot_id)
    for rot_id in range(5)]

# Prefix every relative name with the descriptor folder from the configuration.
desc_files_disp04_hcp = [os.path.join(configs['io']['desc_folder'], item) for item in filenames_disp04_hcp]
desc_files_disp04_sc = [os.path.join(configs['io']['desc_folder'], item) for item in filenames_disp04_sc]
desc_files_disp04_fcc = [os.path.join(configs['io']['desc_folder'], item) for item in filenames_disp04_fcc]
desc_files_disp04_diam = [os.path.join(configs['io']['desc_folder'], item) for item in filenames_disp04_diam]
desc_files_disp04_bcc = [os.path.join(configs['io']['desc_folder'], item) for item in filenames_disp04_bcc]

# load_descriptor returns a (target_list, structure_list) pair for the given
# archives. After each prototype is loaded, one spacegroup label per loaded
# structure is appended, so y_true follows the order hcp, sc, fcc, diam, bcc.
y_true = []
target_list_hcp_disp04, structure_list_hcp_disp04 = load_descriptor(desc_files=desc_files_disp04_hcp, configs=configs)
y_true += [194] * len(structure_list_hcp_disp04)

target_list_sc_disp04, structure_list_sc_disp04 = load_descriptor(desc_files=desc_files_disp04_sc, configs=configs)
y_true += [221] * len(structure_list_sc_disp04)

target_list_fcc_disp04, structure_list_fcc_disp04 = load_descriptor(desc_files=desc_files_disp04_fcc, configs=configs)
y_true += [225] * len(structure_list_fcc_disp04)

target_list_diam_disp04, structure_list_diam_disp04 = load_descriptor(desc_files=desc_files_disp04_diam, configs=configs)
y_true += [227] * len(structure_list_diam_disp04)

target_list_bcc_disp04, structure_list_bcc_disp04 = load_descriptor(desc_files=desc_files_disp04_bcc, configs=configs)
y_true += [229] * len(structure_list_bcc_disp04)

INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/disp/A_hP2_194_c_target_nb_atoms128_rotid0_disp0004.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/disp/A_hP2_194_c_target_nb_atoms128_rotid0_disp0004.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/disp/A_hP2_194_c_target_nb_atoms128_rotid0_disp0004.tar.gz
INFO: Extracting file 2/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/disp/A_hP2_194_c_target_nb_atoms128_rotid1_disp0004.tar.gz
INFO: Extracting file 2/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/disp/A_hP2_194_c_target_nb_atoms128_rotid1_disp0004.tar.gz
INFO: Extracting file 2/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/disp/A_hP2_194_c_target_nb_atoms128_rotid1_disp0004.tar.gz
INFO: Extracting file 3/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/disp/A_hP

INFO: Extracting file 4/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/diam/disp/A_cF8_227_a_target_nb_atoms128_rotid3_disp0004.tar.gz
INFO: Extracting file 4/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/diam/disp/A_cF8_227_a_target_nb_atoms128_rotid3_disp0004.tar.gz
INFO: Extracting file 5/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/diam/disp/A_cF8_227_a_target_nb_atoms128_rotid4_disp0004.tar.gz
INFO: Extracting file 5/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/diam/disp/A_cF8_227_a_target_nb_atoms128_rotid4_disp0004.tar.gz
INFO: Extracting file 5/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/diam/disp/A_cF8_227_a_target_nb_atoms128_rotid4_disp0004.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/bcc/disp/A_cI2_229_a_target_nb_atoms128_rotid0_disp0004.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/bcc/disp

In [45]:
# Merge the per-prototype lists in the order hcp, sc, fcc, diam, bcc -- the
# same order in which y_true was filled -- so that y_true[i] is the label of
# entry i in both merged lists.
structure_list_disp04 = (structure_list_hcp_disp04
                         + structure_list_sc_disp04
                         + structure_list_fcc_disp04
                         + structure_list_diam_disp04
                         + structure_list_bcc_disp04)
target_list_disp04 = (target_list_hcp_disp04
                      + target_list_sc_disp04
                      + target_list_fcc_disp04
                      + target_list_diam_disp04
                      + target_list_bcc_disp04)

# Write the true spacegroup number into the first 'data' record of every
# target entry ...
for idx, item in enumerate(target_list_disp04):
    item['data'][0]['target'] = y_true[idx]

# ... and into the info dictionary of every structure.
for idx, structure in enumerate(structure_list_disp04):
    structure.info['target'] = y_true[idx]

# Disabled: spacegroup prediction with ASE's get_spacegroup (symprec=1e-1),
# kept as a reference for the comparison.
#y_pred_disp04 = []
#for structure in structure_list_disp04:
#    y_pred_disp04.append(ase_get_spacegroup(structure, symprec=1e-1).no)

In [46]:
# make dataset
# Turn the labelled 0.4%-displacement structures into a dataset on disk.
# prepare_dataset returns the paths of the three files it writes (x, y and a
# summary), which are then read back to inspect the result.
path_to_x, path_to_y, path_to_summary = prepare_dataset(
    structure_list=structure_list_disp04,
    target_list=target_list_disp04,
    desc_metadata='diffraction_3d_sh_spectrum',
    dataset_name='hcp-sc-fcc-diam-bcc_displacement-0.4%',
    target_name='target',
    target_categorical=True,
    input_dims=(50, 32),
    configs=configs,
    dataset_folder=dataset_folder,
    main_folder=configs['io']['main_folder'],
    desc_folder=configs['io']['desc_folder'],
    tmp_folder=configs['io']['tmp_folder'],
    # The fifth prototype is bcc; the note previously listed "sc" twice.
    notes="Hcp, sc, fcc, diam and bcc structures with 0.4% displacement")

x, y, dataset_info = load_dataset_from_file(
    path_to_x=path_to_x,
    path_to_y=path_to_y,
    path_to_summary=path_to_summary)

# Number of samples per class label; displayed as the cell output.
Counter(y)

INFO: Writing x to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_displacement-0.4%_x.pkl
INFO: Writing x to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_displacement-0.4%_x.pkl
INFO: Writing x to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_displacement-0.4%_x.pkl
INFO: Writing y to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_displacement-0.4%_y.pkl
INFO: Writing y to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_displacement-0.4%_y.pkl
INFO: Writing y to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_displacement-0.4%_y.pkl
INFO: Summary file written in /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_displacement-0.4%_summary.json.
INFO: Summary file written in /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_displacement-0.4%_summary.json.
IN

Counter({0: 4500, 1: 900, 2: 900, 3: 900, 4: 900})

## Displacements 1.0%

In [47]:
# Load the descriptor archives of the structures with 1% displacements
# (file suffix `disp001`) for the five prototypes and build y_true, the list
# of true spacegroup numbers.
#
# Each prototype has five archives whose names differ only in the `rotid`
# index (0-4). The relative paths are therefore generated from one template
# per prototype instead of being typed out; the generated lists contain
# exactly the same strings, in the same order, as the hand-written ones.

# hcp - spacegroup 194
filenames_disp1_hcp = [
    'hcp/disp/A_hP2_194_c_target_nb_atoms128_rotid{}_disp001.tar.gz'.format(rot_id)
    for rot_id in range(5)]

# sc - spacegroup 221
filenames_disp1_sc = [
    'sc/disp/A_cP1_221_a_target_nb_atoms128_rotid{}_disp001.tar.gz'.format(rot_id)
    for rot_id in range(5)]

# fcc - spacegroup 225
filenames_disp1_fcc = [
    'fcc/disp/A_cF4_225_a_target_nb_atoms128_rotid{}_disp001.tar.gz'.format(rot_id)
    for rot_id in range(5)]

# diam - spacegroup 227
filenames_disp1_diam = [
    'diam/disp/A_cF8_227_a_target_nb_atoms128_rotid{}_disp001.tar.gz'.format(rot_id)
    for rot_id in range(5)]

# bcc - spacegroup 229
filenames_disp1_bcc = [
    'bcc/disp/A_cI2_229_a_target_nb_atoms128_rotid{}_disp001.tar.gz'.format(rot_id)
    for rot_id in range(5)]

# Prefix every relative name with the descriptor folder from the configuration.
desc_files_disp1_hcp = [os.path.join(configs['io']['desc_folder'], item) for item in filenames_disp1_hcp]
desc_files_disp1_sc = [os.path.join(configs['io']['desc_folder'], item) for item in filenames_disp1_sc]
desc_files_disp1_fcc = [os.path.join(configs['io']['desc_folder'], item) for item in filenames_disp1_fcc]
desc_files_disp1_diam = [os.path.join(configs['io']['desc_folder'], item) for item in filenames_disp1_diam]
desc_files_disp1_bcc = [os.path.join(configs['io']['desc_folder'], item) for item in filenames_disp1_bcc]

# load_descriptor returns a (target_list, structure_list) pair for the given
# archives. After each prototype is loaded, one spacegroup label per loaded
# target entry is appended, so y_true follows the order hcp, sc, fcc, diam, bcc.
# NOTE(review): this cell counts entries with len(target_list_*) whereas the
# cells for the other displacement levels use len(structure_list_*);
# presumably both lists always have the same length -- confirm.
y_true = []
target_list_hcp_disp1, structure_list_hcp_disp1 = load_descriptor(desc_files=desc_files_disp1_hcp, configs=configs)
y_true += [194] * len(target_list_hcp_disp1)

target_list_sc_disp1, structure_list_sc_disp1 = load_descriptor(desc_files=desc_files_disp1_sc, configs=configs)
y_true += [221] * len(target_list_sc_disp1)

target_list_fcc_disp1, structure_list_fcc_disp1 = load_descriptor(desc_files=desc_files_disp1_fcc, configs=configs)
y_true += [225] * len(target_list_fcc_disp1)

target_list_diam_disp1, structure_list_diam_disp1 = load_descriptor(desc_files=desc_files_disp1_diam, configs=configs)
y_true += [227] * len(target_list_diam_disp1)

target_list_bcc_disp1, structure_list_bcc_disp1 = load_descriptor(desc_files=desc_files_disp1_bcc, configs=configs)
y_true += [229] * len(target_list_bcc_disp1)

INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/disp/A_hP2_194_c_target_nb_atoms128_rotid0_disp001.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/disp/A_hP2_194_c_target_nb_atoms128_rotid0_disp001.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/disp/A_hP2_194_c_target_nb_atoms128_rotid0_disp001.tar.gz
INFO: Extracting file 2/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/disp/A_hP2_194_c_target_nb_atoms128_rotid1_disp001.tar.gz
INFO: Extracting file 2/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/disp/A_hP2_194_c_target_nb_atoms128_rotid1_disp001.tar.gz
INFO: Extracting file 2/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/disp/A_hP2_194_c_target_nb_atoms128_rotid1_disp001.tar.gz
INFO: Extracting file 3/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/disp/A_hP2_194_

INFO: Extracting file 4/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/diam/disp/A_cF8_227_a_target_nb_atoms128_rotid3_disp001.tar.gz
INFO: Extracting file 5/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/diam/disp/A_cF8_227_a_target_nb_atoms128_rotid4_disp001.tar.gz
INFO: Extracting file 5/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/diam/disp/A_cF8_227_a_target_nb_atoms128_rotid4_disp001.tar.gz
INFO: Extracting file 5/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/diam/disp/A_cF8_227_a_target_nb_atoms128_rotid4_disp001.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/bcc/disp/A_cI2_229_a_target_nb_atoms128_rotid0_disp001.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/bcc/disp/A_cI2_229_a_target_nb_atoms128_rotid0_disp001.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/bcc/disp/A_cI2_

In [48]:
# Merge the per-prototype lists in the order hcp, sc, fcc, diam, bcc -- the
# same order in which y_true was filled -- so that y_true[i] is the label of
# entry i in both merged lists.
structure_list_disp1 = (structure_list_hcp_disp1
                        + structure_list_sc_disp1
                        + structure_list_fcc_disp1
                        + structure_list_diam_disp1
                        + structure_list_bcc_disp1)
target_list_disp1 = (target_list_hcp_disp1
                     + target_list_sc_disp1
                     + target_list_fcc_disp1
                     + target_list_diam_disp1
                     + target_list_bcc_disp1)

# Write the true spacegroup number into the first 'data' record of every
# target entry ...
for idx, item in enumerate(target_list_disp1):
    item['data'][0]['target'] = y_true[idx]

# ... and into the info dictionary of every structure. The loop must run over
# structure_list_disp1 (the list built above); the former name
# `structure_list_1` is never defined and raised a NameError.
for idx, structure in enumerate(structure_list_disp1):
    structure.info['target'] = y_true[idx]

# Disabled: spacegroup prediction with ASE's get_spacegroup (symprec=1e-1),
# kept as a reference for the comparison.
#y_pred_disp1 = []
#for structure in structure_list_disp1:
#    y_pred_disp1.append(ase_get_spacegroup(structure, symprec=1e-1).no)

In [49]:
# make dataset
# Turn the labelled 1%-displacement structures into a dataset on disk.
# prepare_dataset returns the paths of the three files it writes (x, y and a
# summary), which are then read back to inspect the result.
path_to_x, path_to_y, path_to_summary = prepare_dataset(
    structure_list=structure_list_disp1,
    target_list=target_list_disp1,
    desc_metadata='diffraction_3d_sh_spectrum',
    dataset_name='hcp-sc-fcc-diam-bcc_displacement-1%',
    target_name='target',
    target_categorical=True,
    input_dims=(50, 32),
    configs=configs,
    dataset_folder=dataset_folder,
    main_folder=configs['io']['main_folder'],
    desc_folder=configs['io']['desc_folder'],
    tmp_folder=configs['io']['tmp_folder'],
    # The fifth prototype is bcc; the note previously listed "sc" twice.
    notes="Hcp, sc, fcc, diam and bcc structures with 1% displacement")

x, y, dataset_info = load_dataset_from_file(
    path_to_x=path_to_x,
    path_to_y=path_to_y,
    path_to_summary=path_to_summary)

# Number of samples per class label; displayed as the cell output.
Counter(y)

INFO: Writing x to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_displacement-1%_x.pkl
INFO: Writing x to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_displacement-1%_x.pkl
INFO: Writing x to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_displacement-1%_x.pkl
INFO: Writing y to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_displacement-1%_y.pkl
INFO: Writing y to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_displacement-1%_y.pkl
INFO: Writing y to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_displacement-1%_y.pkl
INFO: Summary file written in /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_displacement-1%_summary.json.
INFO: Summary file written in /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_displacement-1%_summary.json.
INFO: Summary file

Counter({0: 4500, 1: 900, 2: 900, 3: 900, 4: 900})

## Displacements 2.0%

In [50]:
# Load the descriptor archives of the structures with 2% displacements
# (file suffix `disp002`) for the five prototypes and build y_true, the list
# of true spacegroup numbers.
#
# Each prototype has five archives whose names differ only in the `rotid`
# index (0-4). The relative paths are therefore generated from one template
# per prototype instead of being typed out; the generated lists contain
# exactly the same strings, in the same order, as the hand-written ones.

# hcp - spacegroup 194
filenames_disp2_hcp = [
    'hcp/disp/A_hP2_194_c_target_nb_atoms128_rotid{}_disp002.tar.gz'.format(rot_id)
    for rot_id in range(5)]

# sc - spacegroup 221
filenames_disp2_sc = [
    'sc/disp/A_cP1_221_a_target_nb_atoms128_rotid{}_disp002.tar.gz'.format(rot_id)
    for rot_id in range(5)]

# fcc - spacegroup 225
filenames_disp2_fcc = [
    'fcc/disp/A_cF4_225_a_target_nb_atoms128_rotid{}_disp002.tar.gz'.format(rot_id)
    for rot_id in range(5)]

# diam - spacegroup 227
filenames_disp2_diam = [
    'diam/disp/A_cF8_227_a_target_nb_atoms128_rotid{}_disp002.tar.gz'.format(rot_id)
    for rot_id in range(5)]

# bcc - spacegroup 229
filenames_disp2_bcc = [
    'bcc/disp/A_cI2_229_a_target_nb_atoms128_rotid{}_disp002.tar.gz'.format(rot_id)
    for rot_id in range(5)]

# Prefix every relative name with the descriptor folder from the configuration.
desc_files_disp2_hcp = [os.path.join(configs['io']['desc_folder'], item) for item in filenames_disp2_hcp]
desc_files_disp2_sc = [os.path.join(configs['io']['desc_folder'], item) for item in filenames_disp2_sc]
desc_files_disp2_fcc = [os.path.join(configs['io']['desc_folder'], item) for item in filenames_disp2_fcc]
desc_files_disp2_diam = [os.path.join(configs['io']['desc_folder'], item) for item in filenames_disp2_diam]
desc_files_disp2_bcc = [os.path.join(configs['io']['desc_folder'], item) for item in filenames_disp2_bcc]

# load_descriptor returns a (target_list, structure_list) pair for the given
# archives. After each prototype is loaded, one spacegroup label per loaded
# structure is appended, so y_true follows the order hcp, sc, fcc, diam, bcc.
y_true = []
target_list_hcp_disp2, structure_list_hcp_disp2 = load_descriptor(desc_files=desc_files_disp2_hcp, configs=configs)
y_true += [194] * len(structure_list_hcp_disp2)

target_list_sc_disp2, structure_list_sc_disp2 = load_descriptor(desc_files=desc_files_disp2_sc, configs=configs)
y_true += [221] * len(structure_list_sc_disp2)

target_list_fcc_disp2, structure_list_fcc_disp2 = load_descriptor(desc_files=desc_files_disp2_fcc, configs=configs)
y_true += [225] * len(structure_list_fcc_disp2)

target_list_diam_disp2, structure_list_diam_disp2 = load_descriptor(desc_files=desc_files_disp2_diam, configs=configs)
y_true += [227] * len(structure_list_diam_disp2)

target_list_bcc_disp2, structure_list_bcc_disp2 = load_descriptor(desc_files=desc_files_disp2_bcc, configs=configs)
y_true += [229] * len(structure_list_bcc_disp2)

INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/disp/A_hP2_194_c_target_nb_atoms128_rotid0_disp002.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/disp/A_hP2_194_c_target_nb_atoms128_rotid0_disp002.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/disp/A_hP2_194_c_target_nb_atoms128_rotid0_disp002.tar.gz
INFO: Extracting file 2/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/disp/A_hP2_194_c_target_nb_atoms128_rotid1_disp002.tar.gz
INFO: Extracting file 2/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/disp/A_hP2_194_c_target_nb_atoms128_rotid1_disp002.tar.gz
INFO: Extracting file 2/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/disp/A_hP2_194_c_target_nb_atoms128_rotid1_disp002.tar.gz
INFO: Extracting file 3/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/disp/A_hP2_194_

INFO: Extracting file 4/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/diam/disp/A_cF8_227_a_target_nb_atoms128_rotid3_disp002.tar.gz
INFO: Extracting file 5/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/diam/disp/A_cF8_227_a_target_nb_atoms128_rotid4_disp002.tar.gz
INFO: Extracting file 5/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/diam/disp/A_cF8_227_a_target_nb_atoms128_rotid4_disp002.tar.gz
INFO: Extracting file 5/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/diam/disp/A_cF8_227_a_target_nb_atoms128_rotid4_disp002.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/bcc/disp/A_cI2_229_a_target_nb_atoms128_rotid0_disp002.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/bcc/disp/A_cI2_229_a_target_nb_atoms128_rotid0_disp002.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/bcc/disp/A_cI2_

In [51]:
# Merge the per-prototype lists in the order hcp, sc, fcc, diam, bcc -- the
# same order in which y_true was filled -- so that y_true[i] is the label of
# entry i in both merged lists.
structure_list_disp2 = (structure_list_hcp_disp2
                        + structure_list_sc_disp2
                        + structure_list_fcc_disp2
                        + structure_list_diam_disp2
                        + structure_list_bcc_disp2)
target_list_disp2 = (target_list_hcp_disp2
                     + target_list_sc_disp2
                     + target_list_fcc_disp2
                     + target_list_diam_disp2
                     + target_list_bcc_disp2)

# Write the true spacegroup number into the first 'data' record of every
# target entry ...
for idx, item in enumerate(target_list_disp2):
    item['data'][0]['target'] = y_true[idx]

# ... and into the info dictionary of every structure.
for idx, structure in enumerate(structure_list_disp2):
    structure.info['target'] = y_true[idx]

# Disabled: spacegroup prediction with ASE's get_spacegroup (symprec=1e-1),
# kept as a reference for the comparison.
#y_pred_disp2 = []
#for structure in structure_list_disp2:
#    y_pred_disp2.append(ase_get_spacegroup(structure, symprec=1e-1).no)

In [52]:
# Make the dataset for the 2% displacement structures and load it back.
# prepare_dataset hands back the paths of the x, y and summary files it
# writes (see the "Writing x/y" and "Summary file written" log lines);
# those paths are then passed to load_dataset_from_file.

path_to_x, path_to_y, path_to_summary = prepare_dataset(
    structure_list=structure_list_disp2,
    target_list=target_list_disp2,
    desc_metadata='diffraction_3d_sh_spectrum',
    dataset_name='hcp-sc-fcc-diam-bcc_displacement-2%',
    target_name='target',
    target_categorical=True,
    input_dims=(50, 32),
    configs=configs,
    dataset_folder=dataset_folder,
    main_folder=configs['io']['main_folder'],
    desc_folder=configs['io']['desc_folder'],
    tmp_folder=configs['io']['tmp_folder'],
    # The note has to name the same five prototypes as dataset_name
    # (hcp, sc, fcc, diam, bcc).
    notes="Hcp, sc, fcc, diam and bcc structures with 2% displacement")

x, y, dataset_info = load_dataset_from_file(path_to_x=path_to_x, path_to_y=path_to_y,
                                            path_to_summary=path_to_summary)

# Number of samples per class label; displayed as the cell output.
Counter(y)

INFO: Writing x to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_displacement-2%_x.pkl
INFO: Writing x to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_displacement-2%_x.pkl
INFO: Writing x to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_displacement-2%_x.pkl
INFO: Writing y to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_displacement-2%_y.pkl
INFO: Writing y to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_displacement-2%_y.pkl
INFO: Writing y to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_displacement-2%_y.pkl
INFO: Summary file written in /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_displacement-2%_summary.json.
INFO: Summary file written in /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_displacement-2%_summary.json.
INFO: Summary file

Counter({0: 4500, 1: 900, 2: 900, 3: 900, 4: 900})

## Displacements 4.0%

In [53]:
# Relative paths (inside the descriptor folder) of the descriptor archives
# for the 4% displacement set: five rotated copies (rotid0..rotid4) for each
# of the five prototypes.

# hcp - spacegroup 194
filenames_disp4_hcp = [
    'hcp/disp/A_hP2_194_c_target_nb_atoms128_rotid0_disp004.tar.gz',
    'hcp/disp/A_hP2_194_c_target_nb_atoms128_rotid1_disp004.tar.gz',
    'hcp/disp/A_hP2_194_c_target_nb_atoms128_rotid2_disp004.tar.gz',
    'hcp/disp/A_hP2_194_c_target_nb_atoms128_rotid3_disp004.tar.gz',
    'hcp/disp/A_hP2_194_c_target_nb_atoms128_rotid4_disp004.tar.gz',
]

# sc - spacegroup 221
filenames_disp4_sc = [
    'sc/disp/A_cP1_221_a_target_nb_atoms128_rotid0_disp004.tar.gz',
    'sc/disp/A_cP1_221_a_target_nb_atoms128_rotid1_disp004.tar.gz',
    'sc/disp/A_cP1_221_a_target_nb_atoms128_rotid2_disp004.tar.gz',
    'sc/disp/A_cP1_221_a_target_nb_atoms128_rotid3_disp004.tar.gz',
    'sc/disp/A_cP1_221_a_target_nb_atoms128_rotid4_disp004.tar.gz',
]

# fcc - spacegroup 225
filenames_disp4_fcc = [
    'fcc/disp/A_cF4_225_a_target_nb_atoms128_rotid0_disp004.tar.gz',
    'fcc/disp/A_cF4_225_a_target_nb_atoms128_rotid1_disp004.tar.gz',
    'fcc/disp/A_cF4_225_a_target_nb_atoms128_rotid2_disp004.tar.gz',
    'fcc/disp/A_cF4_225_a_target_nb_atoms128_rotid3_disp004.tar.gz',
    'fcc/disp/A_cF4_225_a_target_nb_atoms128_rotid4_disp004.tar.gz',
]

# diam - spacegroup 227
filenames_disp4_diam = [
    'diam/disp/A_cF8_227_a_target_nb_atoms128_rotid0_disp004.tar.gz',
    'diam/disp/A_cF8_227_a_target_nb_atoms128_rotid1_disp004.tar.gz',
    'diam/disp/A_cF8_227_a_target_nb_atoms128_rotid2_disp004.tar.gz',
    'diam/disp/A_cF8_227_a_target_nb_atoms128_rotid3_disp004.tar.gz',
    'diam/disp/A_cF8_227_a_target_nb_atoms128_rotid4_disp004.tar.gz',
]

# bcc - spacegroup 229
filenames_disp4_bcc = [
    'bcc/disp/A_cI2_229_a_target_nb_atoms128_rotid0_disp004.tar.gz',
    'bcc/disp/A_cI2_229_a_target_nb_atoms128_rotid1_disp004.tar.gz',
    'bcc/disp/A_cI2_229_a_target_nb_atoms128_rotid2_disp004.tar.gz',
    'bcc/disp/A_cI2_229_a_target_nb_atoms128_rotid3_disp004.tar.gz',
    'bcc/disp/A_cI2_229_a_target_nb_atoms128_rotid4_disp004.tar.gz',
]

# Prefix every relative path with the configured descriptor folder.
desc_root = configs['io']['desc_folder']
desc_files_disp4_hcp = [os.path.join(desc_root, rel_path) for rel_path in filenames_disp4_hcp]
desc_files_disp4_sc = [os.path.join(desc_root, rel_path) for rel_path in filenames_disp4_sc]
desc_files_disp4_fcc = [os.path.join(desc_root, rel_path) for rel_path in filenames_disp4_fcc]
desc_files_disp4_diam = [os.path.join(desc_root, rel_path) for rel_path in filenames_disp4_diam]
desc_files_disp4_bcc = [os.path.join(desc_root, rel_path) for rel_path in filenames_disp4_bcc]

# Load one prototype at a time. After each load, y_true grows by one
# spacegroup number per loaded structure, so its order is hcp, sc, fcc,
# diam, bcc.
y_true = []

target_list_hcp_disp4, structure_list_hcp_disp4 = load_descriptor(desc_files=desc_files_disp4_hcp, configs=configs)
y_true.extend([194] * len(structure_list_hcp_disp4))

target_list_sc_disp4, structure_list_sc_disp4 = load_descriptor(desc_files=desc_files_disp4_sc, configs=configs)
y_true.extend([221] * len(structure_list_sc_disp4))

target_list_fcc_disp4, structure_list_fcc_disp4 = load_descriptor(desc_files=desc_files_disp4_fcc, configs=configs)
y_true.extend([225] * len(structure_list_fcc_disp4))

target_list_diam_disp4, structure_list_diam_disp4 = load_descriptor(desc_files=desc_files_disp4_diam, configs=configs)
y_true.extend([227] * len(structure_list_diam_disp4))

target_list_bcc_disp4, structure_list_bcc_disp4 = load_descriptor(desc_files=desc_files_disp4_bcc, configs=configs)
y_true.extend([229] * len(structure_list_bcc_disp4))

INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/disp/A_hP2_194_c_target_nb_atoms128_rotid0_disp004.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/disp/A_hP2_194_c_target_nb_atoms128_rotid0_disp004.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/disp/A_hP2_194_c_target_nb_atoms128_rotid0_disp004.tar.gz
INFO: Extracting file 2/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/disp/A_hP2_194_c_target_nb_atoms128_rotid1_disp004.tar.gz
INFO: Extracting file 2/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/disp/A_hP2_194_c_target_nb_atoms128_rotid1_disp004.tar.gz
INFO: Extracting file 2/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/disp/A_hP2_194_c_target_nb_atoms128_rotid1_disp004.tar.gz
INFO: Extracting file 3/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/disp/A_hP2_194_

INFO: Extracting file 4/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/diam/disp/A_cF8_227_a_target_nb_atoms128_rotid3_disp004.tar.gz
INFO: Extracting file 5/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/diam/disp/A_cF8_227_a_target_nb_atoms128_rotid4_disp004.tar.gz
INFO: Extracting file 5/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/diam/disp/A_cF8_227_a_target_nb_atoms128_rotid4_disp004.tar.gz
INFO: Extracting file 5/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/diam/disp/A_cF8_227_a_target_nb_atoms128_rotid4_disp004.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/bcc/disp/A_cI2_229_a_target_nb_atoms128_rotid0_disp004.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/bcc/disp/A_cI2_229_a_target_nb_atoms128_rotid0_disp004.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/bcc/disp/A_cI2_

In [54]:
# Merge the per-prototype lists for the 4% displacement set. The order
# (hcp, sc, fcc, diam, bcc) matches the order in which y_true was filled in
# the loading cell, because labels are assigned by position below.
structure_list_disp4 = (structure_list_hcp_disp4
                        + structure_list_sc_disp4
                        + structure_list_fcc_disp4
                        + structure_list_diam_disp4
                        + structure_list_bcc_disp4)
target_list_disp4 = (target_list_hcp_disp4
                     + target_list_sc_disp4
                     + target_list_fcc_disp4
                     + target_list_diam_disp4
                     + target_list_bcc_disp4)

# Store the reference spacegroup number in every descriptor record ...
for idx, record in enumerate(target_list_disp4):
    record['data'][0]['target'] = y_true[idx]

# ... and in the info of the structure at the same position.
for idx, atoms in enumerate(structure_list_disp4):
    atoms.info['target'] = y_true[idx]

# Spacegroup prediction through ase/spglib (disabled in this cell):
#y_pred_disp4 = []
#for structure in structure_list_disp4:
#    y_pred_disp4.append(ase_get_spacegroup(structure, symprec=1e-1).no)

In [55]:
# Make the dataset for the 4% displacement structures and load it back.
# prepare_dataset hands back the paths of the x, y and summary files it
# writes (see the "Writing x/y" and "Summary file written" log lines);
# those paths are then passed to load_dataset_from_file.

path_to_x, path_to_y, path_to_summary = prepare_dataset(
    structure_list=structure_list_disp4,
    target_list=target_list_disp4,
    desc_metadata='diffraction_3d_sh_spectrum',
    dataset_name='hcp-sc-fcc-diam-bcc_displacement-4%',
    target_name='target',
    target_categorical=True,
    input_dims=(50, 32),
    configs=configs,
    dataset_folder=dataset_folder,
    main_folder=configs['io']['main_folder'],
    desc_folder=configs['io']['desc_folder'],
    tmp_folder=configs['io']['tmp_folder'],
    # The note has to name the same five prototypes as dataset_name
    # (hcp, sc, fcc, diam, bcc).
    notes="Hcp, sc, fcc, diam and bcc structures with 4% displacement")

x, y, dataset_info = load_dataset_from_file(path_to_x=path_to_x, path_to_y=path_to_y,
                                            path_to_summary=path_to_summary)

# Number of samples per class label; displayed as the cell output.
Counter(y)

INFO: Writing x to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_displacement-4%_x.pkl
INFO: Writing x to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_displacement-4%_x.pkl
INFO: Writing x to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_displacement-4%_x.pkl
INFO: Writing y to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_displacement-4%_y.pkl
INFO: Writing y to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_displacement-4%_y.pkl
INFO: Writing y to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_displacement-4%_y.pkl
INFO: Summary file written in /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_displacement-4%_summary.json.
INFO: Summary file written in /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_displacement-4%_summary.json.
INFO: Summary file

Counter({0: 4500, 1: 900, 2: 900, 3: 900, 4: 900})

## Vacancies 1.0%

In [56]:
# Relative paths (inside the descriptor folder) of the descriptor archives
# for the 1% vacancies set: five rotated copies (rotid0..rotid4) for each of
# the five prototypes.

# hcp - spacegroup 194
filenames_vac1_hcp = [
    'hcp/vac/A_hP2_194_c_target_nb_atoms128_rotid0_vac01.tar.gz',
    'hcp/vac/A_hP2_194_c_target_nb_atoms128_rotid1_vac01.tar.gz',
    'hcp/vac/A_hP2_194_c_target_nb_atoms128_rotid2_vac01.tar.gz',
    'hcp/vac/A_hP2_194_c_target_nb_atoms128_rotid3_vac01.tar.gz',
    'hcp/vac/A_hP2_194_c_target_nb_atoms128_rotid4_vac01.tar.gz',
]

# sc - spacegroup 221
filenames_vac1_sc = [
    'sc/vac/A_cP1_221_a_target_nb_atoms128_rotid0_vac01.tar.gz',
    'sc/vac/A_cP1_221_a_target_nb_atoms128_rotid1_vac01.tar.gz',
    'sc/vac/A_cP1_221_a_target_nb_atoms128_rotid2_vac01.tar.gz',
    'sc/vac/A_cP1_221_a_target_nb_atoms128_rotid3_vac01.tar.gz',
    'sc/vac/A_cP1_221_a_target_nb_atoms128_rotid4_vac01.tar.gz',
]

# fcc - spacegroup 225
filenames_vac1_fcc = [
    'fcc/vac/A_cF4_225_a_target_nb_atoms128_rotid0_vac01.tar.gz',
    'fcc/vac/A_cF4_225_a_target_nb_atoms128_rotid1_vac01.tar.gz',
    'fcc/vac/A_cF4_225_a_target_nb_atoms128_rotid2_vac01.tar.gz',
    'fcc/vac/A_cF4_225_a_target_nb_atoms128_rotid3_vac01.tar.gz',
    'fcc/vac/A_cF4_225_a_target_nb_atoms128_rotid4_vac01.tar.gz',
]

# diam - spacegroup 227
filenames_vac1_diam = [
    'diam/vac/A_cF8_227_a_target_nb_atoms128_rotid0_vac01.tar.gz',
    'diam/vac/A_cF8_227_a_target_nb_atoms128_rotid1_vac01.tar.gz',
    'diam/vac/A_cF8_227_a_target_nb_atoms128_rotid2_vac01.tar.gz',
    'diam/vac/A_cF8_227_a_target_nb_atoms128_rotid3_vac01.tar.gz',
    'diam/vac/A_cF8_227_a_target_nb_atoms128_rotid4_vac01.tar.gz',
]

# bcc - spacegroup 229
filenames_vac1_bcc = [
    'bcc/vac/A_cI2_229_a_target_nb_atoms128_rotid0_vac01.tar.gz',
    'bcc/vac/A_cI2_229_a_target_nb_atoms128_rotid1_vac01.tar.gz',
    'bcc/vac/A_cI2_229_a_target_nb_atoms128_rotid2_vac01.tar.gz',
    'bcc/vac/A_cI2_229_a_target_nb_atoms128_rotid3_vac01.tar.gz',
    'bcc/vac/A_cI2_229_a_target_nb_atoms128_rotid4_vac01.tar.gz',
]

# Prefix every relative path with the configured descriptor folder.
desc_root = configs['io']['desc_folder']
desc_files_vac1_hcp = [os.path.join(desc_root, rel_path) for rel_path in filenames_vac1_hcp]
desc_files_vac1_sc = [os.path.join(desc_root, rel_path) for rel_path in filenames_vac1_sc]
desc_files_vac1_fcc = [os.path.join(desc_root, rel_path) for rel_path in filenames_vac1_fcc]
desc_files_vac1_diam = [os.path.join(desc_root, rel_path) for rel_path in filenames_vac1_diam]
desc_files_vac1_bcc = [os.path.join(desc_root, rel_path) for rel_path in filenames_vac1_bcc]

# Load one prototype at a time. After each load, y_true grows by one
# spacegroup number per loaded structure, so its order is hcp, sc, fcc,
# diam, bcc.
y_true = []

target_list_hcp_vac1, structure_list_hcp_vac1 = load_descriptor(desc_files=desc_files_vac1_hcp, configs=configs)
y_true.extend([194] * len(structure_list_hcp_vac1))

target_list_sc_vac1, structure_list_sc_vac1 = load_descriptor(desc_files=desc_files_vac1_sc, configs=configs)
y_true.extend([221] * len(structure_list_sc_vac1))

target_list_fcc_vac1, structure_list_fcc_vac1 = load_descriptor(desc_files=desc_files_vac1_fcc, configs=configs)
y_true.extend([225] * len(structure_list_fcc_vac1))

target_list_diam_vac1, structure_list_diam_vac1 = load_descriptor(desc_files=desc_files_vac1_diam, configs=configs)
y_true.extend([227] * len(structure_list_diam_vac1))

target_list_bcc_vac1, structure_list_bcc_vac1 = load_descriptor(desc_files=desc_files_vac1_bcc, configs=configs)
y_true.extend([229] * len(structure_list_bcc_vac1))

INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/vac/A_hP2_194_c_target_nb_atoms128_rotid0_vac01.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/vac/A_hP2_194_c_target_nb_atoms128_rotid0_vac01.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/vac/A_hP2_194_c_target_nb_atoms128_rotid0_vac01.tar.gz
INFO: Extracting file 2/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/vac/A_hP2_194_c_target_nb_atoms128_rotid1_vac01.tar.gz
INFO: Extracting file 2/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/vac/A_hP2_194_c_target_nb_atoms128_rotid1_vac01.tar.gz
INFO: Extracting file 2/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/vac/A_hP2_194_c_target_nb_atoms128_rotid1_vac01.tar.gz
INFO: Extracting file 3/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/vac/A_hP2_194_c_target_nb_atoms12

INFO: Extracting file 5/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/diam/vac/A_cF8_227_a_target_nb_atoms128_rotid4_vac01.tar.gz
INFO: Extracting file 5/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/diam/vac/A_cF8_227_a_target_nb_atoms128_rotid4_vac01.tar.gz
INFO: Extracting file 5/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/diam/vac/A_cF8_227_a_target_nb_atoms128_rotid4_vac01.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/bcc/vac/A_cI2_229_a_target_nb_atoms128_rotid0_vac01.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/bcc/vac/A_cI2_229_a_target_nb_atoms128_rotid0_vac01.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/bcc/vac/A_cI2_229_a_target_nb_atoms128_rotid0_vac01.tar.gz
INFO: Extracting file 2/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/bcc/vac/A_cI2_229_a_target_nb_atom

In [57]:
# Merge the per-prototype lists for the 1% vacancies set. The order
# (hcp, sc, fcc, diam, bcc) matches the order in which y_true was filled in
# the loading cell, because labels are assigned by position below.
structure_list_vac1 = (structure_list_hcp_vac1
                       + structure_list_sc_vac1
                       + structure_list_fcc_vac1
                       + structure_list_diam_vac1
                       + structure_list_bcc_vac1)
target_list_vac1 = (target_list_hcp_vac1
                    + target_list_sc_vac1
                    + target_list_fcc_vac1
                    + target_list_diam_vac1
                    + target_list_bcc_vac1)

# Store the reference spacegroup number in every descriptor record ...
for idx, record in enumerate(target_list_vac1):
    record['data'][0]['target'] = y_true[idx]

# ... and in the info of the structure at the same position.
for idx, atoms in enumerate(structure_list_vac1):
    atoms.info['target'] = y_true[idx]

# Spacegroup prediction through ase/spglib (disabled in this cell):
#y_pred_vac1 = []
#for structure in structure_list_vac1:
#    y_pred_vac1.append(ase_get_spacegroup(structure, symprec=1e-1).no)

In [58]:
# Make the dataset for the 1% vacancies structures and load it back.
# prepare_dataset hands back the paths of the x, y and summary files it
# writes (see the "Writing x/y" and "Summary file written" log lines);
# those paths are then passed to load_dataset_from_file.

path_to_x, path_to_y, path_to_summary = prepare_dataset(
    structure_list=structure_list_vac1,
    target_list=target_list_vac1,
    desc_metadata='diffraction_3d_sh_spectrum',
    dataset_name='hcp-sc-fcc-diam-bcc_vacancies-1%',
    target_name='target',
    target_categorical=True,
    input_dims=(50, 32),
    configs=configs,
    dataset_folder=dataset_folder,
    main_folder=configs['io']['main_folder'],
    desc_folder=configs['io']['desc_folder'],
    tmp_folder=configs['io']['tmp_folder'],
    # The note has to name the same five prototypes as dataset_name
    # (hcp, sc, fcc, diam, bcc).
    notes="Hcp, sc, fcc, diam and bcc structures with 1% vacancies")

x, y, dataset_info = load_dataset_from_file(path_to_x=path_to_x, path_to_y=path_to_y,
                                            path_to_summary=path_to_summary)

# Number of samples per class label; displayed as the cell output.
Counter(y)

INFO: Writing x to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_vacancies-1%_x.pkl
INFO: Writing x to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_vacancies-1%_x.pkl
INFO: Writing x to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_vacancies-1%_x.pkl
INFO: Writing y to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_vacancies-1%_y.pkl
INFO: Writing y to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_vacancies-1%_y.pkl
INFO: Writing y to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_vacancies-1%_y.pkl
INFO: Summary file written in /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_vacancies-1%_summary.json.
INFO: Summary file written in /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_vacancies-1%_summary.json.
INFO: Summary file written in /home/zilett

Counter({0: 4500, 1: 900, 2: 900, 3: 900, 4: 900})

## Vacancies 25%

In [59]:
# Relative paths (inside the descriptor folder) of the descriptor archives
# for the 25% vacancies set: five rotated copies (rotid0..rotid4) for each of
# the five prototypes.

# hcp - spacegroup 194
filenames_vac25_hcp = [
    'hcp/vac/A_hP2_194_c_target_nb_atoms128_rotid0_vac25.tar.gz',
    'hcp/vac/A_hP2_194_c_target_nb_atoms128_rotid1_vac25.tar.gz',
    'hcp/vac/A_hP2_194_c_target_nb_atoms128_rotid2_vac25.tar.gz',
    'hcp/vac/A_hP2_194_c_target_nb_atoms128_rotid3_vac25.tar.gz',
    'hcp/vac/A_hP2_194_c_target_nb_atoms128_rotid4_vac25.tar.gz',
]

# sc - spacegroup 221
filenames_vac25_sc = [
    'sc/vac/A_cP1_221_a_target_nb_atoms128_rotid0_vac25.tar.gz',
    'sc/vac/A_cP1_221_a_target_nb_atoms128_rotid1_vac25.tar.gz',
    'sc/vac/A_cP1_221_a_target_nb_atoms128_rotid2_vac25.tar.gz',
    'sc/vac/A_cP1_221_a_target_nb_atoms128_rotid3_vac25.tar.gz',
    'sc/vac/A_cP1_221_a_target_nb_atoms128_rotid4_vac25.tar.gz',
]

# fcc - spacegroup 225
filenames_vac25_fcc = [
    'fcc/vac/A_cF4_225_a_target_nb_atoms128_rotid0_vac25.tar.gz',
    'fcc/vac/A_cF4_225_a_target_nb_atoms128_rotid1_vac25.tar.gz',
    'fcc/vac/A_cF4_225_a_target_nb_atoms128_rotid2_vac25.tar.gz',
    'fcc/vac/A_cF4_225_a_target_nb_atoms128_rotid3_vac25.tar.gz',
    'fcc/vac/A_cF4_225_a_target_nb_atoms128_rotid4_vac25.tar.gz',
]

# diam - spacegroup 227
filenames_vac25_diam = [
    'diam/vac/A_cF8_227_a_target_nb_atoms128_rotid0_vac25.tar.gz',
    'diam/vac/A_cF8_227_a_target_nb_atoms128_rotid1_vac25.tar.gz',
    'diam/vac/A_cF8_227_a_target_nb_atoms128_rotid2_vac25.tar.gz',
    'diam/vac/A_cF8_227_a_target_nb_atoms128_rotid3_vac25.tar.gz',
    'diam/vac/A_cF8_227_a_target_nb_atoms128_rotid4_vac25.tar.gz',
]

# bcc - spacegroup 229
filenames_vac25_bcc = [
    'bcc/vac/A_cI2_229_a_target_nb_atoms128_rotid0_vac25.tar.gz',
    'bcc/vac/A_cI2_229_a_target_nb_atoms128_rotid1_vac25.tar.gz',
    'bcc/vac/A_cI2_229_a_target_nb_atoms128_rotid2_vac25.tar.gz',
    'bcc/vac/A_cI2_229_a_target_nb_atoms128_rotid3_vac25.tar.gz',
    'bcc/vac/A_cI2_229_a_target_nb_atoms128_rotid4_vac25.tar.gz',
]

# Prefix every relative path with the configured descriptor folder.
desc_root = configs['io']['desc_folder']
desc_files_vac25_hcp = [os.path.join(desc_root, rel_path) for rel_path in filenames_vac25_hcp]
desc_files_vac25_sc = [os.path.join(desc_root, rel_path) for rel_path in filenames_vac25_sc]
desc_files_vac25_fcc = [os.path.join(desc_root, rel_path) for rel_path in filenames_vac25_fcc]
desc_files_vac25_diam = [os.path.join(desc_root, rel_path) for rel_path in filenames_vac25_diam]
desc_files_vac25_bcc = [os.path.join(desc_root, rel_path) for rel_path in filenames_vac25_bcc]

# Load one prototype at a time. After each load, y_true grows by one
# spacegroup number per loaded structure, so its order is hcp, sc, fcc,
# diam, bcc.
y_true = []

target_list_hcp_vac25, structure_list_hcp_vac25 = load_descriptor(desc_files=desc_files_vac25_hcp, configs=configs)
y_true.extend([194] * len(structure_list_hcp_vac25))

target_list_sc_vac25, structure_list_sc_vac25 = load_descriptor(desc_files=desc_files_vac25_sc, configs=configs)
y_true.extend([221] * len(structure_list_sc_vac25))

target_list_fcc_vac25, structure_list_fcc_vac25 = load_descriptor(desc_files=desc_files_vac25_fcc, configs=configs)
y_true.extend([225] * len(structure_list_fcc_vac25))

target_list_diam_vac25, structure_list_diam_vac25 = load_descriptor(desc_files=desc_files_vac25_diam, configs=configs)
y_true.extend([227] * len(structure_list_diam_vac25))

target_list_bcc_vac25, structure_list_bcc_vac25 = load_descriptor(desc_files=desc_files_vac25_bcc, configs=configs)
y_true.extend([229] * len(structure_list_bcc_vac25))

INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/vac/A_hP2_194_c_target_nb_atoms128_rotid0_vac25.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/vac/A_hP2_194_c_target_nb_atoms128_rotid0_vac25.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/vac/A_hP2_194_c_target_nb_atoms128_rotid0_vac25.tar.gz
INFO: Extracting file 2/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/vac/A_hP2_194_c_target_nb_atoms128_rotid1_vac25.tar.gz
INFO: Extracting file 2/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/vac/A_hP2_194_c_target_nb_atoms128_rotid1_vac25.tar.gz
INFO: Extracting file 2/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/vac/A_hP2_194_c_target_nb_atoms128_rotid1_vac25.tar.gz
INFO: Extracting file 3/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/vac/A_hP2_194_c_target_nb_atoms12

INFO: Extracting file 5/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/diam/vac/A_cF8_227_a_target_nb_atoms128_rotid4_vac25.tar.gz
INFO: Extracting file 5/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/diam/vac/A_cF8_227_a_target_nb_atoms128_rotid4_vac25.tar.gz
INFO: Extracting file 5/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/diam/vac/A_cF8_227_a_target_nb_atoms128_rotid4_vac25.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/bcc/vac/A_cI2_229_a_target_nb_atoms128_rotid0_vac25.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/bcc/vac/A_cI2_229_a_target_nb_atoms128_rotid0_vac25.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/bcc/vac/A_cI2_229_a_target_nb_atoms128_rotid0_vac25.tar.gz
INFO: Extracting file 2/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/bcc/vac/A_cI2_229_a_target_nb_atom

In [60]:
# Merge the per-prototype lists for the 25% vacancies set. The order
# (hcp, sc, fcc, diam, bcc) matches the order in which y_true was filled in
# the loading cell, because labels are assigned by position below.
structure_list_vac25 = (structure_list_hcp_vac25
                        + structure_list_sc_vac25
                        + structure_list_fcc_vac25
                        + structure_list_diam_vac25
                        + structure_list_bcc_vac25)
target_list_vac25 = (target_list_hcp_vac25
                     + target_list_sc_vac25
                     + target_list_fcc_vac25
                     + target_list_diam_vac25
                     + target_list_bcc_vac25)

# Store the reference spacegroup number in every descriptor record ...
for idx, record in enumerate(target_list_vac25):
    record['data'][0]['target'] = y_true[idx]

# ... and in the info of the structure at the same position.
for idx, atoms in enumerate(structure_list_vac25):
    atoms.info['target'] = y_true[idx]

# Spacegroup prediction through ase/spglib (disabled in this cell):
#y_pred_vac25 = []
#for structure in structure_list_vac25:
#    y_pred_vac25.append(ase_get_spacegroup(structure, symprec=1e-1).no)

In [61]:
# Make the dataset for the 25% vacancies structures and load it back.
# prepare_dataset hands back the paths of the x, y and summary files it
# writes (see the "Writing x/y" and "Summary file written" log lines);
# those paths are then passed to load_dataset_from_file.

path_to_x, path_to_y, path_to_summary = prepare_dataset(
    structure_list=structure_list_vac25,
    target_list=target_list_vac25,
    desc_metadata='diffraction_3d_sh_spectrum',
    dataset_name='hcp-sc-fcc-diam-bcc_vacancies-25%',
    target_name='target',
    target_categorical=True,
    input_dims=(50, 32),
    configs=configs,
    dataset_folder=dataset_folder,
    main_folder=configs['io']['main_folder'],
    desc_folder=configs['io']['desc_folder'],
    tmp_folder=configs['io']['tmp_folder'],
    # The note has to name the same five prototypes as dataset_name
    # (hcp, sc, fcc, diam, bcc).
    notes="Hcp, sc, fcc, diam and bcc structures with 25% vacancies")

x, y, dataset_info = load_dataset_from_file(path_to_x=path_to_x, path_to_y=path_to_y,
                                            path_to_summary=path_to_summary)

# Number of samples per class label; displayed as the cell output.
Counter(y)

INFO: Writing x to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_vacancies-25%_x.pkl
INFO: Writing x to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_vacancies-25%_x.pkl
INFO: Writing x to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_vacancies-25%_x.pkl
INFO: Writing y to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_vacancies-25%_y.pkl
INFO: Writing y to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_vacancies-25%_y.pkl
INFO: Writing y to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_vacancies-25%_y.pkl
INFO: Summary file written in /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_vacancies-25%_summary.json.
INFO: Summary file written in /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_vacancies-25%_summary.json.
INFO: Summary file written in /hom

Counter({0: 4500, 1: 900, 2: 900, 3: 900, 4: 900})

## Vacancies 50%

In [62]:
# Relative paths (inside the descriptor folder) of the descriptor archives
# for the 50% vacancies set: five rotated copies (rotid0..rotid4) for each of
# the five prototypes.

# hcp - spacegroup 194
filenames_vac50_hcp = [
    'hcp/vac/A_hP2_194_c_target_nb_atoms128_rotid0_vac50.tar.gz',
    'hcp/vac/A_hP2_194_c_target_nb_atoms128_rotid1_vac50.tar.gz',
    'hcp/vac/A_hP2_194_c_target_nb_atoms128_rotid2_vac50.tar.gz',
    'hcp/vac/A_hP2_194_c_target_nb_atoms128_rotid3_vac50.tar.gz',
    'hcp/vac/A_hP2_194_c_target_nb_atoms128_rotid4_vac50.tar.gz',
]

# sc - spacegroup 221
filenames_vac50_sc = [
    'sc/vac/A_cP1_221_a_target_nb_atoms128_rotid0_vac50.tar.gz',
    'sc/vac/A_cP1_221_a_target_nb_atoms128_rotid1_vac50.tar.gz',
    'sc/vac/A_cP1_221_a_target_nb_atoms128_rotid2_vac50.tar.gz',
    'sc/vac/A_cP1_221_a_target_nb_atoms128_rotid3_vac50.tar.gz',
    'sc/vac/A_cP1_221_a_target_nb_atoms128_rotid4_vac50.tar.gz',
]

# fcc - spacegroup 225
filenames_vac50_fcc = [
    'fcc/vac/A_cF4_225_a_target_nb_atoms128_rotid0_vac50.tar.gz',
    'fcc/vac/A_cF4_225_a_target_nb_atoms128_rotid1_vac50.tar.gz',
    'fcc/vac/A_cF4_225_a_target_nb_atoms128_rotid2_vac50.tar.gz',
    'fcc/vac/A_cF4_225_a_target_nb_atoms128_rotid3_vac50.tar.gz',
    'fcc/vac/A_cF4_225_a_target_nb_atoms128_rotid4_vac50.tar.gz',
]

# diam - spacegroup 227
filenames_vac50_diam = [
    'diam/vac/A_cF8_227_a_target_nb_atoms128_rotid0_vac50.tar.gz',
    'diam/vac/A_cF8_227_a_target_nb_atoms128_rotid1_vac50.tar.gz',
    'diam/vac/A_cF8_227_a_target_nb_atoms128_rotid2_vac50.tar.gz',
    'diam/vac/A_cF8_227_a_target_nb_atoms128_rotid3_vac50.tar.gz',
    'diam/vac/A_cF8_227_a_target_nb_atoms128_rotid4_vac50.tar.gz',
]

# bcc - spacegroup 229
filenames_vac50_bcc = [
    'bcc/vac/A_cI2_229_a_target_nb_atoms128_rotid0_vac50.tar.gz',
    'bcc/vac/A_cI2_229_a_target_nb_atoms128_rotid1_vac50.tar.gz',
    'bcc/vac/A_cI2_229_a_target_nb_atoms128_rotid2_vac50.tar.gz',
    'bcc/vac/A_cI2_229_a_target_nb_atoms128_rotid3_vac50.tar.gz',
    'bcc/vac/A_cI2_229_a_target_nb_atoms128_rotid4_vac50.tar.gz',
]

# Prefix every relative path with the configured descriptor folder.
desc_root = configs['io']['desc_folder']
desc_files_vac50_hcp = [os.path.join(desc_root, rel_path) for rel_path in filenames_vac50_hcp]
desc_files_vac50_sc = [os.path.join(desc_root, rel_path) for rel_path in filenames_vac50_sc]
desc_files_vac50_fcc = [os.path.join(desc_root, rel_path) for rel_path in filenames_vac50_fcc]
desc_files_vac50_diam = [os.path.join(desc_root, rel_path) for rel_path in filenames_vac50_diam]
desc_files_vac50_bcc = [os.path.join(desc_root, rel_path) for rel_path in filenames_vac50_bcc]

# Load one prototype at a time. After each load, y_true grows by one
# spacegroup number per loaded structure, so its order is hcp, sc, fcc,
# diam, bcc.
y_true = []

target_list_hcp_vac50, structure_list_hcp_vac50 = load_descriptor(desc_files=desc_files_vac50_hcp, configs=configs)
y_true.extend([194] * len(structure_list_hcp_vac50))

target_list_sc_vac50, structure_list_sc_vac50 = load_descriptor(desc_files=desc_files_vac50_sc, configs=configs)
y_true.extend([221] * len(structure_list_sc_vac50))

target_list_fcc_vac50, structure_list_fcc_vac50 = load_descriptor(desc_files=desc_files_vac50_fcc, configs=configs)
y_true.extend([225] * len(structure_list_fcc_vac50))

target_list_diam_vac50, structure_list_diam_vac50 = load_descriptor(desc_files=desc_files_vac50_diam, configs=configs)
y_true.extend([227] * len(structure_list_diam_vac50))

target_list_bcc_vac50, structure_list_bcc_vac50 = load_descriptor(desc_files=desc_files_vac50_bcc, configs=configs)
y_true.extend([229] * len(structure_list_bcc_vac50))

INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/vac/A_hP2_194_c_target_nb_atoms128_rotid0_vac50.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/vac/A_hP2_194_c_target_nb_atoms128_rotid0_vac50.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/vac/A_hP2_194_c_target_nb_atoms128_rotid0_vac50.tar.gz
INFO: Extracting file 2/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/vac/A_hP2_194_c_target_nb_atoms128_rotid1_vac50.tar.gz
INFO: Extracting file 2/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/vac/A_hP2_194_c_target_nb_atoms128_rotid1_vac50.tar.gz
INFO: Extracting file 2/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/vac/A_hP2_194_c_target_nb_atoms128_rotid1_vac50.tar.gz
INFO: Extracting file 3/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/hcp/vac/A_hP2_194_c_target_nb_atoms12

INFO: Extracting file 5/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/diam/vac/A_cF8_227_a_target_nb_atoms128_rotid4_vac50.tar.gz
INFO: Extracting file 5/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/diam/vac/A_cF8_227_a_target_nb_atoms128_rotid4_vac50.tar.gz
INFO: Extracting file 5/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/diam/vac/A_cF8_227_a_target_nb_atoms128_rotid4_vac50.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/bcc/vac/A_cI2_229_a_target_nb_atoms128_rotid0_vac50.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/bcc/vac/A_cI2_229_a_target_nb_atoms128_rotid0_vac50.tar.gz
INFO: Extracting file 1/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/bcc/vac/A_cI2_229_a_target_nb_atoms128_rotid0_vac50.tar.gz
INFO: Extracting file 2/5: /home/ziletti/Documents/calc_nomadml/rot_inv_3d/desc_folder/bcc/vac/A_cI2_229_a_target_nb_atom

In [63]:
# Merge the per-prototype lists for the 50% vacancies set. The order
# (hcp, sc, fcc, diam, bcc) matches the order in which y_true was filled in
# the loading cell, because labels are assigned by position below.
structure_list_vac50 = (structure_list_hcp_vac50
                        + structure_list_sc_vac50
                        + structure_list_fcc_vac50
                        + structure_list_diam_vac50
                        + structure_list_bcc_vac50)
target_list_vac50 = (target_list_hcp_vac50
                     + target_list_sc_vac50
                     + target_list_fcc_vac50
                     + target_list_diam_vac50
                     + target_list_bcc_vac50)

# Store the reference spacegroup number in every descriptor record ...
for idx, record in enumerate(target_list_vac50):
    record['data'][0]['target'] = y_true[idx]

# ... and in the info of the structure at the same position.
for idx, atoms in enumerate(structure_list_vac50):
    atoms.info['target'] = y_true[idx]

# Spacegroup prediction through ase/spglib (disabled in this cell):
#y_pred_vac50 = []
#for structure in structure_list_vac50:
#    y_pred_vac50.append(ase_get_spacegroup(structure, symprec=1e-1).no)

In [64]:
# Make the dataset for the 50%-vacancy structures, then load it back.
# prepare_dataset hands back three paths (x, y and summary files), which are
# passed straight to load_dataset_from_file.

path_to_x, path_to_y, path_to_summary = prepare_dataset(
    structure_list=structure_list_vac50,
    target_list=target_list_vac50,
    desc_metadata='diffraction_3d_sh_spectrum',
    dataset_name='hcp-sc-fcc-diam-bcc_vacancies-50%',
    target_name='target',
    target_categorical=True,
    input_dims=(50, 32),
    configs=configs,
    dataset_folder=dataset_folder,
    main_folder=configs['io']['main_folder'],
    desc_folder=configs['io']['desc_folder'],
    tmp_folder=configs['io']['tmp_folder'],
    # The fifth lattice in this dataset is bcc (see dataset_name); the note
    # used to list "sc" twice.
    notes="Hcp, sc, fcc, diam and bcc structures with 50% vacancies")

x, y, dataset_info = load_dataset_from_file(
    path_to_x=path_to_x,
    path_to_y=path_to_y,
    path_to_summary=path_to_summary)


# Last expression of the cell: number of samples per class label.
Counter(y)

INFO: Writing x to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_vacancies-50%_x.pkl
INFO: Writing x to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_vacancies-50%_x.pkl
INFO: Writing x to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_vacancies-50%_x.pkl
INFO: Writing y to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_vacancies-50%_y.pkl
INFO: Writing y to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_vacancies-50%_y.pkl
INFO: Writing y to /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_vacancies-50%_y.pkl
INFO: Summary file written in /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_vacancies-50%_summary.json.
INFO: Summary file written in /home/ziletti/Documents/calc_nomadml/rot_inv_3d/datasets/hcp-sc-fcc-diam-bcc_vacancies-50%_summary.json.
INFO: Summary file written in /hom

Counter({0: 4500, 1: 900, 2: 900, 3: 900, 4: 900})

## Classification results using spglib

In [65]:
# Accuracy of the predictions in y_pred_disp04 against the labels in y_true.
# NOTE(review): the recorded output of this cell is a NameError, i.e.
# y_pred_disp04 was not defined when the cell was last run.
accuracy_score(y_true=y_true, y_pred=y_pred_disp04)

NameError: name 'y_pred_disp04' is not defined

In [None]:
# Accuracy of the predictions in y_pred_disp1 against the labels in y_true.
accuracy_score(y_true=y_true, y_pred=y_pred_disp1)

In [None]:
# Accuracy of the predictions in y_pred_disp2 against the labels in y_true.
accuracy_score(y_true=y_true, y_pred=y_pred_disp2)

In [None]:
# Accuracy of the predictions in y_pred_disp4 against the labels in y_true.
accuracy_score(y_true=y_true, y_pred=y_pred_disp4)

In [None]:
# Accuracy of the predictions in y_pred_vac1 against the labels in y_true.
accuracy_score(y_true=y_true, y_pred=y_pred_vac1)

In [None]:
# Accuracy of the predictions in y_pred_vac25 against the labels in y_true.
accuracy_score(y_true=y_true, y_pred=y_pred_vac25)

In [None]:
# Accuracy of the predictions in y_pred_vac50 against the labels in y_true.
# NOTE(review): the loop that would fill y_pred_vac50 is commented out in the
# cell that assembles structure_list_vac50, so it has to be computed first.
accuracy_score(y_true=y_true, y_pred=y_pred_vac50)

In [None]:
# Confusion matrix of the predictions in y_pred against the labels in y_true.
# Arrays are printed with two decimals from here on.
np.set_printoptions(precision=2)
cnf_matrix = confusion_matrix(y_true=y_true, y_pred=y_pred)
cnf_matrix

## Classification results using DISH and neural network

## Loading the prototype database and checking the classification for pristine structures

In [None]:
from ase.visualize import view

# Index of the structure to display; change idx to visualize another one.
# idx = 0 visualizes the first structure in the list.
# Note: 0 <= idx < len(ase_atoms_list)
idx = 0

# Select the structure through idx. The previous call ignored idx and showed
# whatever the global `structure` held from an earlier loop.
# ase_atoms_list has to be loaded (it is read from the ASE database in a cell
# further down) before this cell is run.
view(ase_atoms_list[idx] * (1, 1, 1), viewer='x3d')

In [None]:
# Folder with one ASE database file (<prototype name>.db) per prototype.
db_files_prototypes_basedir = '/home/ziletti/Documents/calc_nomadml/rot_inv_3d/db_ase/'
prototypes_basedir = '/home/ziletti/Documents/calc_nomadml/rot_inv_3d/prototypes_aflow_new'

# Prototypes to load.
proto_names = ['A_cP1_221_a']

# Database path of each prototype, in the same order as proto_names.
ase_db_files = [
    os.path.join(db_files_prototypes_basedir, proto_name + '.db')
    for proto_name in proto_names
]

# (prototype name, database path) pairs.
db_protos = zip(proto_names, ase_db_files)

In [None]:
import random

from ai4materials.utils.utils_crystals import create_supercell
from ai4materials.utils.utils_data_retrieval import read_ase_db

# Read the structures stored in each prototype database.
# ase_atoms_list is reassigned on every pass, so after the loop it holds the
# content of the last database only.
for idx_db, db_proto in enumerate(db_protos):
    ase_atoms_list = read_ase_db(db_path=ase_db_files[idx_db])

In [None]:
# Spacegroup numbers returned by ase_get_spacegroup for the first four
# entries of ase_atoms_list:
#   y_pred    -> the structure itself, after three random rotations
#   y_pred_sc -> the supercell built from that structure
y_pred = []
y_pred_sc = []
for idx, structure in enumerate(ase_atoms_list[:4]):
    # print() with a single argument gives the same output on Python 2 and 3,
    # whereas the `print idx` statement is a syntax error on Python 3.
    print(idx)

    # The supercell is built before `structure` is rotated below.
    supercell = create_supercell(structure, create_replicas_by='nb_atoms',
                                 target_nb_atoms=128, optimal_supercell=True)

    # Rotate about x, y and z in turn, each time by a random angle drawn
    # from [0, 360). The rotation is applied to the list entry itself, so
    # re-running this cell rotates the same objects again.
    alpha = random.random() * 360.0
    structure.rotate(alpha, 'x', rotate_cell=True, center='COU')

    beta = random.random() * 360.0
    structure.rotate(beta, 'y', rotate_cell=True, center='COU')

    gamma = random.random() * 360.0
    structure.rotate(gamma, 'z', rotate_cell=True, center='COU')

    y_pred.append(ase_get_spacegroup(structure, symprec=1e-1).no)
    y_pred_sc.append(ase_get_spacegroup(supercell, symprec=1e-1).no)

In [None]:
# One reference label (spacegroup 221) per structure that was actually
# classified. y_pred can be shorter than ase_atoms_list because the prediction
# loop only processes a slice of it, so the labels are sized to y_pred.
y_true = [221] * len(y_pred)

# Fraction of structures for which the spacegroup found for the rotated
# structure equals the one found for its supercell. This is the agreement
# between the two spglib runs, not the accuracy against y_true.
accuracy_score(y_pred, y_pred_sc)