# Load Tasks and VASP Files From NERSC

### Load Imports And Open Maggma Stores

In [1]:
%load_ext autoreload

In [2]:
%autoreload 2

In [3]:
%run imports.py

Imports successfully loaded
index_mask_from_objects2 succesfully loaded
index_mask_from_objects4 succesfully loaded


MaterialsProjectCompatibility will be updated with new correction classes as well as new values of corrections and uncertainties in 2020
  def get_pourbaix_entries(self, chemsys, solid_compat=MaterialsProjectCompatibility()):


In [4]:
# Capture the process's current working directory at this point in the notebook.
working_dir = os.getcwd()

In [32]:
%run maggma_stores.py

All maggma stores (db, elfcar_store, chgcar_store, aeccar0_store, aeccar2_store) successfully loaded 
Run 'connect_to_stores()' to connect to stores


In [33]:
# Open the connections to the maggma stores set up by maggma_stores.py
# (db, elfcar_store, chgcar_store, aeccar0_store, aeccar2_store).
# connect_to_stores() itself is defined in that script, not in this notebook.
connect_to_stores()

All connections successful


### Retrieve Tasks From NERSC

In [7]:
# Fetch every task document whose 'tags' field matches the regex
# 'production-scan', and hold the full result set in memory as a list.
production_scan_filter = {'tags': {'$regex': 'production-scan'}}
tasks_iter = db.query(production_scan_filter)
tasks = list(tasks_iter)
len(tasks)

### Sort Element and Binary Compound Tasks

In [10]:
def is_binary(f):
    """Return True when the formula *f* is made of exactly two elements."""
    return len(Composition(f).elements) == 2


# Keep only the tasks whose pretty formula describes a binary compound.
binary_tasks = list(filter(lambda task: is_binary(task['formula_pretty']), tasks))
len(binary_tasks)

In [None]:
# Map each binary formula to a single task: the one with the lowest
# output energy per atom. Ties keep the task that was seen first.
tasks_dict = {}

for task in binary_tasks:
    formula = task['formula_pretty']
    incumbent = tasks_dict.get(formula)
    if (incumbent is None
            or task['output']['energy_per_atom'] < incumbent['output']['energy_per_atom']):
        tasks_dict[formula] = task

In [11]:
def is_element(f):
    """Return True when the formula *f* is made of exactly one element."""
    return len(Composition(f).elements) == 1


# Keep only the tasks whose pretty formula describes a pure element.
element_tasks = list(filter(lambda task: is_element(task['formula_pretty']), tasks))
len(element_tasks)

In [13]:
# Map each element symbol to a single task: the one with the lowest output
# energy per atom. The key is the pretty formula with all digit runs removed
# (so 'O2' is stored under 'O'). Ties keep the task that was seen first.
elem_tasks_dict = {}
digit_runs = re.compile(r'\d+')

for task in element_tasks:
    symbol = digit_runs.sub('', task['formula_pretty'])
    incumbent = elem_tasks_dict.get(symbol)
    if (incumbent is None
            or task['output']['energy_per_atom'] < incumbent['output']['energy_per_atom']):
        elem_tasks_dict[symbol] = task

In [None]:
# Shallow copy of elem_tasks_dict: same keys in the same order, and the
# task documents themselves are shared rather than duplicated.
elem_data_json = dict(elem_tasks_dict)

### Find Subset of Binary Compounds to Study 
#### Each formula must: (1) be in the SCAN calculations, (2) be in the JANAF experimental data, and (3) contain at least one of the specified anions or cations

In [18]:
# Distinct compositions present among the binary tasks.
scan_formulas = {Composition(task['formula_pretty']) for task in binary_tasks}
len(scan_formulas)

4943

In [19]:
# Parse every entry of the 'Formula' column of the JANAF CSV into a
# Composition and collect the distinct results. Entries that cannot be parsed
# are printed and skipped (best-effort parse).
janaf_formulas = set()

for f in pd.read_csv('mpcontribs_janaf_thermo.csv')['Formula'].tolist():
    try:
        janaf_formulas.add(Composition(f))
    except Exception:
        # Deliberately broad, because the exact exception type raised for a
        # bad formula is not visible here. Unlike a bare `except:`, this still
        # lets KeyboardInterrupt and SystemExit propagate.
        print(f)

e-


In [20]:
# Number of compositions present in both the task set and the JANAF set.
len(scan_formulas.intersection(janaf_formulas))

178

In [23]:
# task_id of the task kept for each binary formula, in tasks_dict order.
task_ids = [kept_task['task_id'] for kept_task in tasks_dict.values()]

In [25]:
# Anions of interest.
anions = {Element(symbol) for symbol in ('N', 'O', 'F', 'Cl', 'Br')}

# Cations of interest, grouped as transition metals, alkali metals and
# alkaline-earth metals. `cats` keeps the flat list of symbols.
transition_metal_symbols = ['Ti', 'V', 'Cr', 'Mn', 'Fe', 'Ni']
alkali_symbols = ['Li', 'Na', 'K', 'Rb', 'Cs']
alkaline_earth_symbols = ['Be', 'Mg', 'Ca', 'Sr', 'Ba']
cats = transition_metal_symbols + alkali_symbols + alkaline_earth_symbols
cations = {Element(symbol) for symbol in cats}

In [26]:
# From the compositions found in both sets, keep those containing at least
# one listed anion OR at least one listed cation (not necessarily both).
target_elements = anions | cations
selection = []
for comp in scan_formulas & janaf_formulas:
    present = set(Composition(comp).elements)
    if not present.isdisjoint(target_elements):
        selection.append(comp)
len(selection)

In [28]:
# Reduced-formula string for each selected composition, in selection order.
selected_formulas = [comp.reduced_formula for comp in selection]
print(selected_formulas)

['LiH', 'Be2C', 'Be3N2', 'BN', 'BeO', 'B2O3', 'BeF2', 'Li3N', 'MgB2', 'MgB4', 'AlN', 'Al2O3', 'AlF3', 'BeS', 'LiCl', 'BeCl2', 'TiB2', 'KF', 'TiB', 'AlCl3', 'TiH2', 'KO2', 'TiC', 'CaO', 'NaCl', 'CaF2', 'MgCl2', 'Cr3C2', 'BeBr2', 'Cr2O3', 'FeO', 'Fe2O3', 'CoO', 'NaBr', 'MgBr2', 'AlBr3', 'FeS', 'FeS2', 'FeCl2', 'FeCl3', 'CoCl2', 'KBr', 'CaBr2', 'SrF2', 'BeI2', 'TiBr3', 'TiBr4', 'SrS', 'FeBr2', 'SrCl2', 'Li2O', 'BaO', 'BaF2', 'NbCl5', 'MoCl5', 'LiF', 'NaH', 'CsF', 'BaS', 'BaCl2', 'SrBr2', 'ZrBr3', 'MoBr2', 'MoBr3', 'ICl', 'CsCl', 'KI', 'CaI2', 'ZrBr4', 'TiI3', 'NbBr5', 'FeI2', 'Ta2O5', 'WO2', 'WO3', 'BaBr2', 'HgO', 'HgF2', 'TaCl5', 'PbO2', 'Pb3O4', 'HgCl2', 'WCl2', 'PbCl2', 'WCl4', 'Mg3N2', 'NaO2', 'MgF2', 'KH', 'BaI2', 'WBr6', 'P3N5', 'SiO2', 'Si3N4', 'HgBr2', 'PbBr2', 'Mg2Si', 'K2O', 'K2O2', 'Na2S', 'MgS', 'TiN', 'Cr7C3', 'Cr23C6', 'VN', 'TiO', 'TiO2', 'CrN', 'Cr2N', 'TiF3', 'TiF4', 'Ti2O3', 'V2O3', 'Fe3O4', 'Co3O4', 'FeF2', 'FeF3', 'K2S', 'CaS', 'KCl', 'CoF3', 'CaCl2', 'CuO', 'Cu2O', 'L

In [30]:
# Look up the task kept for each selected formula and collect its task_id.
# Raises KeyError if a reduced formula is not a key of tasks_dict.
selected_ids = [tasks_dict[formula]['task_id'] for formula in selected_formulas]
print(selected_ids)

[6405, 3890, 7373, 11133, 3914, 5804, 10090, 3957, 3996, 6135, 3955, 4648, 4827, 3901, 2779, 6017, 4392, 3992, 4837, 6454, 4601, 4826, 3929, 3926, 3930, 4019, 6412, 5874, 6240, 5312, 2936, 10792, 4617, 4102, 4782, 8414, 5364, 4870, 7394, 10278, 10118, 4406, 6359, 4026, 10408, 9789, 9416, 2180, 4999, 4510, 3947, 4072, 2072, 9887, 10201, 2597, 3882, 4212, 4226, 4682, 9493, 7850, 9879, 8450, 8661, 4331, 4687, 5723, 10375, 8706, 10389, 5431, 7076, 7562, 10285, 7722, 5662, 4089, 9731, 4656, 9068, 8428, 9914, 6190, 7962, 10642, 5462, 4611, 4035, 7921, 7740, 6344, 5557, 5570, 6853, 6417, 4283, 4311, 5692, 4252, 3942, 3989, 9927, 8986, 4758, 2167, 4787, 2820, 7468, 8989, 10948, 6283, 7408, 11087, 6862, 4757, 6094, 2085, 4079, 4346, 7239, 5863, 6872, 4493, 4644, 5822, 10674, 9327, 7178, 4800, 7019, 6111, 5298, 6936, 3979, 4018, 6355, 8994, 8091, 8563, 10373, 10179, 9534, 4319, 5278, 7051, 8061, 7958, 10101, 4298, 5275, 4201, 8381, 9665, 4108, 10524, 4002, 5663, 3907, 3915]


### Load ELFCARs, CHGCARs, and AECCARs (0 and 2)

In [34]:
def _first_record(store, task_id, drop_aug):
    """Return the first document in *store* whose metadata.task_id is *task_id*.

    The whole query result is materialised before the first item is taken, so
    an empty result raises IndexError. When *drop_aug* is true and the
    document has a truthy "data_aug" entry, that entry is removed before the
    document is returned.
    """
    record = list(store.query({"metadata.task_id": task_id}))[0]
    if drop_aug and record.get("data_aug"):
        del record["data_aug"]
    return record


# For each selected task, load its ELFCAR, CHGCAR, AECCAR0 and AECCAR2 and
# store them, in that order, as a four-item list keyed by task id.
car_dict = {}

for ID in tqdm(selected_ids):
    # NOTE(review): the original flagged the "data_aug" removal as a bug fix;
    # presumably from_dict cannot handle the stored value. Confirm.
    ELF = Elfcar.from_dict(_first_record(elfcar_store, ID, drop_aug=True))

    # The CHGCAR document is the only one that keeps its "data_aug" entry.
    CHG = Chgcar.from_dict(_first_record(chgcar_store, ID, drop_aug=False))

    AEC0 = Chgcar.from_dict(_first_record(aeccar0_store, ID, drop_aug=True))

    AEC2 = Chgcar.from_dict(_first_record(aeccar2_store, ID, drop_aug=True))

    car_dict[ID] = [ELF, CHG, AEC0, AEC2]


  0%|          | 0/160 [00:00<?, ?it/s][A
  1%|          | 1/160 [00:02<06:53,  2.60s/it][A
  1%|▏         | 2/160 [00:04<06:21,  2.41s/it][A
  2%|▏         | 3/160 [00:14<11:52,  4.54s/it][A
  2%|▎         | 4/160 [00:16<09:52,  3.80s/it][A
  3%|▎         | 5/160 [00:17<08:00,  3.10s/it][A
  4%|▍         | 6/160 [00:23<10:14,  3.99s/it][A
  4%|▍         | 7/160 [00:27<10:15,  4.02s/it][A
  5%|▌         | 8/160 [00:29<08:17,  3.27s/it][A
  6%|▌         | 9/160 [00:30<06:37,  2.63s/it][A
  6%|▋         | 10/160 [00:34<07:36,  3.04s/it][A
  7%|▋         | 11/160 [00:36<06:35,  2.65s/it][A
  8%|▊         | 12/160 [00:39<07:14,  2.93s/it][A
  8%|▊         | 13/160 [00:44<08:17,  3.38s/it][A
  9%|▉         | 14/160 [00:45<06:41,  2.75s/it][A
  9%|▉         | 15/160 [00:47<06:24,  2.65s/it][A
 10%|█         | 16/160 [00:52<08:05,  3.37s/it][A
 11%|█         | 17/160 [00:54<06:29,  2.72s/it][A
 11%|█▏        | 18/160 [00:55<05:46,  2.44s/it][A
 12%|█▏        | 19/160 [00:5

In [35]:
# Display the task ids for which volumetric data was loaded into car_dict.
car_dict.keys()

dict_keys([6405, 3890, 7373, 11133, 3914, 5804, 10090, 3957, 3996, 6135, 3955, 4648, 4827, 3901, 2779, 6017, 4392, 3992, 4837, 6454, 4601, 4826, 3929, 3926, 3930, 4019, 6412, 5874, 6240, 5312, 2936, 10792, 4617, 4102, 4782, 8414, 5364, 4870, 7394, 10278, 10118, 4406, 6359, 4026, 10408, 9789, 9416, 2180, 4999, 4510, 3947, 4072, 2072, 9887, 10201, 2597, 3882, 4212, 4226, 4682, 9493, 7850, 9879, 8450, 8661, 4331, 4687, 5723, 10375, 8706, 10389, 5431, 7076, 7562, 10285, 7722, 5662, 4089, 9731, 4656, 9068, 8428, 9914, 6190, 7962, 10642, 5462, 4611, 4035, 7921, 7740, 6344, 5557, 5570, 6853, 6417, 4283, 4311, 5692, 4252, 3942, 3989, 9927, 8986, 4758, 2167, 4787, 2820, 7468, 8989, 10948, 6283, 7408, 11087, 6862, 4757, 6094, 2085, 4079, 4346, 7239, 5863, 6872, 4493, 4644, 5822, 10674, 9327, 7178, 4800, 7019, 6111, 5298, 6936, 3979, 4018, 6355, 8994, 8091, 8563, 10373, 10179, 9534, 4319, 5278, 7051, 8061, 7958, 10101, 4298, 5275, 4201, 8381, 9665, 4108, 10524, 4002, 5663, 3907, 3915])

### Write Files Locally

In [40]:
import os

# Write each task's volumetric data under 'files/':
#   ELFCAR_<id>  - the ELFCAR
#   CHGCAR_<id>  - the CHGCAR
#   CHGREF_<id>  - the sum AECCAR0 + AECCAR2, when it can be formed
filepath = 'files/'
# Make sure the output directory exists before the first write.
os.makedirs(filepath, exist_ok=True)

for task_id, (elf, chg, aec0, aec2) in tqdm(car_dict.items()):
    elf.write_file(os.path.join(filepath, 'ELFCAR_{0}'.format(task_id)))
    chg.write_file(os.path.join(filepath, 'CHGCAR_{0}'.format(task_id)))
    try:
        chgref = aec0 + aec2
        chgref.write_file(os.path.join(filepath, 'CHGREF_{}'.format(task_id)))
    except ValueError as err:
        # Still best-effort: a task without a CHGREF does not stop the loop,
        # but the skip is now reported instead of being silently dropped.
        # NOTE(review): presumably raised when the two AECCAR grids cannot be
        # added (e.g. mismatched data keys). Confirm against pymatgen.
        print('Skipping CHGREF for task {}: {}'.format(task_id, err))



  1%|          | 1/160 [00:00<02:30,  1.05it/s][A
  1%|▏         | 2/160 [00:02<02:52,  1.09s/it][A
  2%|▏         | 3/160 [00:12<09:33,  3.65s/it][A
  2%|▎         | 4/160 [00:14<08:27,  3.25s/it][A
  3%|▎         | 5/160 [00:15<07:02,  2.73s/it][A
  4%|▍         | 6/160 [00:23<11:10,  4.35s/it][A
  4%|▍         | 7/160 [00:29<11:52,  4.66s/it][A
  5%|▌         | 8/160 [00:31<10:06,  3.99s/it][A
  6%|▌         | 9/160 [00:33<08:17,  3.30s/it][A
  6%|▋         | 10/160 [00:42<12:37,  5.05s/it][A
  7%|▋         | 11/160 [00:45<11:09,  4.49s/it][A
  8%|▊         | 12/160 [00:53<13:26,  5.45s/it][A
  8%|▊         | 13/160 [01:01<14:54,  6.08s/it][A
  9%|▉         | 14/160 [01:03<11:56,  4.91s/it][A
  9%|▉         | 15/160 [01:08<12:17,  5.08s/it][A
 10%|█         | 16/160 [01:21<17:31,  7.30s/it][A
 11%|█         | 17/160 [01:23<13:39,  5.73s/it][A
 11%|█▏        | 18/160 [01:27<12:24,  5.25s/it][A
 12%|█▏        | 19/160 [01:31<11:34,  4.93s/it][A
 12%|█▎        | 20