# Load Tasks and VASP Files From NERSC

### Load Imports And Open Maggma Stores

In [1]:
%load_ext autoreload

In [2]:
%autoreload 2

In [3]:
%run imports.py


__init__ is deprecated
MaterialsProjectCompatibility will be updated with new correction classes as well as new values of corrections and uncertainties in 2020



Imports successfully loaded


In [4]:
# Record the current working directory at the time this cell runs.
working_dir = os.getcwd()

In [5]:
%run maggma_stores.py

All maggma stores (db, elfcar_store, chgcar_store, aeccar0_store, aeccar2_store) successfully loaded 
Run 'connect_to_stores()' to connect to stores


In [6]:
# Connect to the maggma stores; the preceding `%run maggma_stores.py` output
# instructs the user to call this function once the stores are loaded.
connect_to_stores()

All connections successful


### Retrieve Tasks From NERSC

In [7]:
# Fetch every task document whose tags match the 'production-scan' regex and
# materialize the query result so it can be iterated more than once.
production_scan_query = {'tags': {'$regex': 'production-scan'}}
tasks_iter = db.query(production_scan_query)
tasks = list(tasks_iter)
len(tasks)

8295

### Sort Element and Binary Compound Tasks

In [8]:
def is_binary(f):
    """Return True when the formula `f` contains exactly two distinct elements."""
    return len(Composition(f).elements) == 2


# Keep only the tasks whose pretty formula is a binary compound.
binary_tasks = [task for task in tasks if is_binary(task['formula_pretty'])]
len(binary_tasks)

5726

In [9]:
# For each binary formula keep the single task with the lowest energy per atom.
tasks_dict = {}

for task in binary_tasks:
    formula = task['formula_pretty']
    current_best = tasks_dict.get(formula)
    if (current_best is None
            or task['output']['energy_per_atom'] < current_best['output']['energy_per_atom']):
        tasks_dict[formula] = task

In [10]:
def is_element(f):
    """Return True when the formula `f` contains exactly one distinct element."""
    return len(Composition(f).elements) == 1


# Keep only the tasks whose pretty formula is a pure element.
element_tasks = [task for task in tasks if is_element(task['formula_pretty'])]
len(element_tasks)

712

In [11]:
# For each element keep the single task with the lowest energy per atom.
# Digits are stripped from the pretty formula so that e.g. 'O2' is keyed as 'O'.
elem_tasks_dict = {}

for task in element_tasks:
    symbol = re.sub(r'\d+', '', task['formula_pretty'])
    current_best = elem_tasks_dict.get(symbol)
    if (current_best is None
            or task['output']['energy_per_atom'] < current_best['output']['energy_per_atom']):
        elem_tasks_dict[symbol] = task

In [12]:
# Shallow copy of the per-element best tasks, and a symbol -> task_id lookup.
elem_data_json = dict(elem_tasks_dict)
elem_ids = {symbol: task['task_id'] for symbol, task in elem_data_json.items()}

### Find Subset of Binary Compounds to Study 
#### Formula must: (1) be in the SCAN calculations, (2) be in the JANAF or MP experimental data, (3) match the specified anion/cation criteria

In [13]:
# Distinct compositions among the binary tasks.
scan_formulas = {Composition(task['formula_pretty']) for task in binary_tasks}
len(scan_formulas)

4943

In [15]:
# Load the JANAF table and collect every formula that parses as a Composition.
janaf_data = pd.read_csv('data/janaf_thermo.csv')

janaf_formulas = set()
for formula in janaf_data['Formula'].tolist():
    try:
        janaf_formulas.add(Composition(formula))
    except Exception:
        # Entries that cannot be parsed as a composition (the recorded run
        # printed 'e-') are reported and skipped. `Exception` rather than a
        # bare `except` so Ctrl-C / kernel interrupts are not swallowed.
        print(formula)

e-


In [16]:
# Load the MP experimental-energy file; its keys are parsed as compositions.
mp_expt_data = loadfn('data/MP_expt_energy_all.json')
mp_expt_formulas = {Composition(formula) for formula in mp_expt_data.keys()}

In [17]:
# Number of SCAN compositions that also appear in either experimental data set.
len(scan_formulas.intersection(mp_expt_formulas.union(janaf_formulas)))

626

In [18]:
# task_id of the lowest-energy task kept for each binary formula.
task_ids = [best_task['task_id'] for best_task in tasks_dict.values()]

In [19]:
# Element sets used to filter the candidate compounds.
anions = {Element(symbol) for symbol in ('N', 'O', 'F', 'Cl', 'Br')}

transition_metals = ['Ti', 'V', 'Cr', 'Mn', 'Fe', 'Ni']
alkali_metals = ['Li', 'Na', 'K', 'Rb', 'Cs']
alkaline_earths = ['Be', 'Mg', 'Ca', 'Sr', 'Ba']
cats = transition_metals + alkali_metals + alkaline_earths
cations = {Element(symbol) for symbol in cats}

In [20]:
# Candidate compositions present in SCAN and in at least one experimental set,
# kept when they contain a listed anion OR a listed cation.
# NOTE(review): this is looser than the anion-AND-cation filter used for `sel`
# below; confirm which criterion is intended for the downstream download.
candidates = scan_formulas & (mp_expt_formulas | janaf_formulas)

selection = []
for comp in candidates:
    comp_elements = set(Composition(comp).elements)
    if comp_elements & anions or comp_elements & cations:
        selection.append(comp)
len(selection)

427

In [21]:
# Same candidate pool, kept only when a composition contains BOTH a listed
# anion and a listed cation.
candidates = scan_formulas & (mp_expt_formulas | janaf_formulas)

sel = []
for comp in candidates:
    comp_elements = set(Composition(comp).elements)
    if comp_elements & anions and comp_elements & cations:
        sel.append(comp)
len(sel)

96

In [22]:
# Display the compositions that contain both a listed anion and a listed cation.
sel

[Comp: Be3 N2,
 Comp: Be1 F2,
 Comp: Be1 O1,
 Comp: Li3 N1,
 Comp: Li2 O1,
 Comp: Be1 Cl2,
 Comp: Ca3 N2,
 Comp: Ca1 O1,
 Comp: K1 F1,
 Comp: K2 O1,
 Comp: K2 O2,
 Comp: Ca1 F2,
 Comp: Be1 Br2,
 Comp: Ca1 Br2,
 Comp: K1 Br1,
 Comp: Ba1 F2,
 Comp: Li1 F1,
 Comp: Ba1 Cl2,
 Comp: Ba1 Br2,
 Comp: Mg1 O1,
 Comp: Mg1 F2,
 Comp: K1 O2,
 Comp: Na1 Cl1,
 Comp: Mg1 Cl2,
 Comp: Cr2 N1,
 Comp: Cr1 N1,
 Comp: Cr2 O3,
 Comp: V2 O3,
 Comp: V1 O2,
 Comp: Cr1 F2,
 Comp: Cr1 F3,
 Comp: V1 F4,
 Comp: Fe2 O3,
 Comp: Fe3 O4,
 Comp: Fe1 F2,
 Comp: Mn1 F2,
 Comp: Fe1 F3,
 Comp: K1 Cl1,
 Comp: Ni1 O1,
 Comp: Ca1 Cl2,
 Comp: Li1 Br1,
 Comp: V1 Cl2,
 Comp: V1 Cl3,
 Comp: V1 Cl4,
 Comp: Cr1 Cl3,
 Comp: Mn1 Cl2,
 Comp: Fe1 Cl2,
 Comp: Fe1 Cl3,
 Comp: Ni1 Cl2,
 Comp: Rb2 O1,
 Comp: Na1 Br1,
 Comp: Mg1 Br2,
 Comp: Rb1 F1,
 Comp: Sr1 O1,
 Comp: Rb1 Cl1,
 Comp: Sr1 Cl2,
 Comp: Li2 O2,
 Comp: Ti1 Br3,
 Comp: Ti1 Br4,
 Comp: Mn1 Br2,
 Comp: Fe1 Br2,
 Comp: Cs2 O1,
 Comp: Cs1 O2,
 Comp: Ba1 O1,
 Comp: Ba1 O2,
 Comp: Cs1

In [20]:
# Reduced-formula string for every selected composition.
selected_formulas = [comp.reduced_formula for comp in selection]
print(selected_formulas)

['LiH', 'BeH2', 'H2O', 'Be2C', 'Be3N2', 'BN', 'B2O3', 'BeF2', 'BeO', 'Li3N', 'Li2O', 'AlN', 'Al2O3', 'AlF3', 'BeCl2', 'BeS', 'CaH2', 'AlCl3', 'CCl4', 'Ca3N2', 'CaO', 'KF', 'K2O', 'K2O2', 'CaF2', 'Cr5B3', 'BeBr2', 'BBr3', 'As2O3', 'AsF3', 'As2O5', 'CoO', 'CaS', 'AlBr3', 'AsCl3', 'CoCl2', 'Ag2O', 'AgF', 'BaH2', 'BeI2', 'CaBr2', 'Rb2S', 'CaSe', 'KBr', 'AgCl', 'BaF2', 'LiF', 'CoBr2', 'BaS', 'BaCl2', 'CdCl2', 'Ce2O3', 'CeF3', 'NdF3', 'EuN', 'Ca3Sb2', 'EuO', 'AgBr', 'CeCl3', 'Er2O3', 'Au2O3', 'EuCl3', 'BaBr2', 'BiF3', 'Bi2O3', 'ErCl3', 'MgB2', 'AuCl', 'AuCl3', 'HfCl4', 'BiCl3', 'Ca2Pb', 'Ba2Sn', 'MgO', 'AuBr', 'MgF2', 'HfBr4', 'BiBr3', 'SiO2', 'HgBr', 'PbBr2', 'UFe2', 'SO3', 'ThBr4', 'UBr4', 'Mg2Si', 'KO2', 'Na2S', 'CrB', 'MgS', 'NaCl', 'ScN', 'TiC', 'Cr23C6', 'Cr7C3', 'Cr3C2', 'MgCl2', 'Cr2N', 'CrN', 'Fe2B', 'FeB', 'Mn2B', 'CaMg2', 'Cr2O3', 'V2O3', 'VO2', 'PCl3', 'CaAl2', 'CrF2', 'CrF3', 'VF4', 'Ni3B', 'CaSi', 'Fe2O3', 'Fe3O4', 'Ni2B', 'Ni4B3', 'Co3O4', 'FeF2', 'K2S', 'MnF2', 'FeF3', 'CoF3'

In [21]:
# task_id of the lowest-energy task for each selected formula.
# Relies on the reduced formula matching the 'formula_pretty' key in tasks_dict.
selected_ids = [tasks_dict[formula]['task_id'] for formula in selected_formulas]
print(selected_ids)

[6405, 5600, 8356, 3890, 7373, 11133, 5804, 10090, 3914, 3957, 3947, 3955, 4648, 4827, 6017, 3901, 5450, 6454, 11002, 9634, 3926, 3992, 4311, 5692, 4019, 6110, 6240, 7703, 8519, 7365, 8363, 4617, 4079, 8414, 8172, 10118, 4832, 3976, 6262, 10408, 6359, 5058, 4155, 4406, 3950, 2072, 2597, 4700, 4226, 4682, 5193, 10746, 9102, 8247, 4202, 11163, 4153, 4251, 7003, 9583, 6155, 8509, 7722, 7034, 10596, 7731, 3996, 11143, 10598, 7879, 10545, 10580, 8466, 3915, 6450, 4611, 10270, 11176, 5557, 5688, 6417, 6360, 7356, 8186, 8884, 4283, 4826, 4252, 2345, 3942, 3930, 3923, 3929, 8986, 9927, 5874, 6412, 7468, 2820, 5540, 6295, 6341, 11185, 5312, 7408, 2840, 8234, 5236, 5614, 7476, 7494, 8436, 4896, 10792, 11087, 4742, 9288, 6862, 4757, 2085, 6101, 6094, 7239, 4346, 4255, 6430, 5390, 5863, 4493, 6872, 4025, 4923, 4618, 5483, 5822, 4285, 4644, 5695, 4027, 4159, 4046, 7032, 4861, 5689, 6013, 4520, 6994, 4825, 9303, 9776, 8531, 5364, 4870, 5111, 7394, 10278, 4249, 4800, 7019, 9915, 6111, 6014, 5298, 693

In [22]:
# formula -> task_id lookup; both lists are the same length and in the same order.
binary_ids = dict(zip(selected_formulas, selected_ids))

### Load ELFCARs, CHGCARs, and AECCARs (0 and 2)

In [25]:
# Binary Compounds

def _first_match(store, task_id):
    """Return the first document in `store` whose metadata.task_id equals `task_id`.

    Raises IndexError when the query returns no documents.
    """
    return list(store.query({"metadata.task_id": task_id}))[0]


def _strip_data_aug(doc):
    """Delete a truthy "data_aug" entry from `doc` in place and return `doc`."""
    if doc.get("data_aug"):
        del doc["data_aug"]
    return doc


# task_id -> [ELFCAR, CHGCAR, AECCAR0, AECCAR2] objects for every selected task.
car_dict = {}

for ID in tqdm(selected_ids):
    ELF = Elfcar.from_dict(_strip_data_aug(_first_match(elfcar_store, ID)))

    # The CHGCAR document is used as stored; its "data_aug" entry is kept.
    CHG = Chgcar.from_dict(_first_match(chgcar_store, ID))

    AEC0 = Chgcar.from_dict(_strip_data_aug(_first_match(aeccar0_store, ID)))

    AEC2 = Chgcar.from_dict(_strip_data_aug(_first_match(aeccar2_store, ID)))

    car_dict[ID] = [ELF, CHG, AEC0, AEC2]


  0%|          | 0/427 [00:00<?, ?it/s][A
  0%|          | 1/427 [00:03<25:24,  3.58s/it][A
  0%|          | 2/427 [00:15<42:26,  5.99s/it][A
  1%|          | 3/427 [00:32<1:05:48,  9.31s/it][A
  1%|          | 4/427 [00:33<48:40,  6.91s/it]  [A
  1%|          | 5/427 [00:42<52:41,  7.49s/it][A
  1%|▏         | 6/427 [00:44<40:17,  5.74s/it][A
  2%|▏         | 7/427 [00:49<39:33,  5.65s/it][A
  2%|▏         | 8/427 [00:53<35:27,  5.08s/it][A
  2%|▏         | 9/427 [00:54<27:06,  3.89s/it][A
  2%|▏         | 10/427 [00:55<21:49,  3.14s/it][A
  3%|▎         | 11/427 [00:56<17:05,  2.47s/it][A
  3%|▎         | 12/427 [00:58<14:51,  2.15s/it][A
  3%|▎         | 13/427 [01:02<19:59,  2.90s/it][A
  3%|▎         | 14/427 [01:07<24:15,  3.52s/it][A
  4%|▎         | 15/427 [01:14<30:57,  4.51s/it][A
  4%|▎         | 16/427 [01:15<24:07,  3.52s/it][A
  4%|▍         | 17/427 [01:18<23:15,  3.40s/it][A
  4%|▍         | 18/427 [01:27<34:27,  5.05s/it][A
  4%|▍         | 19/427 [

 72%|███████▏  | 306/427 [1:06:09<36:48, 18.25s/it][A
 72%|███████▏  | 307/427 [1:06:21<32:45, 16.38s/it][A
 72%|███████▏  | 308/427 [1:06:39<33:11, 16.74s/it][A
 72%|███████▏  | 309/427 [1:06:44<26:04, 13.26s/it][A
 73%|███████▎  | 310/427 [1:06:47<20:07, 10.32s/it][A
 73%|███████▎  | 311/427 [1:07:22<34:06, 17.65s/it][A
 73%|███████▎  | 312/427 [1:07:25<25:15, 13.18s/it][A
 73%|███████▎  | 313/427 [1:07:31<21:20, 11.23s/it][A
 74%|███████▎  | 314/427 [1:07:35<16:40,  8.85s/it][A
 74%|███████▍  | 315/427 [1:07:40<14:37,  7.83s/it][A
 74%|███████▍  | 316/427 [1:08:00<21:17, 11.51s/it][A
 74%|███████▍  | 317/427 [1:08:05<17:09,  9.36s/it][A
 74%|███████▍  | 318/427 [1:08:07<13:02,  7.18s/it][A
 75%|███████▍  | 319/427 [1:08:44<29:07, 16.18s/it][A
 75%|███████▍  | 320/427 [1:08:48<22:11, 12.44s/it][A
 75%|███████▌  | 321/427 [1:08:55<19:32, 11.06s/it][A
 75%|███████▌  | 322/427 [1:09:15<23:49, 13.61s/it][A
 76%|███████▌  | 323/427 [1:09:25<21:41, 12.52s/it][A
 76%|█████

In [26]:
# task_id -> ELFCAR object for the lowest-energy task of every element.
elem_elfcars = {}

for ID in tqdm(elem_ids.values()):
    elf_doc = list(elfcar_store.query({"metadata.task_id": ID}))[0]
    # Drop a truthy "data_aug" entry before rebuilding the Elfcar object.
    if elf_doc.get("data_aug"):
        elf_doc.pop("data_aug")
    elem_elfcars[ID] = Elfcar.from_dict(elf_doc)


  0%|          | 0/88 [00:00<?, ?it/s][A
  1%|          | 1/88 [00:00<00:17,  4.99it/s][A
  3%|▎         | 3/88 [00:00<00:14,  5.77it/s][A
  6%|▌         | 5/88 [00:00<00:12,  6.79it/s][A
  7%|▋         | 6/88 [00:00<00:11,  7.36it/s][A
  9%|▉         | 8/88 [00:00<00:10,  7.75it/s][A
 10%|█         | 9/88 [00:01<00:10,  7.51it/s][A
 11%|█▏        | 10/88 [00:01<00:10,  7.54it/s][A
 12%|█▎        | 11/88 [00:01<00:09,  7.75it/s][A
 14%|█▎        | 12/88 [00:01<00:09,  8.10it/s][A
 15%|█▍        | 13/88 [00:01<00:09,  7.82it/s][A
 16%|█▌        | 14/88 [00:01<00:09,  8.11it/s][A
 17%|█▋        | 15/88 [00:01<00:08,  8.30it/s][A
 18%|█▊        | 16/88 [00:01<00:08,  8.54it/s][A
 19%|█▉        | 17/88 [00:02<00:08,  8.65it/s][A
 20%|██        | 18/88 [00:02<00:08,  8.05it/s][A
 22%|██▏       | 19/88 [00:02<00:08,  7.71it/s][A
 23%|██▎       | 20/88 [00:02<00:08,  7.63it/s][A
 24%|██▍       | 21/88 [00:02<00:08,  7.55it/s][A
 26%|██▌       | 23/88 [00:02<00:07,  8.19it/

### Write Files Locally

In [27]:
# Binary Compounds

# Output directory for the VASP volumetric files; created up front so a
# missing folder does not abort the (long) write loop.
filepath = 'files/'
os.makedirs(filepath, exist_ok=True)

# task_ids for which the AECCAR0 + AECCAR2 reference charge could not be written.
skipped_chgref = []

for task_id, (elf, chg, aec0, aec2) in tqdm(car_dict.items()):
    elf.write_file(filepath + 'ELFCAR_{0}'.format(task_id))
    chg.write_file(filepath + 'CHGCAR_{0}'.format(task_id))
    try:
        # CHGREF is the sum of the two all-electron charge densities.
        chgref = aec0 + aec2
        chgref.write_file(filepath + 'CHGREF_{}'.format(task_id))
    except ValueError as err:
        # Best effort: keep going, but record and report the failure instead
        # of silently dropping the file.
        skipped_chgref.append(task_id)
        print('Skipped CHGREF_{0}: {1}'.format(task_id, err))

dumpfn(binary_ids, 'data/binary_formulas_ids.json')



Structures are different. Make sure you know what you are doing...


  0%|          | 1/427 [00:01<09:09,  1.29s/it][A
  0%|          | 2/427 [00:16<38:33,  5.44s/it][A
  1%|          | 3/427 [00:45<1:27:56, 12.44s/it][A
  1%|          | 4/427 [00:47<1:05:54,  9.35s/it][A
  1%|          | 5/427 [01:01<1:15:14, 10.70s/it][A
  1%|▏         | 6/427 [01:04<1:00:24,  8.61s/it][A
  2%|▏         | 7/427 [01:16<1:07:32,  9.65s/it][A
  2%|▏         | 8/427 [01:24<1:03:19,  9.07s/it][A
  2%|▏         | 9/427 [01:26<48:40,  6.99s/it]  [A
  2%|▏         | 10/427 [01:30<40:56,  5.89s/it][A
  3%|▎         | 11/427 [01:32<33:02,  4.77s/it][A
  3%|▎         | 12/427 [01:35<30:28,  4.41s/it][A
  3%|▎         | 13/427 [01:48<47:36,  6.90s/it][A
  3%|▎         | 14/427 [01:58<53:44,  7.81s/it][A
  4%|▎         | 15/427 [02:11<1:05:02,  9.47s/it][A
  4%|▎         | 16/427 [02:14<50:48,  7.42s/it]  [A
  4%|▍         | 17/427 [02:23<54:12,  7.93s/it][A
  4%|▍         | 18/427 [02:45<1:22:

 69%|██████▉   | 295/427 [1:51:39<20:09,  9.17s/it][A
 69%|██████▉   | 296/427 [1:51:49<20:44,  9.50s/it][A
 70%|██████▉   | 297/427 [1:51:55<18:01,  8.32s/it][A
 70%|██████▉   | 298/427 [1:52:05<19:11,  8.92s/it][A
 70%|███████   | 299/427 [1:52:51<42:52, 20.10s/it][A
 70%|███████   | 300/427 [1:53:16<45:28, 21.48s/it][A
 70%|███████   | 301/427 [1:53:26<37:45, 17.98s/it][A
 71%|███████   | 302/427 [1:53:31<29:20, 14.08s/it][A
 71%|███████   | 303/427 [1:54:06<42:19, 20.48s/it][A
 71%|███████   | 304/427 [1:54:25<40:39, 19.83s/it][A
 71%|███████▏  | 305/427 [1:55:14<58:03, 28.55s/it][A
 72%|███████▏  | 306/427 [1:55:33<52:10, 25.87s/it][A
 72%|███████▏  | 307/427 [1:55:49<45:50, 22.92s/it][A
 72%|███████▏  | 308/427 [1:56:17<48:12, 24.31s/it][A
 72%|███████▏  | 309/427 [1:56:26<39:06, 19.88s/it][A
 73%|███████▎  | 310/427 [1:56:30<29:23, 15.07s/it][A
 73%|███████▎  | 311/427 [1:57:24<51:45, 26.77s/it][A
 73%|███████▎  | 312/427 [1:57:28<38:15, 19.96s/it][A
 73%|█████

In [28]:
# Elements

# Write one ELFCAR file per element task, then save the symbol -> task_id map.
for task_id in tqdm(elem_ids.values()):
    filepath = 'files/'
    target = filepath + 'ELFCAR_{0}'.format(str(task_id))
    elem_elfcars[task_id].write_file(target)

dumpfn(elem_ids, 'data/element_formulas_ids.json')


  0%|          | 0/88 [00:00<?, ?it/s][A
  1%|          | 1/88 [00:00<00:36,  2.39it/s][A
  2%|▏         | 2/88 [00:00<00:28,  2.99it/s][A
  3%|▎         | 3/88 [00:01<00:36,  2.32it/s][A
  5%|▍         | 4/88 [00:01<00:32,  2.59it/s][A
  7%|▋         | 6/88 [00:01<00:27,  3.03it/s][A
  9%|▉         | 8/88 [00:02<00:28,  2.82it/s][A
 10%|█         | 9/88 [00:03<00:30,  2.61it/s][A
 11%|█▏        | 10/88 [00:03<00:31,  2.46it/s][A
 12%|█▎        | 11/88 [00:03<00:25,  3.06it/s][A
 14%|█▎        | 12/88 [00:04<00:27,  2.74it/s][A
 15%|█▍        | 13/88 [00:04<00:26,  2.78it/s][A
 16%|█▌        | 14/88 [00:04<00:23,  3.17it/s][A
 17%|█▋        | 15/88 [00:05<00:21,  3.43it/s][A
 18%|█▊        | 16/88 [00:05<00:24,  2.96it/s][A
 19%|█▉        | 17/88 [00:05<00:22,  3.21it/s][A

No electronegativity for Ne. Setting to NaN. This has no physical meaning, and is mainly done to avoid errors caused by the code expecting a float.


 22%|██▏       | 19/88 [00:06<00:18,  3.77it/s]

In [29]:
# Tasks

# Index every retrieved task by its task_id for direct lookup.
task_ids_dict = {task['task_id']: task for task in tasks}


def entry_from_task(task):
    """Build a ComputedStructureEntry from a task document.

    Reads the structure and energy from ``task["output"]`` and the run type
    and POTCAR spec from the first item of ``task["calcs_reversed"]``. The
    entry id is the task's ``task_id`` (None when the key is absent).
    """
    structure = Structure.from_dict(task["output"]["structure"])
    energy = task["output"]["energy"]
    first_calc = task["calcs_reversed"][0]
    parameters = {"run_type": first_calc["run_type"],
                  "potcar_spec": first_calc["input"]["potcar_spec"]}
    return ComputedStructureEntry(structure,
                                  energy,
                                  parameters=parameters,
                                  entry_id=task.get("task_id"))


# One computed entry per downloaded binary task and per element task.
selected_task_ids = list(car_dict.keys()) + list(elem_ids.values())

task_ids_selected = {}
for ID in selected_task_ids:
    task_ids_selected[ID] = entry_from_task(task_ids_dict[ID])

dumpfn(task_ids_selected, 'data/computed_entries.json')

In [None]:
# Experimental Energies

# NOTE(review): the original draft left the energy column name blank; set this
# to the actual column of data/janaf_thermo.csv before running the cell.
janaf_energy_column = ''

selected_expt_energies = {}

for f in selected_formulas:
    if Composition(f) in janaf_formulas:
        # Boolean masks must go through .loc (.iloc rejects a boolean Series).
        # NOTE(review): rows are matched on the raw 'Formula' string, so a CSV
        # formula written differently from the reduced formula will not match.
        matches = janaf_data['Formula'] == f
        # Stored as a plain list so the result can be serialized to JSON.
        selected_expt_energies[f] = janaf_data.loc[matches, janaf_energy_column].tolist()

# The draft dumped an undefined name (`xxx`); dump the dict built above.
dumpfn(selected_expt_energies, 'selected_experimental_energies.json')

In [15]:
# Task id used by the single-task load/write cells that follow.
ID = 7356

In [22]:
# Load the ELFCAR document for the single task and rebuild the Elfcar object.
elfcar = list(elfcar_store.query({"metadata.task_id": ID}))[0]
if elfcar.get("data_aug"):
    # Drop the truthy "data_aug" entry before deserializing.
    elfcar.pop("data_aug")
ELF = Elfcar.from_dict(elfcar)

In [23]:
# Load the CHGCAR document for the single task; it is deserialized as stored.
chgcar = list(chgcar_store.query({"metadata.task_id": ID}))[0]
CHG = Chgcar.from_dict(chgcar)

In [24]:
# Load both all-electron charge documents for the single task, dropping a
# truthy "data_aug" entry from each before rebuilding the Chgcar objects.
aec0 = list(aeccar0_store.query({"metadata.task_id": ID}))[0]
if aec0.get("data_aug"):
    aec0.pop("data_aug")
AEC0 = Chgcar.from_dict(aec0)

aec2 = list(aeccar2_store.query({"metadata.task_id": ID}))[0]
if aec2.get("data_aug"):
    aec2.pop("data_aug")
AEC2 = Chgcar.from_dict(aec2)

In [28]:
# Write the volumetric data of the single test task to the working directory.
ELF.write_file('ELFCAR_7356')
CHG.write_file('CHGCAR_7356')
# as_dict() takes no filename and writes nothing; use write_file like the
# neighbouring calls.
# NOTE(review): the ValueError recorded for this cell suggests write_file may
# fail for the AECCAR data of this task; the next cell dumps the same objects
# to JSON as a fallback.
AEC0.write_file('AECCAR0_7356')
AEC2.write_file('AECCAR2_7356')

ValueError: invalid literal for int() with base 10: ''

In [31]:
# Serialize both all-electron charge objects to JSON via their dict form.
dumpfn(AEC0.as_dict(), 'AECCAR0_7356.json')
dumpfn(AEC2.as_dict(), 'AECCAR2_7356.json')

In [33]:
# Inspect the composition of the structure attached to the AECCAR2 data.
AEC2.structure.composition

Comp: S4 O12