# Load Tasks and VASP Files From NERSC

### Load Imports And Open Maggma Stores

In [1]:
%load_ext autoreload

In [2]:
%autoreload 2

In [3]:
%run imports.py


__init__ is deprecated
MaterialsProjectCompatibility will be updated with new correction classes as well as new values of corrections and uncertainties in 2020



Imports successfully loaded


In [4]:
# Remember the process's current working directory as of when this cell runs.
working_dir = os.getcwd()

In [5]:
%run maggma_stores.py

All maggma stores (db, elfcar_store, chgcar_store, aeccar0_store, aeccar2_store) successfully loaded 
Run 'connect_to_stores()' to connect to stores


In [6]:
# Connect to the maggma stores loaded by the preceding `%run maggma_stores.py`
# cell (its output instructs running this call before using the stores).
connect_to_stores()

All connections successful


### Retrieve Tasks From NERSC

In [7]:
# Fetch every task document whose `tags` field matches 'production-scan'
# and materialise the query result into a list so it can be reused.
tasks_iter = db.query({'tags': {'$regex': 'production-scan'}})
tasks = list(tasks_iter)
len(tasks)

8295

### Sort Element and Binary Compound Tasks

In [8]:
def is_binary(f):
    """Return True when formula `f` parses to a Composition with exactly two elements."""
    return len(Composition(f).elements) == 2


# Keep only the tasks whose pretty formula is a binary compound.
binary_tasks = list(filter(lambda task: is_binary(task['formula_pretty']), tasks))
len(binary_tasks)

5726

In [9]:
# For each pretty formula keep the single binary task with the lowest
# energy per atom (the first task seen wins ties).
tasks_dict = {}

for task in binary_tasks:
    formula = task['formula_pretty']
    best_so_far = tasks_dict.get(formula)
    if best_so_far is None or \
            task['output']['energy_per_atom'] < best_so_far['output']['energy_per_atom']:
        tasks_dict[formula] = task

In [10]:
def is_element(f):
    """Return True when formula `f` parses to a Composition with exactly one element."""
    return len(Composition(f).elements) == 1


# Keep only the tasks whose pretty formula is a pure element.
element_tasks = list(filter(lambda task: is_element(task['formula_pretty']), tasks))
len(element_tasks)

712

In [11]:
# For each element keep the single task with the lowest energy per atom.
# Digits are stripped from the pretty formula so that the dictionary key
# carries no stoichiometric count.
elem_tasks_dict = {}

for task in element_tasks:
    symbol = re.sub(r'\d+', '', task['formula_pretty'])
    best_so_far = elem_tasks_dict.get(symbol)
    if best_so_far is None or \
            task['output']['energy_per_atom'] < best_so_far['output']['energy_per_atom']:
        elem_tasks_dict[symbol] = task

In [15]:
# Shallow copy of the per-element task mapping, plus a key -> task_id lookup.
elem_data_json = dict(elem_tasks_dict)
elem_ids = {symbol: task['task_id'] for symbol, task in elem_data_json.items()}

### Find Subset of Binary Compounds to Study 
#### Formula must: (1) be in the SCAN calculations, (2) be in the JANAF experimental data, and (3) contain at least one of the specified anions or cations

In [13]:
# Distinct compositions present among the binary tasks.
scan_formulas = {Composition(task['formula_pretty']) for task in binary_tasks}
len(scan_formulas)

4943

In [16]:
# Parse every entry of the JANAF 'Formula' column into a Composition.
# Entries that cannot be parsed are printed and skipped instead of aborting
# the cell (the recorded run printed 'e-' here).
janaf_formulas = []

for f in pd.read_csv('data/janaf_thermo.csv')['Formula'].tolist():
    try:
        janaf_formulas.append(Composition(f))
    except Exception:
        # `except Exception` instead of a bare `except:` so that
        # KeyboardInterrupt / SystemExit are not swallowed while still
        # tolerating any parsing failure.
        print(f)

# Collapse to a set so it can be intersected with other formula sets.
janaf_formulas = set(janaf_formulas)

e-


In [17]:
# Number of compositions present in both the SCAN set and the JANAF set.
len(scan_formulas.intersection(janaf_formulas))

178

In [18]:
# task_id of every task retained in tasks_dict.
task_ids = [task['task_id'] for task in tasks_dict.values()]

In [19]:
# Elements of interest, used to filter the candidate formulas.
anions = {Element(symbol) for symbol in ['N', 'O', 'F', 'Cl', 'Br']}
cats = (
    ['Ti', 'V', 'Cr', 'Mn', 'Fe', 'Ni']    # transition metals
    + ['Li', 'Na', 'K', 'Rb', 'Cs']        # alkali metals
    + ['Be', 'Mg', 'Ca', 'Sr', 'Ba']       # alkaline-earth metals
)
cations = {Element(symbol) for symbol in cats}

In [20]:
# Keep the compositions found in both the SCAN and JANAF sets that contain
# at least one of the chosen anions or at least one of the chosen cations.
selection = []
for comp in (scan_formulas & janaf_formulas):
    comp_elements = set(Composition(comp).elements)
    if comp_elements & anions or comp_elements & cations:
        selection.append(comp)
len(selection)

160

In [21]:
# Reduced-formula string for each selected composition, in selection order.
selected_formulas = []
for comp in selection:
    selected_formulas.append(comp.reduced_formula)
print(selected_formulas)

['LiH', 'Be2C', 'Be3N2', 'BN', 'BeO', 'B2O3', 'BeF2', 'Li3N', 'MgB2', 'MgB4', 'AlN', 'Al2O3', 'AlF3', 'BeS', 'LiCl', 'BeCl2', 'TiB2', 'KF', 'TiB', 'AlCl3', 'TiH2', 'KO2', 'TiC', 'CaO', 'NaCl', 'CaF2', 'MgCl2', 'Cr3C2', 'BeBr2', 'Cr2O3', 'FeO', 'Fe2O3', 'CoO', 'NaBr', 'MgBr2', 'AlBr3', 'FeS', 'FeS2', 'FeCl2', 'FeCl3', 'CoCl2', 'KBr', 'CaBr2', 'SrF2', 'BeI2', 'TiBr3', 'TiBr4', 'SrS', 'FeBr2', 'SrCl2', 'Li2O', 'BaO', 'BaF2', 'NbCl5', 'MoCl5', 'LiF', 'NaH', 'CsF', 'BaS', 'BaCl2', 'SrBr2', 'ZrBr3', 'MoBr2', 'MoBr3', 'ICl', 'CsCl', 'KI', 'CaI2', 'ZrBr4', 'TiI3', 'NbBr5', 'FeI2', 'Ta2O5', 'WO2', 'WO3', 'BaBr2', 'HgO', 'HgF2', 'TaCl5', 'PbO2', 'Pb3O4', 'HgCl2', 'WCl2', 'PbCl2', 'WCl4', 'Mg3N2', 'NaO2', 'MgF2', 'KH', 'BaI2', 'WBr6', 'P3N5', 'SiO2', 'Si3N4', 'HgBr2', 'PbBr2', 'Mg2Si', 'K2O', 'K2O2', 'Na2S', 'MgS', 'TiN', 'Cr7C3', 'Cr23C6', 'VN', 'TiO', 'TiO2', 'CrN', 'Cr2N', 'TiF3', 'TiF4', 'Ti2O3', 'V2O3', 'Fe3O4', 'Co3O4', 'FeF2', 'FeF3', 'K2S', 'CaS', 'KCl', 'CoF3', 'CaCl2', 'CuO', 'Cu2O', 'L

In [22]:
# task_id of the retained task for each selected formula, in the same order
# as selected_formulas.
selected_ids = []
for formula in selected_formulas:
    selected_ids.append(tasks_dict[formula]['task_id'])
print(selected_ids)

[6405, 3890, 7373, 11133, 3914, 5804, 10090, 3957, 3996, 6135, 3955, 4648, 4827, 3901, 2779, 6017, 4392, 3992, 4837, 6454, 4601, 4826, 3929, 3926, 3930, 4019, 6412, 5874, 6240, 5312, 2936, 10792, 4617, 4102, 4782, 8414, 5364, 4870, 7394, 10278, 10118, 4406, 6359, 4026, 10408, 9789, 9416, 2180, 4999, 4510, 3947, 4072, 2072, 9887, 10201, 2597, 3882, 4212, 4226, 4682, 9493, 7850, 9879, 8450, 8661, 4331, 4687, 5723, 10375, 8706, 10389, 5431, 7076, 7562, 10285, 7722, 5662, 4089, 9731, 4656, 9068, 8428, 9914, 6190, 7962, 10642, 5462, 4611, 4035, 7921, 7740, 6344, 5557, 5570, 6853, 6417, 4283, 4311, 5692, 4252, 3942, 3989, 9927, 8986, 4758, 2167, 4787, 2820, 7468, 8989, 10948, 6283, 7408, 11087, 6862, 4757, 6094, 2085, 4079, 4346, 7239, 5863, 6872, 4493, 4644, 5822, 10674, 9327, 7178, 4800, 7019, 6111, 5298, 6936, 3979, 4018, 6355, 8994, 8091, 8563, 10373, 10179, 9534, 4319, 5278, 7051, 8061, 7958, 10101, 4298, 5275, 4201, 8381, 9665, 4108, 10524, 4002, 5663, 3907, 3915]


In [23]:
# formula -> task_id lookup built by pairing the two parallel lists.
binary_ids = dict(zip(selected_formulas, selected_ids))

### Load ELFCARs, CHGCARs, and AECCARs (0 and 2)

In [25]:
# Binary Compounds

def _first_doc(store, task_id, strip_data_aug=True):
    """Return the first document in `store` whose metadata.task_id equals `task_id`.

    Only the first match is pulled from the query, so remaining matches are
    never loaded.

    Args:
        store: object exposing `query(criteria)` that yields dict documents.
        task_id: value matched against the "metadata.task_id" field.
        strip_data_aug: when True, remove a non-empty "data_aug" entry from
            the document before returning it.

    Returns:
        The matching document (a dict), possibly without "data_aug".

    Raises:
        IndexError: if the query yields no document for `task_id`.
    """
    doc = next(iter(store.query({"metadata.task_id": task_id})), None)
    if doc is None:
        raise IndexError(
            "no document with metadata.task_id={} found in {}".format(task_id, store)
        )
    if strip_data_aug and doc.get("data_aug"):
        del doc["data_aug"]
    return doc


# task_id -> [ELFCAR, CHGCAR, AECCAR0, AECCAR2] objects
car_dict = {}

for ID in tqdm(selected_ids):
    ELF = Elfcar.from_dict(_first_doc(elfcar_store, ID))

    # The CHGCAR document is the only one passed through with "data_aug" intact.
    CHG = Chgcar.from_dict(_first_doc(chgcar_store, ID, strip_data_aug=False))

    AEC0 = Chgcar.from_dict(_first_doc(aeccar0_store, ID))

    AEC2 = Chgcar.from_dict(_first_doc(aeccar2_store, ID))

    car_dict[ID] = [ELF, CHG, AEC0, AEC2]


  0%|          | 0/160 [00:00<?, ?it/s][A
  1%|          | 1/160 [00:01<04:28,  1.69s/it][A
  1%|▏         | 2/160 [00:02<04:02,  1.53s/it][A
  2%|▏         | 3/160 [00:10<08:32,  3.26s/it][A
  2%|▎         | 4/160 [00:11<07:06,  2.74s/it][A
  3%|▎         | 5/160 [00:12<05:34,  2.16s/it][A
  4%|▍         | 6/160 [00:16<07:13,  2.81s/it][A
  4%|▍         | 7/160 [00:19<07:00,  2.75s/it][A
  5%|▌         | 8/160 [00:20<05:42,  2.25s/it][A
  6%|▌         | 9/160 [00:21<04:32,  1.80s/it][A
  6%|▋         | 10/160 [00:25<06:14,  2.50s/it][A
  7%|▋         | 11/160 [00:26<05:16,  2.13s/it][A
  8%|▊         | 12/160 [00:29<06:07,  2.49s/it][A
  8%|▊         | 13/160 [00:34<07:41,  3.14s/it][A
  9%|▉         | 14/160 [00:35<06:16,  2.58s/it][A
  9%|▉         | 15/160 [00:38<06:15,  2.59s/it][A
 10%|█         | 16/160 [00:43<07:47,  3.25s/it][A
 11%|█         | 17/160 [00:44<06:08,  2.58s/it][A
 11%|█▏        | 18/160 [00:45<05:21,  2.27s/it][A
 12%|█▏        | 19/160 [00:4

 98%|█████████▊| 156/160 [15:40<00:31,  7.81s/it][A
 98%|█████████▊| 157/160 [15:42<00:18,  6.03s/it][A
 99%|█████████▉| 158/160 [15:45<00:10,  5.21s/it][A
 99%|█████████▉| 159/160 [15:46<00:03,  3.98s/it][A
100%|██████████| 160/160 [15:47<00:00,  5.92s/it][A


In [26]:
# task_id -> Elfcar object for every elemental reference task.
elem_elfcars = {}

for task_id in tqdm(elem_ids.values()):
    matches = list(elfcar_store.query({"metadata.task_id": task_id}))
    elf_doc = matches[0]
    # Drop a non-empty "data_aug" entry before building the Elfcar.
    if elf_doc.get("data_aug"):
        elf_doc.pop("data_aug")
    elem_elfcars[task_id] = Elfcar.from_dict(elf_doc)


  0%|          | 0/88 [00:00<?, ?it/s][A
  1%|          | 1/88 [00:00<00:15,  5.63it/s][A
  2%|▏         | 2/88 [00:00<00:14,  5.92it/s][A
  3%|▎         | 3/88 [00:00<00:17,  4.86it/s][A
  5%|▍         | 4/88 [00:00<00:15,  5.36it/s][A
  6%|▌         | 5/88 [00:00<00:13,  6.10it/s][A
  7%|▋         | 6/88 [00:01<00:16,  5.09it/s][A
  8%|▊         | 7/88 [00:01<00:14,  5.43it/s][A
  9%|▉         | 8/88 [00:01<00:22,  3.60it/s][A
 10%|█         | 9/88 [00:02<00:20,  3.77it/s][A
 11%|█▏        | 10/88 [00:02<00:18,  4.17it/s][A
 14%|█▎        | 12/88 [00:02<00:15,  4.82it/s][A
 15%|█▍        | 13/88 [00:02<00:13,  5.62it/s][A
 17%|█▋        | 15/88 [00:02<00:11,  6.46it/s][A
 18%|█▊        | 16/88 [00:03<00:13,  5.29it/s][A
 19%|█▉        | 17/88 [00:03<00:12,  5.65it/s][A
 20%|██        | 18/88 [00:03<00:12,  5.83it/s][A
 22%|██▏       | 19/88 [00:03<00:10,  6.66it/s][A
 23%|██▎       | 20/88 [00:03<00:12,  5.51it/s][A
 24%|██▍       | 21/88 [00:03<00:10,  6.33it/s]

### Write Files Locally

In [27]:
# Binary Compounds

# Output directory for the volumetric files; create it if it is missing so
# the first write does not fail on a fresh checkout.
filepath = 'files/'
os.makedirs(filepath, exist_ok=True)

# (task_id, error message) for every compound whose CHGREF could not be written.
skipped_chgref = []

for i in tqdm(car_dict.keys()):
    lst = car_dict[i]  # [ELFCAR, CHGCAR, AECCAR0, AECCAR2]
    lst[0].write_file(filepath + 'ELFCAR_{0}'.format(str(i)))
    lst[1].write_file(filepath + 'CHGCAR_{0}'.format(str(i)))
    try:
        # CHGREF is written as the sum AECCAR0 + AECCAR2.
        chgref = lst[2] + lst[3]
        chgref.write_file(filepath + 'CHGREF_{}'.format(str(i)))
    except ValueError as err:
        # Still best-effort (the loop continues), but record the failure
        # instead of dropping it silently.
        skipped_chgref.append((i, str(err)))

if skipped_chgref:
    print('CHGREF not written for {} task(s): {}'.format(len(skipped_chgref), skipped_chgref))

dumpfn(binary_ids, 'data/binary_formulas_ids.json')



Structures are different. Make sure you know what you are doing...


  1%|          | 1/160 [00:00<02:31,  1.05it/s][A
  1%|▏         | 2/160 [00:02<02:53,  1.10s/it][A
  2%|▏         | 3/160 [00:11<09:26,  3.61s/it][A
  2%|▎         | 4/160 [00:14<08:26,  3.25s/it][A
  3%|▎         | 5/160 [00:15<07:04,  2.74s/it][A
  4%|▍         | 6/160 [00:25<12:45,  4.97s/it][A
  4%|▍         | 7/160 [00:32<13:40,  5.36s/it][A
  5%|▌         | 8/160 [00:34<11:26,  4.52s/it][A
  6%|▌         | 9/160 [00:36<09:12,  3.66s/it][A
  6%|▋         | 10/160 [00:44<12:31,  5.01s/it][A
  7%|▋         | 11/160 [00:47<10:33,  4.25s/it][A
  8%|▊         | 12/160 [00:54<12:58,  5.26s/it][A
  8%|▊         | 13/160 [01:02<14:30,  5.92s/it][A
  9%|▉         | 14/160 [01:04<11:25,  4.70s/it][A
  9%|▉         | 15/160 [01:09<12:02,  4.98s/it][A
 10%|█         | 16/160 [01:20<16:24,  6.84s/it][A
 11%|█         | 17/160 [01:22<12:29,  5.24s/it][A
 11%|█▏        | 18/160 [01:25<11:04,  4.68s/it][A
 1

 96%|█████████▌| 153/160 [31:48<01:26, 12.30s/it][A
 96%|█████████▋| 154/160 [32:22<01:53, 18.87s/it][A
 97%|█████████▋| 155/160 [32:25<01:10, 14.16s/it][A
 98%|█████████▊| 156/160 [32:52<01:12, 18.02s/it][A
 98%|█████████▊| 157/160 [32:55<00:40, 13.65s/it][A
 99%|█████████▉| 158/160 [33:04<00:24, 12.17s/it][A
 99%|█████████▉| 159/160 [33:06<00:09,  9.00s/it][A
100%|██████████| 160/160 [33:07<00:00, 12.42s/it][A


In [33]:
# Elements

# Write one ELFCAR file per elemental task, then save the element -> task_id map.
filepath = 'files/'

for i in tqdm(elem_ids.values()):
    target = filepath + 'ELFCAR_{0}'.format(str(i))
    elem_elfcars[i].write_file(target)

dumpfn(elem_ids, 'data/element_formulas_ids.json')


  0%|          | 0/88 [00:00<?, ?it/s][A
  1%|          | 1/88 [00:00<00:18,  4.62it/s][A
  2%|▏         | 2/88 [00:00<00:15,  5.42it/s][A
  3%|▎         | 3/88 [00:00<00:24,  3.50it/s][A
  5%|▍         | 4/88 [00:01<00:22,  3.66it/s][A
  7%|▋         | 6/88 [00:01<00:19,  4.15it/s][A
  9%|▉         | 8/88 [00:02<00:21,  3.77it/s][A
 10%|█         | 9/88 [00:02<00:23,  3.37it/s][A
 11%|█▏        | 10/88 [00:02<00:24,  3.18it/s][A
 12%|█▎        | 11/88 [00:02<00:19,  3.92it/s][A
 14%|█▎        | 12/88 [00:03<00:21,  3.51it/s][A
 15%|█▍        | 13/88 [00:03<00:21,  3.50it/s][A
 16%|█▌        | 14/88 [00:03<00:18,  3.94it/s][A
 17%|█▋        | 15/88 [00:03<00:17,  4.21it/s][A
 18%|█▊        | 16/88 [00:04<00:20,  3.56it/s][A
 19%|█▉        | 17/88 [00:04<00:18,  3.74it/s][A

No electronegativity for Ne. Setting to NaN. This has no physical meaning, and is mainly done to avoid errors caused by the code expecting a float.


 22%|██▏       | 19/88 [00:05<00:17,  3.85it/s]