##### Load Packages

In [1]:
%matplotlib inline
import os
import boto3
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import os
from tqdm import tqdm

from camd.agent.base import HypothesisAgent
from camd.analysis import AnalyzerBase
from camd.experiment.base import ATFSampler
from camd.campaigns.base import Campaign

##### Load data and process compositions into a more readable format

In [2]:
# Location of the JCAP scratch data and the file name of the pickled table.
# NOTE(review): `path` is assigned but never used below; the pickle is read
# by bare file name, i.e. relative to the current working directory.
path = 'CAMD/scratches/JCAP data/'
jcap_pickle = 'jcap_optical_encoding.pickle'

# Load the pickled table and report its (rows, columns) dimensions.
jcap_df = pd.read_pickle(jcap_pickle)
print('This dataset has {} samples, {} features'.format(*jcap_df.shape))

This dataset has 100430 samples, 45 features


In [3]:
# Turn the sparse per-element columns ('Fe' through 'Rb') into compact
# per-sample descriptions. Two composition columns are produced:
#   * full_composition: dict of element -> ratio rounded to 3 decimals,
#     keeping only non-zero ratios (e.g. {'Bi': 0.95, 'Mn': 0.05});
#   * elem_combo: tuple of the elements present, sorted alphabetically.
compositions = []
for _, sample in jcap_df.iterrows():
    element_ratios = sample['Fe':'Rb']
    compositions.append(
        {elem: round(ratio, 3) for elem, ratio in element_ratios.items() if ratio != 0}
    )

elem_combo_sorted = [tuple(sorted(comp.keys())) for comp in compositions]

# Keep only the band gap from the raw table and attach the new columns.
processed_jcap_df = jcap_df[['bandgap']].copy()
processed_jcap_df['full_composition'] = compositions
processed_jcap_df['elem_combo'] = elem_combo_sorted

# Restrict to binary and ternary compounds (two or three elements).
n_elements = processed_jcap_df['elem_combo'].map(len)
selected_jcap_df = processed_jcap_df[n_elements.isin([2, 3])]

In [4]:
# Preview the first three rows of the binary/ternary subset.
selected_jcap_df.head(3)

Unnamed: 0_level_0,bandgap,full_composition,elem_combo
comp_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1.742198,"{'Bi': 0.95, 'Mn': 0.05}","(Bi, Mn)"
2,1.771895,"{'Fe': 0.05, 'Bi': 0.95}","(Bi, Fe)"
3,1.771431,"{'Bi': 0.9, 'Mn': 0.1}","(Bi, Mn)"


In [None]:
# (rows, columns) of the binary/ternary subset.
selected_jcap_df.shape

##### Pull MP oxide data

In [None]:
# Map every element combination to the list of full compositions that use it.
from collections import defaultdict

combinations = defaultdict(list)
combo_composition_pairs = zip(selected_jcap_df['elem_combo'],
                              selected_jcap_df['full_composition'])
for elem_combo, full_composition in combo_composition_pairs:
    combinations[elem_combo].append(full_composition)

In [None]:
from pymatgen import MPRester, Composition
from pymatgen.analysis.phase_diagram import GrandPotentialPhaseDiagram, PDPlotter

# Oxygen chemical potential used to open each phase diagram to O.
MP_mu_O = -4.29

# Entries are kept when `get_form_energy_per_atom` is below this value.
# NOTE(review): this block previously stored those values in a variable named
# `ehulls`, but the quantity computed is the formation energy per atom, not
# the energy above hull. If an e-above-hull cut-off was intended,
# `gppd.get_e_above_hull(entry)` is probably the call to use -- confirm
# before changing, since it alters the results.
MAX_FORM_ENERGY_PER_ATOM = 0.2

# Security: the Materials Project API key must not be committed with the
# notebook. It is read from the PMG_MAPI_KEY environment variable; when that
# is unset, None is passed and MPRester falls back to its own configuration.
# Any key that was previously embedded here should be rotated.
mpr = MPRester(os.environ.get("PMG_MAPI_KEY"))

# Maps tuple(composition.items()) -> list of original compositions of the
# phase-diagram entries that pass the energy filter for that chemical system.
stable_structure_by_compositions = {}
for elem_combo, full_compositions in tqdm(combinations.items()):
    chemsys_entries = mpr.get_entries_in_chemsys(list(elem_combo) + ['O'])
    gppd = GrandPotentialPhaseDiagram(chemsys_entries, {"O": MP_mu_O})

    # The filter depends only on the chemical system, not on the individual
    # composition, so it is evaluated once per system rather than once per
    # composition.
    low_energy_comps = [
        entry.original_comp
        for entry in gppd.all_entries
        if gppd.get_form_energy_per_atom(entry) < MAX_FORM_ENERGY_PER_ATOM
    ]

    for composition in full_compositions:
        # Each key gets its own list so later mutation of one value cannot
        # leak into the others.
        stable_structure_by_compositions[tuple(composition.items())] = list(low_energy_comps)

# Alternative that was tried and left disabled: per-composition decomposition
# via gppd.get_decomposition(Composition(composition)), plus plotting through
# PDPlotter(gppd).get_plot().

In [None]:
# import pickle
# pickle.dump(stable_structure_by_compositions, open('stable_structure_by_compositions.pkl', 'wb'))

##### CAMD submit

In [30]:
# Helper script taken from 
# https://github.awsinternal.tri.global/materials/camd_api/blob/master/scripts/submit_LaFeAs_class.py

import argparse
from camd_apigw.s3_utils import submit_chemsys, CAMD_SYNC_S3_BUCKET, \
    CAMD_DEFAULT_CAMPAIGN
import itertools
from tqdm import tqdm


# Command-line options for a submission: the campaign type and the S3 bucket,
# each defaulting to the value imported from camd_apigw.s3_utils.
argparser = argparse.ArgumentParser()
argparser.add_argument(
    "--campaign", "-r",
    default=CAMD_DEFAULT_CAMPAIGN,
    help="Type of run, e. g. proto-dft or oqmd-atf",
)
argparser.add_argument(
    "--bucket", "-b",
    default=CAMD_SYNC_S3_BUCKET,
    help="Bucket for submission, e.g. camd-runs or camd-test",
)

def get_possible_chemsyses(combinations):
    """Build the oxide chemical-system string for each element combination.

    Each combination's elements are joined with '-' and '-O' is appended,
    e.g. ('Ba', 'Ca') becomes 'Ba-Ca-O'. Returns a list in input order.
    """
    chemsyses = []
    for elements in combinations:
        chemsyses.append('{}-O'.format('-'.join(elements)))
    return chemsyses

In [38]:
# Unique element combinations present in the JCAP subset (each tuple is
# already alphabetically sorted), listed in reverse order.
elem_combo_set = set(selected_jcap_df['elem_combo'])
combinations_of_interest = sorted(elem_combo_set, reverse=True)

# CAMD is last in, first out, so order the combinations longest-first (the
# sort is stable, so ties keep the order above) and take the final ten.
sorted_interest = sorted(combinations_of_interest, key=lambda combo: -len(combo))[-10:]

# args = argparser.parse_args()
# for chemsys in tqdm(get_possible_chemsyses(sorted_interest)):
#     submit_chemsys(chemsys, bucket_name=args.bucket,
#                    campaign=args.campaign)

In [39]:
# Display the chemical-system strings built from the selected combinations.
get_possible_chemsyses(sorted_interest)

['Ba-Ca-O',
 'Al-V-O',
 'Al-Sb-O',
 'Al-Pb-O',
 'Al-Mn-O',
 'Al-Bi-O',
 'Ag-Sm-O',
 'Ag-Mn-O',
 'Ag-Er-O',
 'Ag-Bi-O']

In [40]:
from camd_apigw import s3_utils

# Query and print the run status of every selected chemical system.
chemsyses_to_check = get_possible_chemsyses(sorted_interest)
for chemsys in tqdm(chemsyses_to_check):
    run_status = s3_utils.get_run_status(chemsys)
    print(run_status)

 10%|█         | 1/10 [00:01<00:13,  1.45s/it]

{'state': 'NOT-STARTED', 'data': {'last_submitted': '2020-06-05T17:41:59.784892'}}


 20%|██        | 2/10 [00:02<00:11,  1.43s/it]

{'state': 'NOT-STARTED', 'data': {'last_submitted': '2020-06-05T17:42:00.837149'}}


 30%|███       | 3/10 [00:04<00:09,  1.41s/it]

{'state': 'NOT-STARTED', 'data': {'last_submitted': '2020-06-05T17:42:01.701499'}}


 40%|████      | 4/10 [00:05<00:08,  1.40s/it]

{'state': 'NOT-STARTED', 'data': {'last_submitted': '2020-06-05T17:42:02.578854'}}


 50%|█████     | 5/10 [00:06<00:06,  1.39s/it]

{'state': 'NOT-STARTED', 'data': {'last_submitted': '2020-06-05T17:42:03.436719'}}


 60%|██████    | 6/10 [00:08<00:05,  1.37s/it]

{'state': 'NOT-STARTED', 'data': {'last_submitted': '2020-06-05T17:42:04.319871'}}


 70%|███████   | 7/10 [00:09<00:04,  1.36s/it]

{'state': 'NOT-STARTED', 'data': {'last_submitted': '2020-06-05T17:42:05.179194'}}


 80%|████████  | 8/10 [00:11<00:02,  1.40s/it]

{'state': 'NOT-STARTED', 'data': {'last_submitted': '2020-06-05T17:42:06.059521'}}


 90%|█████████ | 9/10 [00:12<00:01,  1.37s/it]

{'state': 'NOT-STARTED', 'data': {'last_submitted': '2020-06-05T17:42:06.898448'}}


100%|██████████| 10/10 [00:13<00:00,  1.36s/it]

{'state': 'IN-PROGRESS'}



