## Setup

In [22]:
from os import path

# Importing google.colab can only succeed inside a Colab runtime, so the import
# itself serves as the environment check. Only ImportError is caught: any other
# failure is a real problem and should not be silently read as "not in Colab".
try:
  import google.colab
  IN_COLAB = True
  base_dir = "/content/drive/MyDrive/"
except ImportError:
  IN_COLAB = False
  base_dir = path.join("data", "external")

In [None]:
# Install the third-party packages only when running in Colab; outside Colab
# this cell does nothing and the packages must already be available.
# NOTE(review): the versions are unpinned - consider pinning them for reproducibility.
if IN_COLAB:
  %pip install crabnet
  %pip install pymatgen

## Materials Project API Key
Get your Materials Project API key from a file that you store in your Google Drive (see below) or current directory (.).

The file named mp-api-key.json placed directly in your MyDrive folder or in your current directory would look like the following:

```json
{
    "API_KEY": "YOUR_API_KEY"
}
```

In [None]:
import json


def _read_api_key(fpath):
  """Return the value stored under "API_KEY" in the JSON file at `fpath`.

  Raises whatever `open`, `json.load` or the key lookup raise when the file
  is missing, is not valid JSON, or has no "API_KEY" entry.
  """
  # https://stackoverflow.com/a/68442279/13697228
  with open(fpath, 'r') as f:
    return json.load(f)["API_KEY"]


if IN_COLAB:
  # In Colab the key file is read from the user's Google Drive, which has to
  # be mounted first.
  from google.colab import drive
  drive.mount('/content/drive')
  apikey_fpath = "/content/drive/MyDrive/mp-api-key.json"
  try:
    api_key = _read_api_key(apikey_fpath)
  except Exception as e:
    print(e)
    # Fallback: key typed into the Colab form field on the next line.
    api_key = "" #@param {type:"string"}
    if api_key == "":
      print(f"Couldn't load API key from {apikey_fpath}, and user-input API key is also empty.")
    else:
      # Never echo the key itself: cell outputs are saved with the notebook.
      print("defaulting to user-input API key")
else:
  # Outside Colab, look for the key file in the current directory first.
  apikey_fpath = "mp-api-key.json"
  try:
    api_key = _read_api_key(apikey_fpath)
  except Exception as e:
    # No usable key file: pass None on and rely on the pymatgen configuration.
    print(f"Couldn't load API key from {apikey_fpath} ({e}).")
    api_key = None
    print("make sure that you have run `pmg config --add PMG_MAPI_KEY <USER_API_KEY>`")

In [25]:
from pymatgen.ext.matproj import MPRester
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from crabnet.crabnet_ import CrabNet

## Download data from Materials Project

In [None]:
# Query criteria: entries whose "nelements" field is >= 2.
mp_criteria = {"nelements": {"$gte": 2}}
# Only the fields consumed by the dataset-building cells are requested.
mp_properties = ["e_above_hull", "full_formula", "icsd_ids"]

with MPRester(api_key) as m:
    results = m.query(mp_criteria, properties=mp_properties)

## Getting training and validation datasets for e_above_hull

In [None]:
# Collect one (formula, e_above_hull) pair per downloaded entry.
formula, target = [], []

for entry in tqdm(results):
    formula.append(entry["full_formula"])
    target.append(entry["e_above_hull"])

df_e = pd.DataFrame({"formula": formula, "target": target})
# Keep only the rows that actually have a target value.
has_target = df_e["target"].notna()
df_e = df_e.loc[has_target]

# 80/20 train/validation split with a fixed seed.
train_df_e, val_df_e = train_test_split(
    df_e,
    test_size=0.20,
    random_state=42,
)

## Training CrabNet for e_above_hull

In [None]:
# Model for the e_above_hull target, configured for the "cuda" device.
cb_e = CrabNet(
    mat_prop="e_above_hull",
    compute_device="cuda",
    epochs=10000,
)

# Fit on the training split, then predict the validation split together with
# the model's uncertainty estimates.
cb_e.fit(train_df_e)
val_pred_e, val_sigma_e = cb_e.predict(
    val_df_e,
    return_uncertainty=True,
)

## Predict energy above hull for our predicted materials

Place the file named peak_score.csv directly in your MyDrive folder or in your current directory.

In [None]:
# The Google Drive path only exists in Colab; everywhere else the file is
# expected in the current working directory.
if IN_COLAB:
    test_data_fpath = "/content/drive/MyDrive/peak_score.csv"
else:
    test_data_fpath = "peak_score.csv"

# read_csv opens and closes the file itself when given a path, so no
# surrounding open() is needed.
test_df_e = pd.read_csv(test_data_fpath, sep=",")

test_pred_e, test_sigma_e = cb_e.predict(test_df_e, return_uncertainty=True)

# Attach the predictions and their uncertainties to the candidate table.
test_df_e["predicted_e_above_hull"] = test_pred_e
test_df_e["uncertainty_for_e_above_hull"] = test_sigma_e
print(test_df_e)

## Getting training and validation datasets for stability

In [None]:
# Collect one (formula, label) pair per downloaded entry.
formula, target = [], []

for entry in tqdm(results):
    formula.append(entry["full_formula"])
    # Label is 1 when the entry has a non-empty "icsd_ids" value, otherwise 0.
    target.append(1 if entry["icsd_ids"] else 0)

df_st = pd.DataFrame({"formula": formula, "target": target})

# 80/20 train/validation split with a fixed seed.
train_df_st, val_df_st = train_test_split(
    df_st,
    test_size=0.20,
    random_state=42,
)

## Training CrabNet for stability prediction

In [None]:
# Classification model for the 0/1 stability label, configured for the
# "cuda" device.
cb_st = CrabNet(
    mat_prop="stability",
    classification=True,
    compute_device="cuda",
    epochs=10000,
)

# Fit on the training split, then predict the validation split with
# return_uncertainty=True.
cb_st.fit(train_df_st)
val_pred_st, val_sigma_st = cb_st.predict(
    val_df_st,
    return_uncertainty=True,
)

## Predicting stability

In [None]:
test_df_st = test_df_e
test_pred_st = cb_st.predict(test_df_st)

test_df_st["predicted_stability"] = test_pred_st
#test_df_st["uncertainty_for_stability"] = test_sigma_st
#print(test_df_st)

test_df_st.to_csv("final.csv")
!cp final.csv "/content/drive/MyDrive/"