In [1]:
import numpy as np
import requests
import json
import io
import os
import ase.io
from ase.io import read

# GraphQL endpoint that query_reactions() posts its queries to.
GRAPHQL = 'http://api.catalysis-hub.org/graphql'
# Working directory at the time this cell runs; the results file is written beneath it.
ROOT_DIR = os.getcwd()

In [2]:
# Names of the scalar fields requested for every reaction node in the GraphQL
# query and copied into each parsed reaction record. Order is preserved in the
# generated query text.
KEY_VALUES = [
    # composition / surface description
    "chemicalComposition", "surfaceComposition", "facet", "sites", "coverages",
    # reaction description and energies
    "reactants", "products", "Equation", "reactionEnergy", "activationEnergy",
    # calculation details and provenance
    "dftCode", "dftFunctional", "username", "pubId", "id",
]

In [3]:
def query_reactions(endcursor):
    """Fetch one page of reactions from the GraphQL endpoint in ``GRAPHQL``.

    The query asks for up to 50 reactions after the given cursor, passing the
    arguments ``activationEnergy: 100, op: "<"`` (presumably "activation
    energy below 100" -- confirm against the API documentation). For each
    reaction node it requests every field listed in ``KEY_VALUES`` plus the
    ``systems`` sub-objects (id, Trajdata, energy, xyz InputFile,
    keyValuePairs).

    Parameters
    ----------
    endcursor : str
        Cursor returned as ``pageInfo.endCursor`` by the previous page, or an
        empty string for the first page.

    Returns
    -------
    dict
        The ``"data"`` member of the JSON response.

    Raises
    ------
    RuntimeError
        If the response body is not JSON, or carries no usable ``"data"``
        member (e.g. the server rejected the query). Previously this case
        only printed a message and returned the raw response object, which
        made the caller fail later with an unrelated error.
    """
    # One line per requested reaction field, indented to sit inside `node {`.
    node_fields = "".join("\n" + " " * 6 + key_value for key_value in KEY_VALUES)

    query_string = "{"
    query_string += f'reactions(first: 50, after: "{endcursor}"'
    query_string += ', activationEnergy: 100, op: "<"'
    query_string += """) {
  totalCount
  pageInfo {
    endCursor
  }
  edges {
    node {"""
    query_string += node_fields
    query_string += """
      systems {
        id
        Trajdata
        energy
        InputFile(format: "xyz")
        keyValuePairs
      }
    }
  }
}}"""

    response = requests.post(GRAPHQL, {"query": query_string})
    try:
        payload = response.json()
    except ValueError as e:
        # Response.json() signals an undecodable body with a ValueError subclass.
        raise RuntimeError(
            "Error: Something went wrong. Please check your query string. "
            f"(HTTP {response.status_code}: response body is not valid JSON)"
        ) from e

    data = payload.get("data") if isinstance(payload, dict) else None
    if data is None:
        # A rejected GraphQL query typically reports details under "errors".
        errors = payload.get("errors") if isinstance(payload, dict) else payload
        raise RuntimeError(
            "Error: Something went wrong. Please check your query string. "
            f"(HTTP {response.status_code}: {errors})"
        )
    return data

In [4]:
def parse_reaction(reaction):
    """Reduce one reaction node from the query result to the fields we store.

    Parameters
    ----------
    reaction : dict
        A reaction node holding the fields named in ``KEY_VALUES`` and a
        ``"systems"`` list whose items carry ``"energy"``, ``"InputFile"``
        and ``"keyValuePairs"``.

    Returns
    -------
    dict
        ``{"key_value_pairs": {...}, "structures": [...]}`` where
        ``key_value_pairs`` has one entry per name in ``KEY_VALUES`` and
        ``structures`` has one ``{"energy", "InputFile", "keyValuePairs"}``
        dict per system.

    Notes
    -----
    A field absent from ``reaction`` is stored as the string ``"None"``. The
    original code tried to do this by catching ``ValueError``, but a missing
    dict key raises ``KeyError``, so the fallback never ran.
    """
    # Missing fields fall back to the "None" placeholder string.
    key_value_pairs = {
        key_value: reaction.get(key_value, "None") for key_value in KEY_VALUES
    }
    # These two fields may come back as null; store the same placeholder string.
    for nullable_key in ("coverages", "sites"):
        if nullable_key in key_value_pairs and key_value_pairs[nullable_key] is None:
            key_value_pairs[nullable_key] = "None"

    # Keep only the per-structure fields needed downstream; a missing or null
    # "systems" entry yields an empty structure list.
    structures = [
        {
            "energy": structure["energy"],
            "InputFile": structure["InputFile"],
            "keyValuePairs": structure["keyValuePairs"],
        }
        for structure in (reaction.get("systems") or [])
    ]

    return {"key_value_pairs": key_value_pairs, "structures": structures}

In [5]:
# Run queries and save results to file
#
# Pages through the reactions with query_reactions(), keyed by reaction id,
# then dumps everything to <ROOT_DIR>/data/reactions_cathub.json.
PAGE_SIZE = 50  # must match the `first: 50` argument hard-coded in query_reactions

reaction_list = {}
N_fetched = 0
endcursor = ""
n = 0
totalcount = None  # unknown until the first page has been received
# `n * PAGE_SIZE` reactions have been requested so far; keep going while any
# remain. (The former test `n * 50 + 1 < totalcount` skipped the final
# reaction whenever totalcount % 50 == 1.)
while totalcount is None or n * PAGE_SIZE < totalcount:
    data = query_reactions(endcursor)
    edges = data["reactions"]["edges"]
    for edge in edges:
        reaction = edge["node"]
        reaction_list[reaction["id"]] = parse_reaction(reaction)
    endcursor = data["reactions"]["pageInfo"]["endCursor"]
    totalcount = data["reactions"]["totalCount"]
    if not edges:
        # Nothing more to read; stop rather than re-requesting empty pages.
        break
    # Count what was actually received on this page, not the grand total.
    N_fetched += len(edges)
    count = min(PAGE_SIZE * (n + 1), totalcount)
    print("Fetched reactions {} - {} / {}".format(
        PAGE_SIZE * n + 1, count, totalcount
    ))
    n += 1
print("Done!")

# Create the output directory on first use so the dump cannot fail on a fresh checkout.
output_dir = os.path.join(ROOT_DIR, "data")
os.makedirs(output_dir, exist_ok=True)
with open(os.path.join(output_dir, "reactions_cathub.json"), "w") as outfile:
    json.dump(reaction_list, outfile)

Fetched reactions 1 - 50 / 1424
Fetched reactions 51 - 100 / 1424
Fetched reactions 101 - 150 / 1424
Fetched reactions 151 - 200 / 1424
Fetched reactions 201 - 250 / 1424
Fetched reactions 251 - 300 / 1424
Fetched reactions 301 - 350 / 1424
Fetched reactions 351 - 400 / 1424
Fetched reactions 401 - 450 / 1424
Fetched reactions 451 - 500 / 1424
Fetched reactions 501 - 550 / 1424
Fetched reactions 551 - 600 / 1424
Fetched reactions 601 - 650 / 1424
Fetched reactions 651 - 700 / 1424
Fetched reactions 701 - 750 / 1424
Fetched reactions 751 - 800 / 1424
Fetched reactions 801 - 850 / 1424
Fetched reactions 851 - 900 / 1424
Fetched reactions 901 - 950 / 1424
Fetched reactions 951 - 1000 / 1424
Fetched reactions 1001 - 1050 / 1424
Fetched reactions 1051 - 1100 / 1424
Fetched reactions 1101 - 1150 / 1424
Fetched reactions 1151 - 1200 / 1424
Fetched reactions 1201 - 1250 / 1424
Fetched reactions 1251 - 1300 / 1424
Fetched reactions 1301 - 1350 / 1424
Fetched reactions 1351 - 1400 / 1424
Fetched