In [3]:
import json
import aiida
from aiida import orm
from aiida.orm import load_node, load_group

### Investigating MC2D archive entry: https://archive.materialscloud.org/record/2022.84

### Non-AiiDA files

In [12]:
# Load the non-AiiDA JSON index of 2D structures from the downloaded archive data.
with open('../archive-data/structure_2d.json') as json_file:
    structure_2d_json = json.load(json_file)

# one record per list element
print(f"total number of entries: {len(structure_2d_json)}")

total number of entries: 3078


In [9]:
# Show which fields the first record provides (iterating a dict yields its keys).
first_entry = structure_2d_json[0]
list(first_entry)

['abundance',
 'initial_3D_spg',
 'band_gap',
 'number_of_atoms',
 'bands_uuid',
 'initial_3D_bulk_structure_uuid',
 'initial_3D_formula',
 'point_group',
 'binding_energy_per_substructure_per_unit_area_df2',
 'formula',
 'all_3D_parents',
 'initial_3D_source_db',
 'space_group',
 'number_of_species',
 'as_extracted_2D_structure_uuid',
 'relaxed_3D_bulk_structure_df2_uuid',
 'prototype',
 'initial_3D_db_id',
 'optimized_2D_structure_uuid']

In [13]:
# Gather the distinct optimized_2D_structure_uuid values; records lacking the key are skipped.
# Note: because this is a set, the count printed below is the number of distinct UUIDs.
uuid_key = 'optimized_2D_structure_uuid'
uuids_json = {entry[uuid_key] for entry in structure_2d_json if uuid_key in entry}

print(f"number of entries with optimized_2D_structure_uuid: {len(uuids_json)}")

number of entries with optimized_2D_structure_uuid: 2742


In [15]:
# Check that the CIF files (presumably the unpacked optimized_2d_structures.zip)
# are named after exactly the same set of UUIDs as collected from structure_2d.json.

from pathlib import Path

cif_dir = Path('../archive-data/optimized_2d_structures/optimized_structures_new')

# the file name without the .cif suffix is compared against the JSON UUIDs
uuids_cif = {cif_path.stem for cif_path in cif_dir.glob('*.cif')}

print(uuids_cif == uuids_json)

True


### AiiDA data

In [16]:
# This profile was created clean and then populated by importing
# MC2D_export_20220622.aiida.

profile_name = "mc2d_test"
aiida.load_profile(profile_name)

Profile<uuid='7436edf268b9495aac39b8799ae8bf77' name='mc2d_test'>

In [17]:
# see all groups

! verdi group list --all --all-users --count

[22m  PK  Label                                                                               Type string    User                           Node count
----  ----------------------------------------------------------------------------------  -------------  ---------------------------  ------------
  21  20220621-191136                                                                     core.import    aiida@prnmarvelsrv3.epfl.ch        406009
   1  20220622-101846                                                                     core.import    aiida@prnmarvelsrv3.epfl.ch        113209
  24  20240416-234347                                                                     core.import    info@materialscloud.org            518934
   5  absolute_magnetization                                                              core           aiida@prnmarvelsrv3.epfl.ch            56
  20  all_MC2D_uuids_20220613                                                             core           aiida@pr

In [45]:
# Query the whole AiiDA database (no group restriction) for StructureData nodes
# whose UUID is one of those collected from structure_2d.json.
builder = (
    orm.QueryBuilder()
    .append(
        orm.StructureData,
        tag='structure',
        filters={'uuid': {'in': uuids_json}},
        project=['*'],
    )
)

nodes_in_aiida = builder.all()
print(f"Number of nodes found corresponding to structure_2d.json: {len(nodes_in_aiida)}")

# each result row carries the single projected node at index 0
uuids_in_aiida = {row[0].uuid for row in nodes_in_aiida}

Number of nodes found corresponding to structure_2d.json: 2693


In [46]:
# UUIDs present in structure_2d.json for which no StructureData node was found in the database.
json_uuids_missing_in_aiida = uuids_json.difference(uuids_in_aiida)

print(f"Number of structure_2d.json uuids missing in AiiDA DB: {len(json_uuids_missing_in_aiida)}")

# Save them in sorted order, one UUID per line.
with open("json_uuids_missing_in_aiida.txt", 'w') as out_file:
    out_file.writelines(
        missing_uuid + "\n" for missing_uuid in sorted(json_uuids_missing_in_aiida)
    )

Number of structure_2d.json uuids missing in AiiDA DB: 49


In [47]:
# Collect the UUIDs of all StructureData nodes that belong to the group labelled "structure_2D".

builder = (
    orm.QueryBuilder()
    .append(orm.Group, tag='group', filters={"label": "structure_2D"})
    .append(orm.StructureData, tag='structure', with_group="group", project=['*'])
)

nodes_structure_2d = builder.all()

# each result row carries the single projected node at index 0
uuids_structure_2d = {row[0].uuid for row in nodes_structure_2d}

In [48]:
# UUIDs present in structure_2d.json but not among the members of the structure_2D group.
json_uuids_missing_in_aiida_group = uuids_json.difference(uuids_structure_2d)

print(f"Number of structure_2d.json uuids missing in AiiDA structure_2d group: {len(json_uuids_missing_in_aiida_group)}")

# Save them in sorted order, one UUID per line.
with open("json_uuids_missing_in_aiida_group.txt", 'w') as out_file:
    out_file.writelines(
        missing_uuid + "\n" for missing_uuid in sorted(json_uuids_missing_in_aiida_group)
    )

Number of structure_2d.json uuids missing in AiiDA structure_2d group: 57


In [49]:
# UUIDs of structure_2D group members that do not appear in structure_2d.json.
aiida_group_uuids_missing_in_json = uuids_structure_2d.difference(uuids_json)

print(f"Number of AiiDA structure_2d group uuids missing in structure_2d.json: {len(aiida_group_uuids_missing_in_json)}")

# Save them in sorted order, one UUID per line.
with open("aiida_group_uuids_missing_in_json.txt", 'w') as out_file:
    out_file.writelines(
        extra_uuid + "\n" for extra_uuid in sorted(aiida_group_uuids_missing_in_json)
    )

Number of AiiDA structure_2d group uuids missing in structure_2d.json: 74
