In [1]:
from collections import Counter
import json
import os

from pymatgen.ext.matproj import MPRester

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [2]:
with open("topo_data.json", "r") as file:
    data = json.load(file)

In [3]:
with open(os.path.expanduser("~/.mpapikey"), "r") as f:
    apikey = f.read().strip()

In [4]:
rna = [k for k, v in data.items() if len(v['topology']) == 1 and v['topology'][0][2] == 'rna']
len(rna)

7

In [5]:
bpq = [k for k, v in data.items() if len(v['topology']) == 1 and v['topology'][0][2] == 'bpq']
len(bpq)

16

In [6]:
bpq

['mp-759957',
 'mp-31237',
 'mp-1188385',
 'mp-758075',
 'mp-725680',
 'mp-1210967',
 'mp-1189240',
 'mp-763244',
 'mp-767419',
 'mp-504945',
 'mp-510638',
 'mp-24142',
 'mp-697263',
 'mp-23768',
 'mp-505254',
 'mp-1193110']

In [7]:
with MPRester(apikey) as mpr:
    docs = mpr.materials.summary.search(
        material_ids=rna + bpq,
        fields=["material_id", "formula_pretty", "structure", "theoretical", "symmetry"]
    )

Retrieving SummaryDoc documents:   0%|          | 0/23 [00:00<?, ?it/s]

In [8]:
[(d.formula_pretty, d.theoretical, d.material_id, d.symmetry.symbol) for d in docs]

[('NaPH2OF6', False, MPID(mp-767419), 'Pnna'),
 ('MnFeH4O2F5', False, MPID(mp-504945), 'Imma'),
 ('MnGaH4O2F5', False, MPID(mp-510638), 'Imma'),
 ('MgAlH4O2F5', False, MPID(mp-24142), 'Imma'),
 ('LiAsH2OF6', False, MPID(mp-697263), 'Imma'),
 ('LiNO3', True, MPID(mp-696822), 'C2/c'),
 ('TaAsO5', True, MPID(mp-1346171), 'P2_12_12_1'),
 ('AgH9O5', True, MPID(mp-769296), 'P2_12_12_1'),
 ('BiAsO5', True, MPID(mp-1347378), 'P2_12_12_1'),
 ('SbAsO5', True, MPID(mp-1340007), 'P2_12_12_1'),
 ('Fe2H4O2F5', False, MPID(mp-758075), 'Imma'),
 ('NaPOF6', False, MPID(mp-725680), 'Imma'),
 ('MnVO2F5', True, MPID(mp-1210967), 'Imma'),
 ('MgAlO2F5', False, MPID(mp-1189240), 'Imma'),
 ('ZnFeH4O2F5', False, MPID(mp-763244), 'Imm2'),
 ('CuHIO4', False, MPID(mp-697072), 'Pnma'),
 ('NaH6C4SN3O5', False, MPID(mp-1197676), 'C2/c'),
 ('V2F7', True, MPID(mp-759957), 'Imma'),
 ('CuAsF7', False, MPID(mp-31237), 'Imma'),
 ('Fe2O2F5', False, MPID(mp-1188385), 'Imma'),
 ('AlZnH4O2F5', False, MPID(mp-23768), 'Imma'),


In [9]:
len(docs)

23

In [10]:
sum(1 for doc in docs if doc.theoretical)

7

In [11]:
fsn = [k for k, v in data.items() if len(v['topology']) == 1 and v['topology'][0][2] == 'fsn']
len(fsn)

179

In [12]:
with MPRester(apikey) as mpr:
    docs_fsn = mpr.materials.summary.search(
        material_ids=fsn,
        fields=["material_id", "formula_pretty", "structure", "theoretical", "symmetry"]
    )

Retrieving SummaryDoc documents:   0%|          | 0/179 [00:00<?, ?it/s]

In [13]:
print("Number of theoretical structures:", sum(1 for doc in docs_fsn if doc.theoretical))
print("Number of experimental structures:", sum(1 for doc in docs_fsn if not doc.theoretical))

Number of theoretical structures: 85
Number of experimental structures: 94


In [14]:
[(d.formula_pretty, d.theoretical, d.material_id, d.symmetry.symbol) for d in docs_fsn if not d.theoretical]

[('CoTe2', False, MPID(mp-1103359), 'Pa-3'),
 ('CuSe2', False, MPID(mp-2280), 'Pa-3'),
 ('Te2Os', False, MPID(mp-2142), 'Pa-3'),
 ('RhSe2', False, MPID(mp-983), 'Pa-3'),
 ('Te2Rh', False, MPID(mp-754), 'Pa-3'),
 ('UC2', False, MPID(mp-1102444), 'Pa-3'),
 ('NiP2', False, MPID(mp-22619), 'Pa-3'),
 ('Bi2Pt', False, MPID(mp-22864), 'Pa-3'),
 ('Sb2Pd', False, MPID(mp-1356), 'Pa-3'),
 ('NiS2', False, MPID(mp-2282), 'Pa-3'),
 ('SiP2', False, MPID(mp-21065), 'Pa-3'),
 ('SiAs2', False, MPID(mp-21268), 'Pa-3'),
 ('CoSe2', False, MPID(mp-22309), 'Pa-3'),
 ('CuS2', False, MPID(mp-1068), 'Pa-3'),
 ('NiTe2', False, MPID(mp-1102324), 'Pa-3'),
 ('ZnO2', False, MPID(mp-8484), 'Pa-3'),
 ('As2Pt', False, MPID(mp-2513), 'Pa-3'),
 ('As2Pd', False, MPID(mp-20465), 'Pa-3'),
 ('CdSe2', False, MPID(mp-1095493), 'Pa-3'),
 ('RhS2', False, MPID(mp-22555), 'Pa-3'),
 ('MgO2', False, MPID(mp-2589), 'Pa-3'),
 ('RuSe2', False, MPID(mp-1922), 'Pa-3'),
 ('CuTe2', False, MPID(mp-1103235), 'Pa-3'),
 ('NiSe2', False, MPID(

In [15]:
c = Counter([d.symmetry.symbol for d in docs_fsn if not d.theoretical])
c.most_common()

[('Pa-3', 54), ('P2_13', 33), ('Pca2_1', 5), ('Pna2_1', 1), ('P1', 1)]

In [16]:
[(d.formula_pretty, d.theoretical, d.material_id, d.symmetry.symbol) for d in docs_fsn if not d.theoretical and "O" in d.formula_pretty]

[('Te2Os', False, MPID(mp-2142), 'Pa-3'),
 ('ZnO2', False, MPID(mp-8484), 'Pa-3'),
 ('MgO2', False, MPID(mp-2589), 'Pa-3'),
 ('OsSe2', False, MPID(mp-2480), 'Pa-3'),
 ('SiP2O7', False, MPID(mp-18293), 'Pa-3'),
 ('P2WO7', False, MPID(mp-505687), 'Pa-3'),
 ('ThP2O7', False, MPID(mp-5156), 'Pa-3'),
 ('MoP2O7', False, MPID(mp-505406), 'Pa-3'),
 ('CeP2O7', False, MPID(mp-22524), 'Pa-3'),
 ('TiP2O7', False, MPID(mp-1200136), 'Pa-3'),
 ('ZrV2O7', False, MPID(mp-554326), 'Pa-3'),
 ('SnP2O7', False, MPID(mp-17887), 'Pa-3'),
 ('ZrP2O7', False, MPID(mp-5024), 'Pa-3'),
 ('HfV2O7', False, MPID(mp-505679), 'Pa-3'),
 ('InSb(P2O7)2', False, MPID(mp-1196350), 'Pna2_1'),
 ('Zr(WO4)2', False, MPID(mp-18778), 'P2_13'),
 ('Hf(WO4)2', False, MPID(mp-1204711), 'P2_13'),
 ('NaO2', False, MPID(mp-614), 'Pa-3'),
 ('CdO2', False, MPID(mp-2310), 'Pa-3'),
 ('OsS2', False, MPID(mp-20905), 'Pa-3'),
 ('UP2O7', False, MPID(mp-1200695), 'P1')]

In [17]:
[(d.formula_pretty, d.theoretical, d.material_id, d.symmetry.symbol) for d in docs_fsn if not d.theoretical and "W" in d.formula_pretty]

[('P2WO7', False, MPID(mp-505687), 'Pa-3'),
 ('Zr(WO4)2', False, MPID(mp-18778), 'P2_13'),
 ('Hf(WO4)2', False, MPID(mp-1204711), 'P2_13')]

In [18]:
[(d.formula_pretty, d.theoretical, d.material_id, d.symmetry.symbol) for d in docs_fsn if not d.theoretical and "O" in d.formula_pretty]

[('Te2Os', False, MPID(mp-2142), 'Pa-3'),
 ('ZnO2', False, MPID(mp-8484), 'Pa-3'),
 ('MgO2', False, MPID(mp-2589), 'Pa-3'),
 ('OsSe2', False, MPID(mp-2480), 'Pa-3'),
 ('SiP2O7', False, MPID(mp-18293), 'Pa-3'),
 ('P2WO7', False, MPID(mp-505687), 'Pa-3'),
 ('ThP2O7', False, MPID(mp-5156), 'Pa-3'),
 ('MoP2O7', False, MPID(mp-505406), 'Pa-3'),
 ('CeP2O7', False, MPID(mp-22524), 'Pa-3'),
 ('TiP2O7', False, MPID(mp-1200136), 'Pa-3'),
 ('ZrV2O7', False, MPID(mp-554326), 'Pa-3'),
 ('SnP2O7', False, MPID(mp-17887), 'Pa-3'),
 ('ZrP2O7', False, MPID(mp-5024), 'Pa-3'),
 ('HfV2O7', False, MPID(mp-505679), 'Pa-3'),
 ('InSb(P2O7)2', False, MPID(mp-1196350), 'Pna2_1'),
 ('Zr(WO4)2', False, MPID(mp-18778), 'P2_13'),
 ('Hf(WO4)2', False, MPID(mp-1204711), 'P2_13'),
 ('NaO2', False, MPID(mp-614), 'Pa-3'),
 ('CdO2', False, MPID(mp-2310), 'Pa-3'),
 ('OsS2', False, MPID(mp-20905), 'Pa-3'),
 ('UP2O7', False, MPID(mp-1200695), 'P1')]