<a href="https://colab.research.google.com/github/immischein/ML-bandgap/blob/niko_code/red_data_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# You might need to reinstall the library manually, imports ans pip install:
!pip install mp_api

Collecting mp_api
  Downloading mp_api-0.45.5-py3-none-any.whl.metadata (2.3 kB)
Collecting maggma>=0.57.1 (from mp_api)
  Downloading maggma-0.71.5-py3-none-any.whl.metadata (11 kB)
Collecting pymatgen!=2024.2.20,>=2022.3.7 (from mp_api)
  Downloading pymatgen-2025.5.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting monty>=2024.12.10 (from mp_api)
  Downloading monty-2025.3.3-py3-none-any.whl.metadata (3.6 kB)
Collecting emmet-core>=0.84.3rc6 (from mp_api)
  Downloading emmet_core-0.84.7rc1-py3-none-any.whl.metadata (3.0 kB)
Collecting pydantic-settings>=2.0 (from emmet-core>=0.84.3rc6->mp_api)
  Downloading pydantic_settings-2.9.1-py3-none-any.whl.metadata (3.8 kB)
Collecting pybtex~=0.24 (from emmet-core>=0.84.3rc6->mp_api)
  Downloading pybtex-0.24.0-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting ruamel.yaml>=0.17 (from maggma>=0.57.1->mp_api)
  Downloading ruamel.yaml-0.18.10-py3-none-any.whl.metadata (23 kB)
Collecting pymongo<4.11,>=4.2.

In [2]:
# Import
from mp_api.client import MPRester
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle

In [3]:
ApiKey = "nmtxcmhbUx34lX5wM3xNUMZKf7kooZCK"

 ## Finds the materials that have a shear modulus that is (0, infinity) and contains a band_gap. Notice this uses mpr.materials.summary.search(). Only 3638 materials found.

In [4]:
with MPRester(ApiKey) as mpr:
  # Finds the materials that have a shear modulus from materials.summary.search()
  mpr.materials.summary.search(g_vrh=(None,None), band_gap=(0.001, None))

Retrieving SummaryDoc documents:   0%|          | 0/3636 [00:00<?, ?it/s]

## A more comprehensive elasticity data can also be found by using materials.elasticity.search()

This includes the elastic tensor, which allows one to calculate deformation given a specific direction.

In [5]:
with MPRester(ApiKey) as mpr:
  elasticity_data = mpr.materials.elasticity.search()

Retrieving ElasticityDoc documents:   0%|          | 0/13283 [00:00<?, ?it/s]

In [6]:
print(elasticity_data[1])

[4m[1mMPDataDoc<ElasticityDoc>[0;0m[0;0m
[1mbuilder_meta[0;0m=EmmetMeta(emmet_version='0.84.6rc5', pymatgen_version='2024.11.13', run_id='594815f4-e1b1-4ff4-9598-f5e9f2d2cc26', batch_id=None, database_version=None, build_date=datetime.datetime(2025, 3, 20, 19, 22, 25, 330000, tzinfo=FixedOffset(datetime.timedelta(0), 'UTC')), license=None),
[1mnsites[0;0m=8,
[1melements[0;0m=[Element Si],
[1mnelements[0;0m=1,
[1mcomposition[0;0m=Composition('Si8'),
[1mcomposition_reduced[0;0m=Composition('Si1'),
[1mformula_pretty[0;0m='Si',
[1mformula_anonymous[0;0m='A',
[1mchemsys[0;0m='Si',
[1mvolume[0;0m=169.20556154395194,
[1mdensity[0;0m=2.204989935063734,
[1mdensity_atomic[0;0m=21.150695192993993,
[1msymmetry[0;0m=SymmetryData(crystal_system=<CrystalSystem.tri: 'Triclinic'>, symbol='P1', number=1, point_group='1', symprec=0.1, angle_tolerance=5.0, version='2.6.0'),
[1mproperty_name[0;0m='elasticity',
[1mmaterial_id[0;0m=MPID(mp-644693),
[1mdeprecated[0;0m=True,

# Getting Transition Metal Oxides Information

##  Using the filter technique

In [14]:
from mp_api.client import MPRester
from pymatgen.core.periodic_table import Element

# API_KEY = "YOUR_API_KEY"

# Define all transition metals (periodic table blocks d-block, periods 4–7)
transition_metals = [el.symbol for el in Element if el.is_transition_metal]
transition_metals_set = set(transition_metals)

In [19]:
print(transition_metals)

['Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'La', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg', 'Ac', 'Rf', 'Db', 'Sg', 'Bh', 'Hs', 'Mt', 'Ds', 'Rg', 'Cn']


In [33]:
filtered_tm_oxides = []

with MPRester(ApiKey) as mpr:
    for tm in transition_metals:
        results = mpr.materials.summary.search(
            band_gap=(0.001, None),  # Only materials with a band gap
            elements=["O", tm],      # Must contain O and the current TM
            fields=["material_id", "formula_pretty", "elements"]
        )
        print(f"{tm}: {len(results)} results")  # Debugging output

        for r in results:
          filtered_tm_oxides.append((r.material_id, r.formula_pretty))
        print(f"{tm}: {len(filtered_tm_oxides)} found filtered total")  # Debugging output

Retrieving SummaryDoc documents:   0%|          | 0/908 [00:00<?, ?it/s]

Sc: 908 results
Sc: 908 found filtered total


Retrieving SummaryDoc documents:   0%|          | 0/3397 [00:00<?, ?it/s]

Ti: 3397 results
Ti: 4305 found filtered total


Retrieving SummaryDoc documents:   0%|          | 0/6225 [00:00<?, ?it/s]

V: 6225 results
V: 10530 found filtered total


Retrieving SummaryDoc documents:   0%|          | 0/3193 [00:00<?, ?it/s]

Cr: 3193 results
Cr: 13723 found filtered total


Retrieving SummaryDoc documents:   0%|          | 0/6776 [00:00<?, ?it/s]

Mn: 6776 results
Mn: 20499 found filtered total


Retrieving SummaryDoc documents:   0%|          | 0/6113 [00:00<?, ?it/s]

Fe: 6113 results
Fe: 26612 found filtered total


Retrieving SummaryDoc documents:   0%|          | 0/4630 [00:00<?, ?it/s]

Co: 4630 results
Co: 31242 found filtered total


Retrieving SummaryDoc documents:   0%|          | 0/2255 [00:00<?, ?it/s]

Ni: 2255 results
Ni: 33497 found filtered total


Retrieving SummaryDoc documents:   0%|          | 0/1941 [00:00<?, ?it/s]

Cu: 1941 results
Cu: 35438 found filtered total


Retrieving SummaryDoc documents:   0%|          | 0/2765 [00:00<?, ?it/s]

Zn: 2765 results
Zn: 38203 found filtered total


Retrieving SummaryDoc documents:   0%|          | 0/1629 [00:00<?, ?it/s]

Y: 1629 results
Y: 39832 found filtered total


Retrieving SummaryDoc documents:   0%|          | 0/1323 [00:00<?, ?it/s]

Zr: 1323 results
Zr: 41155 found filtered total


Retrieving SummaryDoc documents:   0%|          | 0/2574 [00:00<?, ?it/s]

Nb: 2574 results
Nb: 43729 found filtered total


Retrieving SummaryDoc documents:   0%|          | 0/2195 [00:00<?, ?it/s]

Mo: 2195 results
Mo: 45924 found filtered total


Retrieving SummaryDoc documents:   0%|          | 0/40 [00:00<?, ?it/s]

Tc: 40 results
Tc: 45964 found filtered total


Retrieving SummaryDoc documents:   0%|          | 0/267 [00:00<?, ?it/s]

Ru: 267 results
Ru: 46231 found filtered total


Retrieving SummaryDoc documents:   0%|          | 0/133 [00:00<?, ?it/s]

Rh: 133 results
Rh: 46364 found filtered total


Retrieving SummaryDoc documents:   0%|          | 0/151 [00:00<?, ?it/s]

Pd: 151 results
Pd: 46515 found filtered total


Retrieving SummaryDoc documents:   0%|          | 0/771 [00:00<?, ?it/s]

Ag: 771 results
Ag: 47286 found filtered total


Retrieving SummaryDoc documents:   0%|          | 0/776 [00:00<?, ?it/s]

Cd: 776 results
Cd: 48062 found filtered total


Retrieving SummaryDoc documents:   0%|          | 0/2145 [00:00<?, ?it/s]

La: 2145 results
La: 50207 found filtered total


Retrieving SummaryDoc documents:   0%|          | 0/1017 [00:00<?, ?it/s]

Hf: 1017 results
Hf: 51224 found filtered total


Retrieving SummaryDoc documents:   0%|          | 0/1699 [00:00<?, ?it/s]

Ta: 1699 results
Ta: 52923 found filtered total


Retrieving SummaryDoc documents:   0%|          | 0/2339 [00:00<?, ?it/s]

W: 2339 results
W: 55262 found filtered total


Retrieving SummaryDoc documents:   0%|          | 0/298 [00:00<?, ?it/s]

Re: 298 results
Re: 55560 found filtered total


Retrieving SummaryDoc documents:   0%|          | 0/152 [00:00<?, ?it/s]

Os: 152 results
Os: 55712 found filtered total


Retrieving SummaryDoc documents:   0%|          | 0/70 [00:00<?, ?it/s]

Ir: 70 results
Ir: 55782 found filtered total


Retrieving SummaryDoc documents:   0%|          | 0/225 [00:00<?, ?it/s]

Pt: 225 results
Pt: 56007 found filtered total


Retrieving SummaryDoc documents:   0%|          | 0/163 [00:00<?, ?it/s]

Au: 163 results
Au: 56170 found filtered total


Retrieving SummaryDoc documents:   0%|          | 0/358 [00:00<?, ?it/s]

Hg: 358 results
Hg: 56528 found filtered total


Retrieving SummaryDoc documents:   0%|          | 0/10 [00:00<?, ?it/s]

Ac: 10 results
Ac: 56538 found filtered total


Retrieving SummaryDoc documents: 0it [00:00, ?it/s]

Rf: 0 results
Rf: 56538 found filtered total


Retrieving SummaryDoc documents: 0it [00:00, ?it/s]

Db: 0 results
Db: 56538 found filtered total


Retrieving SummaryDoc documents: 0it [00:00, ?it/s]

Sg: 0 results
Sg: 56538 found filtered total


Retrieving SummaryDoc documents: 0it [00:00, ?it/s]

Bh: 0 results
Bh: 56538 found filtered total


Retrieving SummaryDoc documents: 0it [00:00, ?it/s]

Hs: 0 results
Hs: 56538 found filtered total


Retrieving SummaryDoc documents: 0it [00:00, ?it/s]

Mt: 0 results
Mt: 56538 found filtered total


Retrieving SummaryDoc documents: 0it [00:00, ?it/s]

Ds: 0 results
Ds: 56538 found filtered total


Retrieving SummaryDoc documents: 0it [00:00, ?it/s]

Rg: 0 results
Rg: 56538 found filtered total


Retrieving SummaryDoc documents: 0it [00:00, ?it/s]

Cn: 0 results
Cn: 56538 found filtered total
Found 0 unique transition metal oxides with band gap > 0


In [37]:
# Output results
print(f"Found {len(filtered_oxides)} unique transition metal oxides with band gap > 0")
for mid, formula in filtered_oxides[0:10]:
    print(f"{formula} ({mid})")

ScO2 (mp-1206234)
ScO2 (mp-1179114)
ScMo3O8 (mp-25572)
ScHO2 (mp-625199)
ScHO2 (mp-1101379)
ScInO6 (mp-1219449)
ScMo3O8 (mp-2214451)
ScIO (mp-559760)
Cs3ScO3 (mp-756505)
ScPaO4 (mp-1219240)


## If accessing a downloaded with MPRester mpr.materials.summary.search() file

In [57]:
with MPRester(ApiKey) as mpr:
  #Please edit features you want to filter from inside the search() command. TAKING ONLY A SUBSET OF VALUES bandgap (1, 1.1)
  example_docs = mpr.materials.summary.search(band_gap=(2, 3))

Retrieving SummaryDoc documents:   0%|          | 0/15745 [00:00<?, ?it/s]



In [58]:
from mp_api.client import MPRester
from pymatgen.core.periodic_table import Element

# Define all transition metals (periodic table blocks d-block, periods 4–7)
transition_metals = [el.symbol for el in Element if el.is_transition_metal]
transition_metals_set = set(transition_metals)

In [62]:
tm_oxides = [
    doc for doc in example_docs
    if "O" in [str(e) for e in doc.elements] and transition_metals_set.intersection(str(e) for e in doc.elements)
]

In [89]:
test = tm_oxides[0:5]

In [107]:
for item in test:
    print(item.elements) # Access the 'elements' attribute of each individual item in the list

[Element Ac, Element Cr, Element O]
[Element Ac, Element Ga, Element O]
[Element Ag, Element B, Element O]
[Element Ag, Element B, Element O]
[Element Ag, Element C, Element Cl, Element N, Element O, Element S]


In [91]:
print(test[:])

[[4m[1mMPDataDoc<SummaryDoc>[0;0m[0;0m(
[1mbuilder_meta[0;0m=EmmetMeta(emmet_version='0.84.3rc4', pymatgen_version='2024.11.13', run_id='13d67e62-d65a-4ffc-8b7f-d2d2e0041e70', batch_id=None, database_version='2025.04.10', build_date=datetime.datetime(2024, 11, 21, 20, 2, 39, 520000), license='BY-C'),
[1mnsites[0;0m=5,
[1melements[0;0m=[Element Ac, Element Cr, Element O],
[1mnelements[0;0m=3,
[1mcomposition[0;0m=Composition('Ac1 Cr1 O3'),
[1mcomposition_reduced[0;0m=Composition('Ac1 Cr1 O3'),
[1mformula_pretty[0;0m='AcCrO3',
[1mformula_anonymous[0;0m='ABC3',
[1mchemsys[0;0m='Ac-Cr-O',
[1mvolume[0;0m=61.36284504026082,
[1mdensity[0;0m=8.848788046728616,
[1mdensity_atomic[0;0m=12.272569008052164,
[1msymmetry[0;0m=SymmetryData(crystal_system=<CrystalSystem.cubic: 'Cubic'>, symbol='Pm-3m', number=221, point_group='m-3m', symprec=0.1, angle_tolerance=5.0, version='2.5.0'),
[1mproperty_name[0;0m='summary',
[1mmaterial_id[0;0m=MPID(mp-866101),
[1mdeprecated[