In [2]:
import os
import pandas as pd
from tqdm import tqdm
from pymatgen.core.structure import Structure
from mp_api.client import MPRester
from emmet.core.summary import HasProps
import json
import signal

  from .autonotebook import tqdm as notebook_tqdm


## Download data from Materials Project Database
The projected Density of States (PDOS) data is publicly available at [Materials Project Database](https://next-gen.materialsproject.org/).
We will access the data using [Materials Project API](https://next-gen.materialsproject.org/api). To download the data we need an API key that can found in your Materials Project account. Please, paste your API key below access the data.

In [3]:
MP_API_KEY = "your_MP_API_key"
mpr = MPRester(MP_API_KEY)

## Provide directories to save structures and PDOS data 

In [4]:
CIF_DIR = "../data/cif_dir"
DOS_DIR = "../data/dos_dir"

if not os.path.exists(CIF_DIR):
    os.makedirs(CIF_DIR)

if not os.path.exists(DOS_DIR):
    os.makedirs(DOS_DIR)

In [5]:
# Utility to handle no response from Materials Project
class TimeoutException(Exception):
    pass

def timeout_handler(signum, frame):
    raise TimeoutException

### Functions to download data

In [6]:
def download_dos_data(dos_dir: str, ids: list[str], t_response: int = 60):
    success_ids = []
    with MPRester(MP_API_KEY) as mpr:
        signal.signal(signal.SIGALRM, timeout_handler)
        for id in tqdm(ids):
            signal.alarm(t_response)
            try:
                dos = mpr.get_dos_by_material_id(id)
            except TimeoutException:
                print(f" Didn't receive response from server in {t_response} s. Skipping {id}")
                continue
            if dos is None:
                print(f" Database returned None for {id} DOS. Skipping this material")
                continue

            dos_dict = dos.as_dict()
            with open(f'{dos_dir}/{id}_dos.json', 'w') as f:
                json.dump(dos_dict, f)
            success_ids.append(id)
        
        df = pd.DataFrame({"id": success_ids})
        df.to_csv(f"{dos_dir}/mpid_downloaded.csv", index=False)

In [11]:
def download_cif_data(cif_dir: str, ids: list[str]):
    success_ids = []
    with MPRester(MP_API_KEY) as mpr:
        docs = mpr.materials.summary.search(material_ids=ids, fields=["material_id", "structure"])
    
    for doc in docs:
        id = doc.material_id
        structure = doc.structure
        structure.to(filename=f"{cif_dir}/{id}.cif")
        
        df = pd.DataFrame({"id": success_ids})
        df.to_csv(f"{cif_dir}/mpid_downloaded.csv", index=False)

## Get all mp-ids that have PDOS data in the Materials Project database

In [8]:
with MPRester(MP_API_KEY) as mpr:
    docs = mpr.summary.search(has_props=[HasProps.dos], 
                              fields=["material_id"])
    
ids = []
for doc in docs:
    ids.append(doc.material_id)

all_ids = pd.DataFrame({"id": ids})
all_ids

  docs = mpr.summary.search(has_props=[HasProps.dos],
Retrieving SummaryDoc documents: 100%|██████████| 88059/88059 [00:22<00:00, 3859.50it/s]


Unnamed: 0,id
0,mp-28967
1,mp-1042447
2,mp-766094
3,mp-546266
4,mp-559295
...,...
88054,mp-561172
88055,mp-27417
88056,mp-984551
88057,mp-766671


## Download data PDOS data

In [None]:
download_dos_data(dos_dir=DOS_DIR, ids=all_ids["id"].tolist())

## Download data CIF data

In [15]:
download_cif_data(cif_dir=CIF_DIR, ids=all_ids["id"].tolist())

Retrieving SummaryDoc documents: 100%|██████████| 3/3 [00:00<00:00, 79638.68it/s]
