# Materials Project API Tutorial

We will be using the application programming interface (API) of Materials Project to gain data-driven insights about trends in materials.

If you plan to use the API in the future, please create a free account on the Materials Project (MP) website, go to https://next-gen.materialsproject.org/dashboard, and look up your personal key in the **API keys** section.

In [None]:
from mp_api.client import MPRester
from emmet.core.symmetry import CrystalSystem
from emmet.core.mpid import MPID
import matplotlib.pyplot as plt
from pymatviz.widgets.structure import StructureWidget
import pandas as pd

import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))  # adds parent directory to path to find utils scripts
from utils.plot_functions import display_structure, plot_periodic_table_heatmap

### Specify your API Key

In [None]:
# Prefer a key exported as the MP_API_KEY environment variable so the secret
# does not have to be stored in the notebook. If the variable is unset or
# empty, fall back to the placeholder below -- replace it with your own key.
api_key = os.environ.get('MP_API_KEY') or 'YOURAPIKEY'

## Fetching Specific Crystal Structures

Firstly, let us pull the cubic ZnS structure from before by specifying the formula and crystal system.

In [None]:
# Search the summary data for every cubic entry with the formula ZnS and
# request only the material identifier and the crystal structure.
with MPRester(api_key) as mpr:
    # Use the `materials.summary` accessor, as the bulk query further down
    # does; the top-level `mpr.summary` shortcut is a deprecated alias in
    # recent mp-api releases.
    ZnS = mpr.materials.summary.search(formula='ZnS', crystal_system=CrystalSystem('Cubic'),
                                       fields=['material_id', 'structure'])
# Last expression of the cell: shows the list of returned documents.
ZnS

As we pulled the structure information, we can use it directly in pymatgen to work with it. Let's display it!

In [None]:
# Take the structure attached to the first document returned by the search.
ZnS_struct = ZnS[0].structure

# Draw the structure on a square 8x8 matplotlib figure with the project helper.
fig, ax = plt.subplots(tight_layout=True, figsize=(8,8))
# NOTE(review): `miller_index` presumably sets the viewing direction and
# `repeat` the number of cells drawn along each axis -- confirm in
# utils.plot_functions.display_structure.
display_structure(ZnS_struct, ax, miller_index=[-1,0,0], repeat=[1,1,1])
plt.show()
# Alternative viewer (pymatviz widget), disabled by default:
# StructureWidget(ZnS_struct)

Using the `material_id` (often referred to as *mpid*) we can pull the DFT-calculated band structure from the MP database.

In [None]:
from pymatgen.electronic_structure.plotter import BSDOSPlotter

# Identifier of the first ZnS hit, kept as a plain string.
first_hit = ZnS[0]
ZnS_mpid = str(first_hit.material_id)

# Download the band structure stored for that identifier.
with MPRester(api_key) as m:
    bs = m.get_bandstructure_by_material_id(MPID(ZnS_mpid))

# The plot call stays the last expression of the cell, so the notebook
# renders its result.
bs_plotter = BSDOSPlotter(fig_size=(10, 6))
bs_plotter.get_plot(bs)

With `MPRester` we can search the MP database and pull various properties (`fields`) of materials that fulfill criteria that we can specify (*e.g.*, `theoretical=False` filters for materials that have been experimentally verified/synthesized). A list of the fields that can be requested is summarized [here](https://api.materialsproject.org/docs#/Materials%20Summary).

In [None]:
# Download summary data for all non-theoretical materials whose energy above
# the hull lies between 0 and 0.02, requesting only the fields analysed below.
with MPRester(api_key) as m:
    # Query through `m`, the client opened by this `with` statement. The
    # previous code called `mpr`, the client of an earlier, already-exited
    # context manager, which raises NameError if that cell was not run.
    data = m.materials.summary.search(theoretical=False, energy_above_hull=(0, 0.02),
                                      fields=['material_id', 'nsites', 'nelements',
                                              'chemsys', 'band_gap', 'symmetry'])

The next code block restructures the requested data into a Pandas DataFrame and adds a column with the crystal system information (may take ~40 seconds to run).

In [None]:
def _non_null_fields(doc):
    """Return the fields of one search result as a dict without ``None`` values."""
    return {field: value for field, value in dict(doc).items() if value is not None}


def _crystal_system_label(symmetry):
    """Return the lower-cased string form of ``symmetry.crystal_system``."""
    return str(symmetry.crystal_system).lower()


# One plain dict per search result, then one DataFrame row per dict.
flattened = [_non_null_fields(doc) for doc in data]
mpids = pd.DataFrame.from_records(flattened)

# Expose the crystal system as its own lower-case string column.
mpids['symmetry.crystal_system'] = mpids['symmetry'].apply(_crystal_system_label)

# Discard columns without any value, then the `symmetry` and
# `fields_not_requested` columns, which are not used in the analysis below.
mpids = mpids.dropna(axis=1, how='all').drop(columns=['symmetry', 'fields_not_requested'])
# mpids.to_csv('mp_data.csv')
# mpids.to_json('mp_data.json', orient='records')

First, let's have a look at the distribution of materials for the different crystal systems:

In [None]:
# Column dtypes, as a quick check of the assembled table.
print(mpids.dtypes)
# mpids = pd.read_csv('mp_data.csv', index_col = 0)  # Use this in case data cannot be requested from MP

# Crystal systems in the order in which the bars are drawn.
ordered_systems = ['triclinic', 'monoclinic', 'orthorhombic', 'tetragonal',
                   'trigonal', 'hexagonal', 'cubic']

# Count the materials per crystal system and arrange the counts in that order.
has_elements = mpids['nelements'] > 0
n_materials = (
    mpids.loc[has_elements, 'symmetry.crystal_system']
    .value_counts()
    .reindex(ordered_systems)
)

fig, ax = plt.subplots(figsize=(10, 6), tight_layout=True)
n_materials.plot(kind='bar', ax=ax, fontsize=16, legend=False)
ax.set_ylabel('Number of materials', fontsize=16)
plt.show()

And the distribution of high-symmetry crystal systems containing different chemical elements:

In [None]:
from collections import Counter

# Crystal systems treated as "high symmetry" in this comparison.
crys_sys = ['cubic', 'hexagonal', 'trigonal', 'tetragonal']

# Number of materials containing each element: over the whole table, and
# restricted to the crystal systems listed in `crys_sys`.
all_counts = Counter()
cs_counts = Counter()
for chemsys, crystal_system in zip(mpids['chemsys'], mpids['symmetry.crystal_system']):
    # `chemsys` is a dash-separated list of element symbols.
    elements = str(chemsys).split('-')
    all_counts.update(elements)
    if crystal_system in crys_sys:
        cs_counts.update(elements)

# Keep the results available as plain dicts under their established names.
elemental_data_all = dict(all_counts)
elemental_data_cs = dict(cs_counts)

# Percentage of each element's materials that fall into `crys_sys`; elements
# that never occur in those crystal systems are left out.
elemental_data_ratio = {el: 100 * elemental_data_cs[el] / n_all
                        for el, n_all in elemental_data_all.items()
                        if el in elemental_data_cs}

# Do not assign the helper's return value to `plt`: that would replace the
# matplotlib.pyplot alias that the following cells call.
ratio_heatmap = plot_periodic_table_heatmap(elemental_data_ratio, cbar_label="Ratio (%)",
                                            cbar_label_size=16, show_plot=True,
                                            cmap="YlOrRd", cmap_range=[20,70],
                                            value_format='%d')

Next, we can analyze the average bandgap for materials in each of the crystal systems:

In [None]:
# crystal_systems = set(mpids['symmetry.crystal_system'].tolist())
# Fixed ordering of the crystal systems; it also defines the bar order below.
crystal_systems = ['triclinic', 'monoclinic', 'orthorhombic', 'tetragonal',
                   'trigonal', 'hexagonal', 'cubic']

# One row per crystal system; the mean band gap is filled in row by row.
average_bandgaps = pd.DataFrame(columns=['crystal_system', 'bandgap_average'])
average_bandgaps.loc[:, 'crystal_system'] = list(crystal_systems)

system_of_material = mpids['symmetry.crystal_system']
for row, system in enumerate(crystal_systems):
    in_system = system_of_material == system
    average_bandgaps.loc[row, 'bandgap_average'] = mpids.loc[in_system, 'band_gap'].mean()

fig, ax = plt.subplots(figsize=(10, 6), tight_layout=True)
average_bandgaps.plot(kind='bar', ax=ax, fontsize=16, legend=False)
ax.set_ylabel('Mean bandgap (eV)', fontsize=16)
# Label the bars with the crystal-system names instead of the row numbers.
ax.set_xticklabels(crystal_systems)
plt.show()


Perhaps even more interesting: what percentage of the materials in each crystal system is metallic (*i.e.*, $E_\mathrm{gap}=0$)?

In [None]:
# Per crystal system: number of materials with a known band gap (`n_total`)
# and number of those whose band gap is exactly zero (`n_metallic`).
system_of_material = mpids['symmetry.crystal_system']
band_gap = mpids['band_gap']

# Crystal systems that do not occur in the table get a count of 0.
n_total = band_gap.groupby(system_of_material).count().reindex(crystal_systems, fill_value=0)
n_metallic = (band_gap == 0).groupby(system_of_material).sum().reindex(crystal_systems, fill_value=0)

# Percentage of metallic materials. A crystal system without any band-gap
# entry yields NaN instead of a division by zero.
metallic_fraction = 100 * n_metallic / n_total.where(n_total > 0)

# Same layout as before: one row per crystal system, in `crystal_systems` order.
metal_semiconductor = pd.DataFrame({
    'crystal_system': crystal_systems,
    'n_metallic': n_metallic.to_numpy(),
    'n_total': n_total.to_numpy(),
    'metallic_fraction': metallic_fraction.to_numpy(),
})

fig, ax = plt.subplots(tight_layout=True, figsize=(10,6))
metal_semiconductor['metallic_fraction'].plot(kind='bar', ax=ax, fontsize=16, legend=False)
ax.set_ylabel('Fraction of metallic materials (%)', fontsize=16)
ax.set_ylim(0.0, 100.0)
# Label the bars with the crystal-system names instead of the row numbers.
ax.set_xticklabels(crystal_systems)
plt.show()

## Exercise 9.1
Plot the average bandgap for each element present in the crystals, using a subset of crystals selected by the number of elements `n_elements` (e.g. binary, ternary, ...) and/or the crystal systems `crys_sys_exercise`. Change the two variables and observe the trends.

In [None]:
from statistics import mean
# Selection criteria for the exercise: edit these two variables.
crys_sys_exercise = ['triclinic', 'monoclinic', 'orthorhombic', 'tetragonal', 'trigonal', 'hexagonal', 'cubic']
n_elements = 3

# Materials with at most `n_elements` elements in one of the chosen crystal systems.
selected = mpids[(mpids['nelements'] <= n_elements)
                 & mpids['symmetry.crystal_system'].isin(crys_sys_exercise)]

# Collect the band gap of every selected material under each element it contains.
bandgap_data = {}
for chemsys, band_gap in zip(selected['chemsys'], selected['band_gap']):
    # `chemsys` is a dash-separated list of element symbols.
    for el in str(chemsys).split('-'):
        bandgap_data.setdefault(el, []).append(band_gap)
bandgap_data_averages = {el: mean(gaps) for el, gaps in bandgap_data.items()}

# Do not assign the helper's return value to `plt`: that would replace the
# matplotlib.pyplot alias that the following cells call.
bandgap_heatmap = plot_periodic_table_heatmap(bandgap_data_averages,
                                              cbar_label="Average bandgap (eV)",
                                              cbar_label_size=16, show_plot=True,
                                              cmap="YlOrRd", cmap_range=[0,2],
                                              value_format='%.1f')

## Exercise 9.2
Now plot the distribution of materials for all crystal systems based on how many elements are present in the crystal (i.e. binary, ternary,...). As you increase `n_elements`, where does the distribution shift?

In [None]:
# Number of elements per material to look at: edit this variable.
n_elements = 1

# Materials with exactly `n_elements` elements, counted per crystal system.
# Crystal systems without a matching material get a count of 0.
has_n_elements = mpids['nelements'] == n_elements
counts = (mpids.loc[has_n_elements, 'symmetry.crystal_system']
          .value_counts()
          .reindex(crystal_systems, fill_value=0))

# The total is computed once instead of once per row. If nothing matches,
# the fractions are NaN rather than the result of a division by zero.
total = counts.sum()
fraction = (100 * counts / total).to_numpy() if total else float('nan')

# Same layout as before: one row per crystal system, in `crystal_systems` order.
crys_sys_nelements = pd.DataFrame({
    'crystal_system': crystal_systems,
    'n': counts.to_numpy(),
    'fraction': fraction,
})

fig, ax = plt.subplots(tight_layout=True, figsize=(10,6))
crys_sys_nelements['fraction'].plot(kind='bar', ax=ax, fontsize=16, legend=False)
ax.set_ylabel(f'Fraction of materials with {n_elements:d} element{"s" if n_elements>1 else ""} (%)', fontsize=15)
ax.set_ylim(0, 100)
# Label the bars with the crystal-system names instead of the row numbers.
ax.set_xticklabels(crystal_systems)
plt.show()