In [1]:
from mp_api.client import MPRester

import os

# The Materials Project API key is a secret and must not be stored in the
# notebook. It is read from the MP_API_KEY environment variable instead; export
# it in the shell that launches Jupyter. `key` is None when the variable is not
# set (mp-api is documented to look up MP_API_KEY itself in that case).
# NOTE(review): the key that used to be written on this line has been exposed
# and should be revoked and regenerated in the Materials Project dashboard.
key = os.environ.get("MP_API_KEY")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import plotly.graph_objects as go
import pandas as pd
import time
from numpy import mean

### Utilizando API para coletar dados de todos os materiais

In [3]:
with MPRester(key) as mpr:
    # Leftover single-material experiments (DOS lookup and DOS fingerprint),
    # kept commented out for reference:
    # material = "mp-20138"
    # dossi = mpr.get_dos_by_material_id("mp-149")
    # dos = mpr.get_dos_by_material_id(material)
    # fp = dos.get_dos_fp()

    # No filters are passed, so the summary documents of every material are
    # requested; the recorded run retrieved 154718 documents in about 11 minutes.
    # NOTE(review): `mpr` is used again by get_dos_at_efermi after this
    # with-block has exited - confirm the client is still usable at that point.
    ms = mpr.summary.search()

  ms = mpr.summary.search()
Retrieving SummaryDoc documents: 100%|██████████| 154718/154718 [11:18<00:00, 227.92it/s]


### Funções para encontrar as densidades

In [4]:
def find_zero(df):
    """Return the density linearly interpolated at ``energies == 0``.

    Args:
        df: DataFrame with numeric ``energies`` and ``densities`` columns. Any
            index is accepted; rows are addressed by position. The frame is
            not modified.

    Returns:
        The value of ``densities`` at zero energy. When a sample sits exactly
        on zero its density is returned as is; otherwise the two consecutive
        samples whose energies have opposite signs are joined by a straight
        line and that line is evaluated at zero.

    Raises:
        IndexError: if ``energies`` neither contains a zero nor changes sign
            (this includes an empty frame). IndexError is kept because it is
            what the original positional lookup raised in that situation.
    """
    # Work on a re-indexed copy so labels equal positions and the caller's
    # frame (often a slice of a larger one) is never mutated.
    data = df.reset_index(drop=True)
    energies = data["energies"]
    densities = data["densities"]

    # The product of consecutive energies is negative exactly where the sign
    # flips; the first row (NaN after the shift) never matches.
    products = energies.shift(1) * energies
    crossings = products[products < 0].index
    exact = energies[energies == 0].index

    # A sample lying exactly on zero produces products equal to 0, which the
    # sign test above cannot see, so it is handled explicitly.
    if len(exact) and (not len(crossings) or exact[0] < crossings[0]):
        return densities.loc[exact[0]]

    if not len(crossings):
        raise IndexError(
            "find_zero: 'energies' has no zero and no sign change to interpolate at"
        )

    upper = crossings[0]
    lower = upper - 1
    e0, e1 = energies.loc[lower], energies.loc[upper]
    d0, d1 = densities.loc[lower], densities.loc[upper]

    # Straight line through (e0, d0) and (e1, d1), evaluated at energy 0.
    return -(e0 * d1 - e1 * d0) / (e1 - e0)

In [5]:
# Find the DOS at the Fermi level, in total and for each element
def get_dos_at_efermi(material_id: str) -> tuple:
    """Return the density of states at the Fermi level for one material.

    The DOS is fetched through the notebook-level ``mpr`` client. Energies are
    shifted so that the Fermi level sits at zero, restricted to a window around
    zero, and handed to ``find_zero`` to interpolate the density at zero.

    Args:
        material_id: identifier passed to ``mpr.get_dos_by_material_id``.

    Returns:
        A ``(total, per_element)`` pair: the value for the complete DOS and a
        dict mapping ``str(element)`` to the value for that element's DOS.

    Raises:
        Whatever the client or ``find_zero`` raise, e.g. when the windowed
        energies do not cross zero.
    """
    dos = mpr.get_dos_by_material_id(material_id)

    # Half-width of the energy window kept around the Fermi level. abs() is
    # required: with a negative Fermi energy the bounds would swap and the
    # window would always be empty.
    half_width = abs(dos.efermi) / 2

    def density_at_fermi(part):
        """Interpolate the density of one DOS object at its Fermi level."""
        frame = pd.DataFrame(
            {
                "energies": part.energies - part.efermi,
                "densities": part.get_densities(),
            }
        )
        inside = (frame["energies"] < half_width) & (frame["energies"] > -half_width)
        return find_zero(frame[inside])

    element_values = {
        str(element): density_at_fermi(e_dos)
        for element, e_dos in dos.get_element_dos().items()
    }

    return density_at_fermi(dos), element_values

In [None]:
def to_sql_list(itens: list) -> str:
    """Convert a Python list into a parenthesised list for SQL statements.

    Args:
        itens: values to render. Strings are single-quoted with embedded
            quotes doubled, ``None`` and pandas missing values become
            ``NULL``, dates are quoted in their ``str()`` form, and anything
            else is rendered with ``str()``.

    Returns:
        The items joined by commas and wrapped in parentheses, e.g.
        ``('a',1,NULL)``; an empty list yields ``()``.

    Note:
        This builds SQL text by hand. Prefer placeholder parameters of the
        database driver whenever the values come from outside the notebook.
    """
    # Imported here because the notebook never imported `date` (nor `isna`),
    # which made every non-string item fail with NameError.
    from datetime import date

    def _convert(x):
        if isinstance(x, str):
            x = x.replace("'", "''")
            return f"'{x}'"
        # is_scalar() guards pd.isna(), which returns an array for containers.
        if x is None or (pd.api.types.is_scalar(x) and pd.isna(x)):
            return "NULL"
        if isinstance(x, date):
            return f"'{x}'"
        return str(x)

    return f'({",".join(map(_convert, itens))})'


### Coletando dados

In [9]:
# Wall-clock budget for this sampling run, in seconds. The loop stops once it
# is exceeded, so only the materials processed within the budget are collected.
TIME_BUDGET_SECONDS = 60

li = []
t1 = time.time()
for mms in ms:
    if time.time() > (t1 + TIME_BUDGET_SECONDS):
        break

    # Fractional coordinates keyed by species, read through the public
    # `frac_coords` attribute instead of the private instance dict.
    # NOTE(review): sites that share a species overwrite one another here, so
    # only the last site of each species is kept - confirm this is intended.
    element_coords = {}
    for coords in mms.structure.sites:
        element_coords[str(coords.specie)] = list(coords.frac_coords)

    # Best effort: a material whose DOS cannot be fetched or interpolated is
    # stored with empty DOS columns. `Exception` (not a bare except) lets
    # Ctrl-C / kernel interrupts stop the loop.
    try:
        all_dos = get_dos_at_efermi(str(mms.material_id))
    except Exception:
        all_dos = [None, None]

    # One positional record per material; the order must match the column
    # list used when the DataFrame is built from `li`.
    li.append(
        [
            int(str(mms.material_id)[3:]),  # numeric part after the 3-character prefix
            mms.nelements,
            str(mms.composition),
            mms.formula_pretty,
            mms.volume,
            mms.density,
            mms.density_atomic,
            str(mms.symmetry.crystal_system),
            mms.symmetry.symbol,
            mms.symmetry.number,
            str(mms.material_id),
            mms.is_stable,
            mms.is_magnetic,
            mms.is_metal,
            mms.is_gap_direct,
            mms.energy_per_atom,
            mms.efermi,
            mms.total_magnetization,
            mms.last_updated,
            mms.deprecated,
            {
                "abc": list(mms.structure.lattice.abc),
                "angles": list(mms.structure.lattice.angles),
            },
            element_coords,
            all_dos[0],
            all_dos[1],
        ]
    )

# Turn the positional records gathered in `li` into a labelled table; the
# names below are listed in the same order in which each record was filled.
all_mp = pd.DataFrame(
    li,
    columns=[
        "id",
        "n_elements",
        "composition",
        "formula",
        "volume",
        "density",
        "atomic_density",
        "symetry",
        "symetry_symbol",
        "symetry_number",
        "material_id",
        "is_stable",
        "is_magnetic",
        "is_metal",
        "is_gap_direct",
        "energy_per_atom",
        "efermi",
        "total_magnetization",
        "last_updated",
        "deprecated",
        "lattice_structure",
        "element_coords",
        "dos_at_efermi",
        "elements_dos_at_efermi",
    ],
)
# Order the rows by the numeric id and promote it to the index.
all_mp = all_mp.sort_values("id")
all_mp = all_mp.set_index("id")

Retrieving ElectronicStructureDoc documents: 100%|██████████| 1/1 [00:00<?, ?it/s]
Retrieving ElectronicStructureDoc documents: 100%|██████████| 1/1 [00:00<?, ?it/s]
Retrieving ElectronicStructureDoc documents: 100%|██████████| 1/1 [00:00<?, ?it/s]
Retrieving ElectronicStructureDoc documents: 100%|██████████| 1/1 [00:00<?, ?it/s]
Retrieving ElectronicStructureDoc documents: 100%|██████████| 1/1 [00:00<?, ?it/s]
Retrieving ElectronicStructureDoc documents: 100%|██████████| 1/1 [00:00<?, ?it/s]
Retrieving ElectronicStructureDoc documents: 100%|██████████| 1/1 [00:00<?, ?it/s]
Retrieving ElectronicStructureDoc documents: 100%|██████████| 1/1 [00:00<?, ?it/s]
Retrieving ElectronicStructureDoc documents: 100%|██████████| 1/1 [00:00<?, ?it/s]
Retrieving ElectronicStructureDoc documents: 100%|██████████| 1/1 [00:00<?, ?it/s]
Retrieving ElectronicStructureDoc documents: 100%|██████████| 1/1 [00:00<?, ?it/s]
Retrieving ElectronicStructureDoc documents: 100%|██████████| 1/1 [00:00<?, ?it/s]
Retr

In [7]:
# Scale the sample up to the full dataset: total materials divided by the
# number of rows collected, then divided by 60.
# NOTE(review): this is only in hours if `all_mp` was collected by a loop that
# stopped after 60 seconds (one minute of work) - confirm before relying on it.
len(ms) / len(all_mp) / 60 ## Expected time the upload will take (in hours)

36.318779342723005

In [11]:
# Same rows as `all_mp` (still indexed by id), with the columns rearranged
# into the order listed here.
all2 = all_mp.loc[
    :,
    [
        "material_id",
        "formula",
        "n_elements",
        "composition",
        "volume",
        "density",
        "atomic_density",
        "symetry",
        "symetry_symbol",
        "symetry_number",
        "is_stable",
        "is_magnetic",
        "is_metal",
        "is_gap_direct",
        "energy_per_atom",
        "efermi",
        "total_magnetization",
        "lattice_structure",
        "element_coords",
        "dos_at_efermi",
        "elements_dos_at_efermi",
        "last_updated",
        "deprecated",
    ],
]

### Gráficos

In [None]:
# Table of DOS energies measured from the Fermi level, with their densities.
# NOTE(review): `dos` is not assigned at notebook level in the visible cells
# (it only exists inside get_dos_at_efermi) - fetch a DOS object before
# running this cell.
dg = pd.DataFrame(
    {"energies": dos.energies - dos.efermi, "densities": dos.get_densities()}
)

In [None]:
# Keep only the rows whose shifted energy lies strictly between -efermi/2 and
# +efermi/2.
# NOTE(review): relies on a notebook-level `dos`, which the visible cells
# never define; with a negative `dos.efermi` the two bounds swap and nothing
# is selected.
fdos = dg.query(f"energies<{dos.efermi/2} & energies>{-dos.efermi/2}")

In [None]:
# Plot the windowed DOS together with the extra traces held in `plots`.
# NOTE(review): `plots` is not defined in the visible cells - presumably a
# list of plotly traces; define it (or drop the `+ plots`) before running.
go.Figure(
    [
        # Disabled trace from an earlier experiment (densities scaled by 20):
        # go.Scatter(x=data.energies, y=data.densities*20),
        go.Scatter(x=fdos.energies, y=fdos.densities),
    ]
    + plots
)

### Criando banco de dados

In [None]:
# Display the collected materials table as the cell output.
all_mp

In [10]:
# DDL for the `materials` table. After the `id` primary key, the columns are
# declared with the same names and in the same order as the columns of `all2`
# (including the "symetry" spelling used by the DataFrame).
schema = """
create table materials (
    id int primary key,
    material_id text not null,
    formula text,
    n_elements int,
    composition text,
    volume real,
    density real,
    atomic_density real,
    symetry text,
    symetry_symbol text,
    symetry_number int,
    is_stable boolean,
    is_magnetic boolean,
    is_metal boolean,
    is_gap_direct boolean,
    energy_per_atom real,
    efermi real,
    total_magnetization real,
    lattice_structure json,
    element_coords json,
    dos_at_efermi real,
    elements_dos_at_efermi json,
    last_updated datetime,
    deprecated boolean
)
"""

In [15]:
# Spot check: the coordinates stored for the row whose index label is 4681.
all2.element_coords[4681]

{'Na': [0.23727301, 0.7877307, -0.0],
 'Nb': [0.74305054, 0.77118186, 0.74917761],
 'O': [0.67688228, 0.82421774, 0.5]}

In [None]:
import json
import os
import sqlite3
from contextlib import closing


def check_db(filename):
    """Return True when something already exists at ``filename``."""
    return os.path.exists(filename)


def _to_sql_value(value):
    """Convert one DataFrame cell into a value that sqlite3 can bind.

    Containers are stored as JSON text, missing values as NULL, date-like
    objects as ISO-8601 text, and numpy scalars as plain Python scalars.
    """
    if isinstance(value, (dict, list, tuple)):
        # default=float covers numpy scalars that json cannot encode natively.
        return json.dumps(value, default=float)
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return None
    if hasattr(value, "isoformat"):
        return value.isoformat()
    if hasattr(value, "item"):
        return value.item()
    return value


db_file = "mp_database.db"

if check_db(db_file):
    # Do not call exit() here: inside a notebook it shuts the kernel down.
    print("Database already exists. Skipping creation.")
else:
    # `id` is the index of `all2`; bring it back as the first column so the
    # rows line up with the `materials` table declared in `schema`.
    table = all2.reset_index()
    column_names = list(table.columns)
    # Column names come from this notebook, values go through placeholders.
    insert_sql = "insert into materials ({}) values ({})".format(
        ", ".join(column_names), ", ".join("?" * len(column_names))
    )
    rows = [
        tuple(_to_sql_value(cell) for cell in row)
        for row in table.itertuples(index=False, name=None)
    ]

    try:
        # `with sqlite3.connect(...)` only manages the transaction; closing()
        # is what actually closes the connection.
        with closing(sqlite3.connect(db_file)) as conn:
            print("Created the connection!")
            # Execute the SQL script that creates the table
            conn.executescript(schema)
            print("Created the Table! Now inserting")
            with conn:  # commit on success, roll back on error
                conn.executemany(insert_sql, rows)
            print("Inserted values into the table!")
    except Exception:
        # Remove the half-built file so the next run does not mistake it for
        # a finished database.
        if check_db(db_file):
            os.remove(db_file)
        raise
    print("Closed the connection!")