## 1. Generate input structures

In [1]:
from ase.build import bulk
from mlip_arena.tasks.eos_alloy.input import generate_alloy_db

No module named 'deepmd'


In [None]:
# Species handed to generate_alloy_db for the alloy database.
elements = ["Fe", "Ni", "Cr"]

# Template lattice: cubic Al cell (lattice parameter a=3.6) repeated 2x2x2.
al_unit_cell = bulk("Al", a=3.6, cubic=True)
structure_template = al_unit_cell.repeat([2, 2, 2])

# NOTE(review): upload=True is forwarded as-is; presumably it publishes the
# generated database somewhere external -- confirm before re-running.
generate_alloy_db(structure_template, elements, upload=True)

## 2. Run equation of state benchmark in parallel

In [1]:
from pathlib import Path

import pandas as pd
from dask.distributed import Client
from dask_jobqueue import SLURMCluster
from prefect_dask import DaskTaskRunner

from mlip_arena.models import REGISTRY
from mlip_arena.tasks.eos_alloy.flow import run_from_db


No module named 'deepmd'


In [None]:
# Orchestrate workflow runner
#
# Builds a SLURM-backed Dask cluster, connects a client to it, and derives a
# variant of `run_from_db` whose task runner targets that cluster's scheduler.

# Per-allocation settings. Only `nodes_per_alloc` is consumed below (the `-N`
# directive); `gpus_per_alloc` is referenced solely by the commented-out `-G`
# directive and `ntasks` is not referenced anywhere in this cell.
nodes_per_alloc = 1
gpus_per_alloc = 4
ntasks = 1

# Keyword arguments for SLURMCluster. The notes on each entry describe how it
# appears in the script printed by `cluster.job_script()` further down.
cluster_kwargs = dict(
    cores=1,  # worker command is launched with `--nthreads 1`
    memory="64 GB",  # worker command is launched with `--memory-limit 59.60GiB`
    shebang="#!/bin/bash",  # first line of the generated script
    account="m3828",  # rendered as `#SBATCH -A m3828`
    walltime="00:30:00",  # rendered as `#SBATCH -t 00:30:00`
    job_mem="0",  # rendered as `#SBATCH --mem=0`
    # Shell lines placed after the #SBATCH header and before the worker
    # launch command; they activate the Python environment on the node.
    job_script_prologue=[
        "source ~/.bashrc",
        "module load python",
        "source activate /pscratch/sd/c/cyrusyc/.conda/mlip-arena",
    ],
    # Header directives to leave out of the generated script. `-J` is skipped
    # here and re-added below with a custom job name.
    job_directives_skip=["-n", "--cpus-per-task", "-J"],
    # Extra directives, each rendered as its own `#SBATCH ...` header line.
    job_extra_directives=[
        "-J eos",
        "-q debug",
        f"-N {nodes_per_alloc}",
        "-C cpu",
        # GPU request is currently disabled:
        # f"-G {gpus_per_alloc}",
    ],
)

cluster = SLURMCluster(**cluster_kwargs)
# Display the generated submission script for inspection.
print(cluster.job_script())
# minimum_jobs == maximum_jobs, so adaptive scaling is pinned to two jobs.
cluster.adapt(minimum_jobs=2, maximum_jobs=2)
client = Client(cluster)

# Variant of the flow whose task runner connects to the scheduler address of
# the client created above, with log_prints enabled.
# NOTE(review): assumes `run_from_db` is a Prefect flow (it exposes
# `with_options` and accepts a prefect_dask task runner) -- confirm.
run_from_db_ = run_from_db.with_options(
    task_runner=DaskTaskRunner(address=client.scheduler.address),
    log_prints=True,
)

# Run the workflow
# NOTE(review): `db_path` is presumably the database produced by
# `generate_alloy_db` in section 1 -- confirm the file name matches.

results = run_from_db_(
    db_path="sqs_Fe-Ni-Cr.db", out_path="eos.h5", table_name="Fe-Ni-Cr"
)

#!/bin/bash

#SBATCH -A m3828
#SBATCH --mem=0
#SBATCH -t 00:30:00
#SBATCH -J eos
#SBATCH -q debug
#SBATCH -N 1
#SBATCH -C cpu
source ~/.bashrc
module load python
source activate /pscratch/sd/c/cyrusyc/.conda/mlip-arena
/pscratch/sd/c/cyrusyc/.conda/mlip-arena/bin/python -m distributed.cli.dask_worker tcp://128.55.64.40:37453 --name dummy-name --nthreads 1 --memory-limit 59.60GiB --nanny --death-timeout 60



In [5]:

def postprocess(output, model: str, formula: str):
    """Record one equation-of-state result in the model's parquet table.

    The table is stored at ``<family>/<model>.parquet``, where ``family`` is
    read from ``REGISTRY[model]["family"]``. If the file already exists, the
    new record is appended to its contents; otherwise the parent directory is
    created and the table starts with this single record. Only the last row
    for each ``(formula, method)`` pair is kept before writing.

    Args:
        output: Mapping that provides ``output["eos"]["volumes"]``,
            ``output["eos"]["energies"]`` and ``output["K"]``.
        model: Registry key of the model; stored in the ``method`` column and
            used to build the output file name.
        formula: Identifier stored in the ``formula`` column.

    Returns:
        None. The result is written to disk.
    """
    entry = {
        "formula": formula,
        "method": model,
        "volumes": output["eos"]["volumes"],
        "energies": output["eos"]["energies"],
        "K": output["K"],
    }

    target = Path(REGISTRY[model]["family"]) / f"{model}.parquet"

    if target.exists():
        # Extend the stored table with the new single-row frame.
        frames = [pd.read_parquet(target), pd.DataFrame([entry])]
        table = pd.concat(frames, ignore_index=True)
    else:
        # First result for this model: make sure the family directory exists.
        target.parent.mkdir(parents=True, exist_ok=True)
        table = pd.DataFrame([entry])

    # A re-run for the same formula/method replaces the earlier row.
    deduped = table.drop_duplicates(subset=["formula", "method"], keep="last")
    deduped.to_parquet(target)

