# Summary

Check that all results have been calculated.

### Executing

```bash
export NOTEBOOK_PATH="$(realpath 20_el2_stability.ipynb)"
export DATASET_NAME="elaspic-core-mutation-local"
export ORIGINAL_ARRAY_TASK_COUNT=26
sbatch --export=DATASET_NAME,NOTEBOOK_PATH,ORIGINAL_ARRAY_TASK_COUNT --array=10,15 --ntasks-per-node=48 ../scripts/run_notebook_cpu.sh

export NOTEBOOK_PATH="$(realpath 20_el2_stability.ipynb)"
export DATASET_NAME="uniprot-domain-mutation"
export ORIGINAL_ARRAY_TASK_COUNT=2141
export SLURM_TASK_ID_OFFSET=1000
sbatch --export=DATASET_NAME,NOTEBOOK_PATH,ORIGINAL_ARRAY_TASK_COUNT,SLURM_TASK_ID_OFFSET --array=817,826,827,830,832,849,883 --ntasks-per-node=48 ../scripts/run_notebook_cpu.sh

export NOTEBOOK_PATH="$(realpath 20_el2_affinity.ipynb)"
export DATASET_NAME="elaspic-interface-mutation-local"
export ORIGINAL_ARRAY_TASK_COUNT=9
sbatch --export=DATASET_NAME,NOTEBOOK_PATH,ORIGINAL_ARRAY_TASK_COUNT --array= --ntasks-per-node=40 --mem=0 ../scripts/run_notebook_cpu.sh

export NOTEBOOK_PATH="$(realpath 20_el2_affinity.ipynb)"
export DATASET_NAME="uniprot-domain-pair-mutation"
export ORIGINAL_ARRAY_TASK_COUNT=1358
export SLURM_TASK_ID_OFFSET=900
sbatch --export=DATASET_NAME,NOTEBOOK_PATH,ORIGINAL_ARRAY_TASK_COUNT,SLURM_TASK_ID_OFFSET --array=6,139 ../scripts/run_notebook_cpu.sh

# On Cedar
--ntasks-per-node=48
 
# On Niagara
--ntasks-per-node=40 --mem=0
```


---

## Imports

In [None]:
import glob
import os
import socket
import tempfile
from pathlib import Path

from tqdm.notebook import tqdm

## Parameters

In [None]:
# Folder belonging to this notebook: the relative name is resolved to an absolute
# path against the current working directory and created if it is not there yet.
NOTEBOOK_DIR = Path("20_el2_validation").resolve()
os.makedirs(NOTEBOOK_DIR, exist_ok=True)

NOTEBOOK_DIR

In [None]:
# Root directory holding the results to check: "<DATAPKG_OUTPUT_DIR>/elaspic2" when
# that environment variable is defined, otherwise the parent of NOTEBOOK_DIR.
if (datapkg_output_dir := os.environ.get("DATAPKG_OUTPUT_DIR")) is None:
    OUTPUT_DIR = NOTEBOOK_DIR.parent
else:
    OUTPUT_DIR = Path(datapkg_output_dir).joinpath("elaspic2").resolve()
# Only the last path component is created; a missing parent still raises.
OUTPUT_DIR.mkdir(exist_ok=True)

OUTPUT_DIR

In [None]:
# Point temporary files at SLURM_TMPDIR whenever that variable is set.
if (slurm_tmpdir := os.getenv("SLURM_TMPDIR")) is not None:
    os.environ["TMPDIR"] = slurm_tmpdir
    # tempfile remembers the directory it resolved on its first lookup, so a lookup
    # made before this cell would otherwise keep winning over the new TMPDIR value.
    # Clearing the cache makes the next gettempdir() call re-read the environment.
    tempfile.tempdir = None

# Show the directory that will actually be used for temporary files.
print(tempfile.gettempdir())

In [None]:
# Number of worker processes to use.
if "scinet" in socket.gethostname():
    # Hosts with "scinet" in their name get a fixed count of 40
    # (presumably the Niagara nodes, cf. --ntasks-per-node=40 above; TODO confirm).
    CPU_COUNT = 40
elif hasattr(os, "sched_getaffinity"):
    # CPUs this process is allowed to run on, which can be fewer than the machine has.
    CPU_COUNT = max(1, len(os.sched_getaffinity(0)))
else:
    # os.sched_getaffinity is not available on every platform; fall back to the
    # total CPU count (os.cpu_count() may return None, hence the "or 1").
    CPU_COUNT = os.cpu_count() or 1

# Use half of the detected CPUs, but never fewer than one.
# NOTE(review): the reason for halving is not stated in the notebook.
CPU_COUNT = max(1, CPU_COUNT // 2)

CPU_COUNT

In [None]:
# Dataset whose task outputs are validated below. To check another dataset, assign
# one of the other known names instead (status as noted by the author):
#   "elaspic-core-mutation-local"       (done)
#   "uniprot-domain-mutation"           (done)
#   "elaspic-interface-mutation-local"  (done)
DATASET_NAME = "uniprot-domain-pair-mutation"  # done

In [None]:
# Dataset names containing "interface" or "pair" are looked up under the
# "20_el2_affinity" folder; every other dataset under "20_el2_stability".
DATASET_DIR = OUTPUT_DIR.joinpath(
    "20_el2_affinity"
    if ("interface" in DATASET_NAME or "pair" in DATASET_NAME)
    else "20_el2_stability",
    DATASET_NAME,
)

In [None]:
# Total number of array tasks per dataset; the values match the
# ORIGINAL_ARRAY_TASK_COUNT settings in the sbatch commands at the top.
DATASET_ARRAYS = dict(
    [
        ("elaspic-core-mutation-local", 26),
        ("uniprot-domain-mutation", 2141),
        ("elaspic-interface-mutation-local", 9),
        ("uniprot-domain-pair-mutation", 1358),
    ]
)

## Workspace

### Find missing tasks

In [None]:
task_count = DATASET_ARRAYS[DATASET_NAME]

# A task (ids run from 1 to task_count) is treated as finished only when its
# "<dataset>-<task id>-<task count>.SUCCESS" file exists in DATASET_DIR.
missing_tasks = [
    task_id
    for task_id in range(1, task_count + 1)
    if not DATASET_DIR.joinpath(
        f"{DATASET_NAME}-{task_id:04d}-{task_count:04d}.SUCCESS"
    ).is_file()
]

# Dataset, number of unfinished tasks, and their ids as a comma-separated list.
print(DATASET_NAME, len(missing_tasks), ",".join(map(str, missing_tasks)))

In [None]:
# Display the name of the dataset that was just checked.
DATASET_NAME

In [None]:
# Fail if any task is unfinished, and report which task ids still lack a .SUCCESS file.
assert not missing_tasks, (
    f"{DATASET_NAME}: {len(missing_tasks)} task(s) without a .SUCCESS file: "
    + ",".join(map(str, missing_tasks))
)