# Create SLURM scripts to extract new depth recordings using the latest `moseq2-app`

In [1]:
import os
import h5py
from pathlib import Path
from toolz import concat, compose, curry
from ruamel import yaml
from tqdm.auto import tqdm
from aging.organization.paths import FOLDERS

In [2]:
# Username read from the environment (raises KeyError if USER is unset); it is
# used further down to build the per-user scratch path for the SLURM job logs.
user = os.environ['USER']

In [3]:
# sbatch script template. `{user_pth}` and `{file_path}` are placeholders filled
# in with `str.format` once per recording. Each job requests 1 core / 1 task,
# 10G of memory and 40 minutes on the `short` partition, writes its log to the
# user's scratch folder (`%j` is left in the string for SLURM), then activates
# the `moseq2-app` conda env and runs `moseq2-extract extract` on the recording.
script = '''#!/bin/env bash
#SBATCH -c 1
#SBATCH -n 1
#SBATCH --mem=10G
#SBATCH -p short
#SBATCH -t 00:40:00
#SBATCH --output=/n/scratch3/users/{user_pth}/tmp/ontogeny/depth-extraction-%j.out

source $HOME/.bashrc
conda activate moseq2-app
moseq2-extract extract "{file_path}" --config-file "/n/groups/datta/win/longtogeny/data/extractions/config.yaml"
'''

In [4]:
def not_extracted(file: Path) -> bool:
    """Return True when the recording at ``file`` still needs to be extracted.

    A recording counts as extracted only if, next to it, ``proc/results_00.h5``
    exists and can be opened, and ``proc/results_00.yaml`` has a truthy
    ``complete`` entry.

    Args:
        file: path to a depth recording.

    Returns:
        False for partially transferred files (names ending in ``filepart``)
        and for recordings with a complete extraction; True otherwise.
    """
    # partially transferred files are never submitted
    if file.name.endswith('filepart'):
        return False

    proc_dir = file.parent / "proc"
    results_h5 = proc_dir / "results_00.h5"
    if not results_h5.exists():
        return True

    try:
        # opening the file and listing its top-level keys is enough to detect
        # a corrupt/truncated h5 file
        with h5py.File(results_h5, "r") as h5f:
            list(h5f)
    except Exception as e:
        # deliberate best-effort: report the problem and re-extract
        print(e)
        return True

    try:
        # NOTE(review): newer ruamel.yaml releases deprecate the module-level
        # `safe_load` - confirm against the version pinned in the environment
        with open(proc_dir / "results_00.yaml", "r") as conf_f:
            config = yaml.safe_load(conf_f)
    except FileNotFoundError:
        # results without their yaml sidecar cannot be confirmed as complete
        return True

    # an empty yaml file or one without a "complete" entry counts as incomplete
    if not isinstance(config, dict):
        return True
    # TODO: make sure extraction is newer than 5/30/2023
    return not config.get("complete", False)


def no_depth_doubles(file):
    """Return False only for an ``avi`` file that has a ``.dat`` sibling.

    Every other path is kept (True), so a recording stored in both formats is
    represented by its ``.dat`` copy alone.
    """
    is_avi = file.name.endswith("avi")
    if not is_avi:
        return True
    dat_twin = file.with_suffix(".dat")
    return not dat_twin.exists()


def multi_filter(*filters, seq):
    """Lazily keep the items of ``seq`` that pass every predicate in ``filters``.

    Args:
        *filters: predicates applied right-to-left (the last one given sees the
            raw sequence first), so cheap predicates are best passed last.
        seq: iterable to filter (keyword-only).

    Returns:
        A chain of nested ``filter`` iterators, or ``seq`` itself when no
        predicate is given.
    """
    filtered = seq
    # right-to-left application keeps the order of function composition
    for predicate in reversed(filters):
        filtered = filter(predicate, filtered)
    return filtered

In [5]:
# first round, re-extract everything using my moseq config file
# second round, only extract non-extracted data or incomplete extractions
# files = list(concat(f.glob('**/depth.*') for f in folders))

In [6]:
files = multi_filter(not_extracted, no_depth_doubles, seq=concat(f.glob('**/depth.*') for f in FOLDERS))

for f in tqdm(files):
    # skip avi files that have a dat copy - meaning just use dat copy for extractions
    new_script = script.format(user_pth=f"{user[0]}/{user}", file_path=str(f.absolute()))
    with open("tmp.sh", "w") as f:
        f.write(new_script)

    !sbatch tmp.sh
!rm tmp.sh

0it [00:00, ?it/s]

Submitted batch job 14922008
Submitted batch job 14922009
Submitted batch job 14922010
Submitted batch job 14922011
Unable to open file (bad object header version number)
Submitted batch job 14922013
Unable to open file (bad object header version number)
Submitted batch job 14922016
Unable to open file (bad object header version number)
Submitted batch job 14922018
Unable to open file (bad object header version number)
Submitted batch job 14922019
Unable to open file (bad object header version number)
Submitted batch job 14922020
Unable to open file (bad object header version number)
Submitted batch job 14922021
Unable to open file (bad object header version number)
Submitted batch job 14922022
Unable to open file (bad object header version number)
Submitted batch job 14922023
Submitted batch job 14922043
Submitted batch job 14922065
Submitted batch job 14922075
Submitted batch job 14922076
Submitted batch job 14922079
Submitted batch job 14922080
Submitted batch job 14922081
Submitted