# Create and submit SLURM scripts to extract new depth recordings using the latest `moseq2-app`

In [1]:
import os
import h5py
from toolz import concat, compose, curry
from ruamel.yaml import YAML
from tqdm.auto import tqdm
from aging.organization.paths import FOLDERS

In [2]:
# Login name from the USER environment variable (KeyError if it is unset);
# interpolated into the SLURM log path when the job scripts are rendered.
user = os.environ['USER']

In [3]:
# Template for a single SLURM batch job; it is rendered with str.format before
# being submitted with sbatch.
# Placeholders:
#   {user_pth}  - user-specific sub-path under /n/scratch3/users/ where the job log is written
#   {file_path} - depth recording passed to `moseq2-extract extract`
# Each job requests 1 CPU per task, 1 task, 10G of memory and a 40 minute limit
# on the `short` partition.
# `%j` is not a Python placeholder: it is left in the rendered script for SLURM,
# which substitutes the job id into the log filename.
script = '''#!/bin/env bash
#SBATCH -c 1
#SBATCH -n 1
#SBATCH --mem=10G
#SBATCH -p short
#SBATCH -t 00:40:00
#SBATCH --output=/n/scratch3/users/{user_pth}/tmp/ontogeny/depth-extraction-%j.out

source $HOME/.bashrc
conda activate moseq2-app
moseq2-extract extract "{file_path}" --config-file "/n/groups/datta/win/longtogeny/data/extractions/config.yaml"
'''

In [4]:
def not_extracted(file):
    """Return True when the depth recording at ``file`` still needs extraction.

    A recording counts as extracted only when all of the following hold:

    * ``<file.parent>/proc/results_00.h5`` exists and can be opened and listed,
    * ``<file.parent>/proc/results_00.yaml`` exists and parses to a mapping,
    * that mapping has a truthy ``"complete"`` entry.

    Any missing, unreadable or incomplete result is reported as "not
    extracted" so the recording gets re-submitted instead of aborting the
    whole file scan.

    Parameters
    ----------
    file : pathlib.Path
        Path to a depth recording (e.g. ``depth.dat`` or ``depth.avi``).

    Returns
    -------
    bool
        False for partially transferred files (name ends with ``filepart``)
        and for completed extractions; True otherwise.
    """
    # partially transferred files are never extraction candidates
    if file.name.endswith('filepart'):
        return False

    proc_dir = file.parent / "proc"
    results_h5 = proc_dir / "results_00.h5"
    results_yaml = proc_dir / "results_00.yaml"

    if not results_h5.exists():
        return True

    # an h5 file that cannot be opened/listed is treated as a failed extraction
    try:
        with h5py.File(results_h5, "r") as h5f:
            list(h5f)
    except Exception as e:
        print(e)
        return True

    # the h5 can exist without its yaml sidecar; that is not a finished extraction
    if not results_yaml.exists():
        return True

    # same policy as for the h5 file: report the problem and re-extract
    try:
        with open(results_yaml, "r") as conf_f:
            config = YAML(typ='safe', pure=True).load(conf_f)
    except Exception as e:
        print(e)
        return True

    # an empty yaml loads as None; a missing "complete" key means unfinished
    complete = config.get("complete", False) if isinstance(config, dict) else False

    # TODO: make sure extraction is newer than 5/30/2023
    return not complete


def no_depth_doubles(file):
    """Return False only for an ``avi`` file that has a sibling ``.dat`` copy.

    Used as a filter predicate: an ``avi`` whose ``.dat`` counterpart exists
    is dropped, every other file is kept.
    """
    # anything that is not an avi can never be the duplicate
    if not file.name.endswith("avi"):
        return True
    dat_copy = file.with_suffix(".dat")
    return not dat_copy.exists()


def multi_filter(*filters, seq):
    """Lazily apply several filter predicates to ``seq``.

    The predicates are applied right-to-left: the last one given sees the
    items first, and the first one given sees only what survived the others.
    With no predicates, ``seq`` is returned unchanged.
    """
    filtered = seq
    # wrap from the innermost (last) predicate outwards
    for predicate in reversed(filters):
        filtered = filter(predicate, filtered)
    return filtered

In [5]:
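# First round: re-extract everything using my moseq config file.
# Second round: only extract recordings that have not been extracted yet or whose extraction is incomplete.
# Unfiltered file list used for the first round, kept for reference:
# files = list(concat(f.glob('**/depth.*') for f in folders))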

In [6]:
files = multi_filter(not_extracted, no_depth_doubles, seq=concat(f.glob('**/depth.*') for f in FOLDERS))

for f in tqdm(files):
    # skip avi files that have a dat copy - meaning just use dat copy for extractions
    print('extracting:', f)
    new_script = script.format(user_pth=f"{user[0]}/{user}", file_path=str(f.absolute()))
    with open("tmp.sh", "w") as f:
        f.write(new_script)

    !sbatch tmp.sh
!rm tmp.sh

0it [00:00, ?it/s]

extracting: /n/groups/datta/min/longtogeny_052023/Females/session_20230630192304/depth.avi
Submitted batch job 23558109
extracting: /n/groups/datta/min/longtogeny_052023/Females/session_20230602114950/depth.avi
Submitted batch job 23558110
extracting: /n/groups/datta/min/longtogeny_052023/Females/session_20230602125117/depth.avi
Submitted batch job 23558111
extracting: /n/groups/datta/min/longtogeny_052023/Females/session_20230915135502/depth.dat
Submitted batch job 23558112
extracting: /n/groups/datta/min/longtogeny_052023/Females/session_20230716173934/depth.avi
Submitted batch job 23558113
extracting: /n/groups/datta/min/longtogeny_052023/Females/session_20230519125454/depth.avi
Submitted batch job 23558114
extracting: /n/groups/datta/min/longtogeny_052023/Females/session_20230618164833/depth.avi
Submitted batch job 23558115
extracting: /n/groups/datta/min/longtogeny_052023/Females/session_20230716155232/depth.avi
Submitted batch job 23558116
extracting: /n/groups/datta/min/longtoge