# Create SLURM scripts to extract new depth recordings using the latest `moseq2-app`

In [2]:
import os
from pathlib import Path
from toolz import concat
from ruamel import yaml
from tqdm.auto import tqdm

In [3]:
# Root directories that are searched recursively for depth recordings.
folders = list(map(Path, (
    "/n/groups/datta/Dana/Ontogeny/raw_data/Ontogeny_females",
    "/n/groups/datta/Dana/Ontogeny/raw_data/Ontogeny_males",
    "/n/groups/datta/Dana/Ontogeny/raw_data/longtogeny_pre_unet/Females",
    "/n/groups/datta/Dana/Ontogeny/raw_data/longtogeny_pre_unet/Males",
    "/n/groups/datta/min/dominance_v1",
    "/n/groups/datta/min/community_v1",
    "/n/groups/datta/min/wheel_062023",
    "/n/groups/datta/min/cas_behavior_01",
    "/n/groups/datta/min/sham_behavior_01",
)))

In [4]:
# Username of whoever runs the notebook, read from the USER environment variable
# (raises KeyError if it is unset). It is used to build the per-user scratch path
# that the SLURM log files are written to.
user = os.environ['USER']

In [5]:
# Template for the per-recording SLURM batch script. It is rendered with
# `str.format`, which fills two placeholders:
#   {user_pth}  - sub-path under /n/scratch3/users/ where the job log is written
#   {file_path} - path of the depth file handed to `moseq2-extract extract`
# `%j` in the --output path is left for SLURM to expand (job id per the sbatch
# filename-pattern docs).
# NOTE(review): the log directory presumably has to exist before submission - confirm.
script = '''#!/bin/env bash
#SBATCH -c 1
#SBATCH -n 1
#SBATCH --mem=10G
#SBATCH -p short
#SBATCH -t 00:35:00
#SBATCH --output=/n/scratch3/users/{user_pth}/tmp/ontogeny/depth-extraction-%j.out

source $HOME/.bashrc
conda activate moseq2-app
moseq2-extract extract "{file_path}" --config-file "/n/groups/datta/win/longtogeny/data/extractions/config.yaml"
'''

In [6]:
def not_extracted(file):
    """Return True when the depth file `file` still needs to be extracted.

    A file counts as already extracted only when, next to it, both
    ``proc/results_00.h5`` and ``proc/results_00.yaml`` exist and the yaml
    has a truthy ``complete`` entry.

    Parameters
    ----------
    file : pathlib.Path
        Path to a depth recording (anything matched by ``depth.*``).

    Returns
    -------
    bool
        False for names ending in ``filepart`` and for completed extractions;
        True otherwise (no output, missing/empty metadata, or incomplete).
    """
    # names ending in "filepart" are never submitted
    # (presumably partial-transfer artifacts - confirm)
    if file.name.endswith('filepart'):
        return False

    proc_dir = file.parent / "proc"
    results_h5 = proc_dir / "results_00.h5"
    results_yaml = proc_dir / "results_00.yaml"

    # An h5 without its yaml used to raise FileNotFoundError here; such a
    # half-written output is treated as an incomplete extraction instead.
    if not (results_h5.exists() and results_yaml.exists()):
        return True

    with open(results_yaml, "r") as conf_f:
        # NOTE(review): the module-level `safe_load` is reportedly deprecated in
        # recent ruamel.yaml releases - confirm against the installed version.
        config = yaml.safe_load(conf_f)

    # TODO: make sure extraction is newer than 5/30/2023
    # An empty yaml (None) or one without a "complete" key counts as incomplete.
    return not (isinstance(config, dict) and config.get("complete", False))

In [7]:
# Round 1: re-extract everything using my moseq config file (commented line below).
# Round 2: only extract non-extracted data or incomplete extractions (active code).
files = sorted(
    depth_path
    for folder in folders
    for depth_path in folder.glob('**/depth.*')
    if not_extracted(depth_path)
)
# files = list(concat(f.glob('**/depth.*') for f in folders))

In [8]:
# Number of depth files selected for extraction (counted before the submission
# loop drops .dat files that have an .avi copy).
len(files)

921

In [None]:
# Preview only: displaying the bare list would dump every path into the notebook.
files[:10]

In [9]:
for f in tqdm(files):
    # skip dat files that have an avi copy
    if f.name.endswith("dat") and f.with_suffix(".avi").exists():
        continue
    new_script = script.format(user_pth=f"{user[0]}/{user}", file_path=str(f.absolute()))
    with open("tmp.sh", "w") as f:
        f.write(new_script)

    !sbatch tmp.sh
!rm tmp.sh

  0%|          | 0/921 [00:00<?, ?it/s]

Submitted batch job 11088010
Submitted batch job 11088011
Submitted batch job 11088012
Submitted batch job 11088013
Submitted batch job 11088014
Submitted batch job 11088015
Submitted batch job 11088016
Submitted batch job 11088017
Submitted batch job 11088018
Submitted batch job 11088019
Submitted batch job 11088021
Submitted batch job 11088022
Submitted batch job 11088023
Submitted batch job 11088024
Submitted batch job 11088025
Submitted batch job 11088026
Submitted batch job 11088027
Submitted batch job 11088028
Submitted batch job 11088029
Submitted batch job 11088030
Submitted batch job 11088031
Submitted batch job 11088032
Submitted batch job 11088033
Submitted batch job 11088034
Submitted batch job 11088035
Submitted batch job 11088036
Submitted batch job 11088037
Submitted batch job 11088038
Submitted batch job 11088039
Submitted batch job 11088040
Submitted batch job 11088041
Submitted batch job 11088042
Submitted batch job 11088043
Submitted batch job 11088045
Submitted batc