# Create and submit SLURM scripts to extract new depth recordings using the latest `moseq2-app`

In [1]:
import os
from pathlib import Path
from toolz import concat
from ruamel import yaml
from tqdm.auto import tqdm

In [2]:
# Root directories that hold the raw depth recordings, stored as Path objects.
folders = [
    Path(raw_dir)
    for raw_dir in (
        '/n/groups/datta/Dana/Ontogeny/raw_data/Ontogeny_females',
        '/n/groups/datta/Dana/Ontogeny/raw_data/Ontogeny_males',
        '/n/groups/datta/Dana/Ontogeny/raw_data/longtogeny_pre_unet/Females',
        '/n/groups/datta/Dana/Ontogeny/raw_data/longtogeny_pre_unet/Males',
        '/n/groups/datta/min/dominance_v1',
        '/n/groups/datta/min/community_v1',
        '/n/groups/datta/min/wheel_062023',
        '/n/groups/datta/min/cas_behavior_01',
    )
]

In [4]:
# Username taken from the USER environment variable (raises KeyError if unset);
# it is used to fill the per-user scratch path in the SLURM script's --output line.
user = os.environ['USER']

In [5]:
script = '''#!/bin/env bash
#SBATCH -c 1
#SBATCH -n 1
#SBATCH --mem=10G
#SBATCH -p short
#SBATCH -t 00:35:00
#SBATCH --output=/n/scratch3/users/{user_pth}/tmp/ontogeny/depth-extraction-%j.out

source $HOME/.bashrc
conda activate moseq2-app
moseq2-extract extract "{file_path}" --config-file "/n/groups/datta/win/longtogeny/data/extractions/config.yaml"
'''

In [6]:
def not_extracted(file):
    """Return True when the recording at `file` still needs to be extracted.

    A recording counts as extracted only when, next to it, both
    ``proc/results_00.h5`` and ``proc/results_00.yaml`` exist and the YAML
    file has a truthy ``complete`` entry.

    Args:
        file: ``pathlib.Path`` of a depth recording (e.g. ``.../depth.dat``).

    Returns:
        bool: False for names ending in ``filepart`` and for completed
        extractions; True otherwise, including when the YAML metadata is
        missing, empty, or lacks the ``complete`` key.
    """
    # Names ending in "filepart" are never queued for extraction
    # (presumably partially transferred files - confirm).
    if file.name.endswith('filepart'):
        return False

    proc_dir = file.parent / "proc"
    if not (proc_dir / "results_00.h5").exists():
        return True

    try:
        with open(proc_dir / "results_00.yaml", "r") as conf_f:
            # YAML(typ="safe") replaces ruamel's legacy module-level safe_load
            # and is available on both old and new ruamel.yaml versions.
            config = yaml.YAML(typ="safe", pure=True).load(conf_f)
    except FileNotFoundError:
        # An h5 file without its metadata is treated as an unfinished extraction.
        return True

    # An empty or malformed metadata file cannot confirm completion.
    if not isinstance(config, dict):
        return True
    # TODO: make sure extraction is newer than 5/30/2023
    return not config.get("complete", False)

In [7]:
# Round 1: re-extract everything using my moseq config file.
# Round 2: only extract data that was never extracted or whose extraction is incomplete.
files = sorted(
    depth_path
    for depth_path in concat(folder.glob('**/depth.*') for folder in folders)
    if not_extracted(depth_path)
)
# files = list(concat(f.glob('**/depth.*') for f in folders))

In [8]:
# number of depth files selected for extraction
len(files)

448

In [7]:
# Preview only the first few paths; displaying the whole list floods the output.
files[:10]

[PosixPath('/n/groups/datta/Dana/Ontogeny/raw_data/Ontogeny_females/9months/session_20230609135625/depth.dat'),
 PosixPath('/n/groups/datta/Dana/Ontogeny/raw_data/Ontogeny_females/9months/session_20230609135634/depth.dat'),
 PosixPath('/n/groups/datta/Dana/Ontogeny/raw_data/Ontogeny_females/9months/session_20230609135644/depth.dat'),
 PosixPath('/n/groups/datta/Dana/Ontogeny/raw_data/Ontogeny_females/9months/session_20230609135654/depth.dat'),
 PosixPath('/n/groups/datta/Dana/Ontogeny/raw_data/Ontogeny_females/9months/session_20230609151844/depth.dat'),
 PosixPath('/n/groups/datta/Dana/Ontogeny/raw_data/Ontogeny_females/9months/session_20230609151857/depth.dat'),
 PosixPath('/n/groups/datta/Dana/Ontogeny/raw_data/Ontogeny_females/9months/session_20230609151908/depth.dat'),
 PosixPath('/n/groups/datta/Dana/Ontogeny/raw_data/Ontogeny_females/9months/session_20230609151954/depth.dat'),
 PosixPath('/n/groups/datta/Dana/Ontogeny/raw_data/Ontogeny_females/9months/session_20230609160244/depth

In [8]:
for f in tqdm(files):
    # skip dat files that have an avi copy
    if f.name.endswith("dat") and f.with_suffix(".avi").exists():
        continue
    new_script = script.format(user_pth=f"{user[0]}/{user}", file_path=str(f.absolute()))
    with open("tmp.sh", "w") as f:
        f.write(new_script)

    !sbatch tmp.sh
!rm tmp.sh

  0%|          | 0/97 [00:00<?, ?it/s]

Submitted batch job 10713510
Submitted batch job 10713511
Submitted batch job 10713512
Submitted batch job 10713513
Submitted batch job 10713514
Submitted batch job 10713515
Submitted batch job 10713516
Submitted batch job 10713517
Submitted batch job 10713518
Submitted batch job 10713519
Submitted batch job 10713520
Submitted batch job 10713521
Submitted batch job 10713522
Submitted batch job 10713523
Submitted batch job 10713524
Submitted batch job 10713525
Submitted batch job 10713526
Submitted batch job 10713527
Submitted batch job 10713528
Submitted batch job 10713529
Submitted batch job 10713530
Submitted batch job 10713531
Submitted batch job 10713532
Submitted batch job 10713533
Submitted batch job 10713534
Submitted batch job 10713535
Submitted batch job 10713536
Submitted batch job 10713537
Submitted batch job 10713538
Submitted batch job 10713539
Submitted batch job 10713540
Submitted batch job 10713541
Submitted batch job 10713542
Submitted batch job 10713543
Submitted batc