# Fix extractions for the older mice in the ontogeny dataset

In [1]:
import os
import h5py
from pathlib import Path
from toolz import concat, compose, curry
from ruamel import yaml
from tqdm.auto import tqdm
from aging.organization.paths import FOLDERS

In [2]:
# Login name of the person running the notebook, read from the USER
# environment variable (raises KeyError if the variable is unset).
user = os.environ['USER']

In [13]:
# SLURM batch-script template. It is filled in with ``str.format`` and has two
# placeholders:
#   {user_pth}  - path fragment inserted into the scratch directory used for
#                 the job's --output log file
#   {file_path} - depth file handed to ``moseq2-extract extract``
# Each job requests 1 core / 1 task, 10G of memory and 40 minutes on the
# "short" partition, activates the ``moseq2-app`` conda environment and writes
# the extraction into the "proc_cleaned" output directory.
script = '''#!/bin/env bash
#SBATCH -c 1
#SBATCH -n 1
#SBATCH --mem=10G
#SBATCH -p short
#SBATCH -t 00:40:00
#SBATCH --output=/n/scratch3/users/{user_pth}/tmp/ontogeny/depth-extraction-%j.out

source $HOME/.bashrc
conda activate moseq2-app
moseq2-extract extract "{file_path}" --config-file "/n/groups/datta/win/longtogeny/data/extractions/old_mouse_config.yaml" --output-dir "proc_cleaned"
'''

In [4]:
def not_extracted(file, output_dir="proc"):
    """Return True when ``file`` still needs to be (re-)extracted.

    A depth file counts as extracted only when all of the following hold for
    the ``output_dir`` folder that sits next to it:

    * ``results_00.h5`` exists and can be opened and listed with h5py,
    * ``results_00.yaml`` exists and parses to a mapping,
    * that mapping has a truthy ``complete`` entry.

    Args:
        file: ``pathlib.Path`` of the raw depth file.
        output_dir: name of the extraction folder beside ``file``. Defaults to
            ``"proc"`` (the original behaviour); pass e.g. ``"proc_cleaned"``
            to check extractions written to a different folder.

    Returns:
        bool: ``False`` for partially transferred ``*filepart`` files and for
        complete extractions, ``True`` otherwise.
    """
    # Partially transferred files are never candidates for extraction.
    if file.name.endswith('filepart'):
        return False

    results_dir = file.parent / output_dir
    h5_path = results_dir / "results_00.h5"
    yaml_path = results_dir / "results_00.yaml"

    if not h5_path.exists():
        return True

    # A truncated/corrupt h5 file means the extraction has to be redone.
    # Deliberately broad: any failure to read the file counts as "not extracted".
    try:
        with h5py.File(h5_path, "r") as h5f:
            list(h5f)
    except Exception as e:
        print(e)
        return True

    # An h5 file without its yaml sidecar is an interrupted extraction, not a
    # reason to crash the whole scan.
    if not yaml_path.exists():
        return True

    # NOTE(review): ``safe_load`` is deprecated in recent ruamel.yaml releases;
    # confirm the pinned version still provides it.
    with open(yaml_path, "r") as conf_f:
        config = yaml.safe_load(conf_f)

    # An empty yaml parses to None and old files may lack the key; both are
    # treated as an incomplete extraction.
    extracted = isinstance(config, dict) and bool(config.get("complete", False))
    # TODO: make sure extraction is newer than 5/30/2023
    return not extracted


def no_depth_doubles(file):
    """Return False only for an ``avi`` file that has a ``.dat`` sibling.

    Used as a filter predicate: when the same stem exists with both
    extensions, the file whose name ends in ``avi`` is dropped and the
    ``.dat`` file is kept. Every other file passes.
    """
    # Names that do not end in "avi" can never be the duplicate.
    if not file.name.endswith("avi"):
        return True
    dat_twin = file.with_suffix(".dat")
    return not dat_twin.exists()


def multi_filter(*filters, seq):
    """Lazily keep the items of ``seq`` that satisfy every predicate.

    Args:
        *filters: one-argument predicates.
        seq: iterable to filter (keyword-only).

    Returns:
        A chain of nested ``filter`` iterators, or ``seq`` itself when no
        predicates are given.
    """
    filtered = seq
    # Wrap from the last predicate to the first so the last one is the
    # innermost filter and is tested first for each item.
    for predicate in reversed(filters):
        filtered = filter(predicate, filtered)
    return filtered

In [5]:
# first round, re-extract everything using my moseq config file
# second round, only extract non-extracted data or incomplete extractions
# files = list(concat(f.glob('**/depth.*') for f in folders))

In [9]:
# step 1: ontogeny male files
# Collect every depth.* file below FOLDERS[1:2], drop avi files that have a
# .dat sibling, and sort the remaining paths.
files = sorted(
    multi_filter(
        no_depth_doubles,
        seq=concat(folder.glob("**/depth.*") for folder in FOLDERS[1:2]),
    )
)

In [11]:
# Depth files whose path contains "18mon".
month_18 = [path for path in files if "18mon" in str(path)]

In [14]:
for f in tqdm(month_18):
    new_script = script.format(
        user_pth=f"{user[0]}/{user}", file_path=str(f.absolute())
    )
    with open("tmp.sh", "w") as f:
        f.write(new_script)

    !sbatch tmp.sh
!rm tmp.sh

  0%|          | 0/27 [00:00<?, ?it/s]

extracting: /n/groups/datta/Dana/Ontogeny/raw_data/Ontogeny_males/18months_29042021/session_20210429093852/depth.avi
Submitted batch job 18607353
extracting: /n/groups/datta/Dana/Ontogeny/raw_data/Ontogeny_males/18months_29042021/session_20210429094029/depth.avi
Submitted batch job 18607354
extracting: /n/groups/datta/Dana/Ontogeny/raw_data/Ontogeny_males/18months_29042021/session_20210429094121/depth.avi
Submitted batch job 18607355
extracting: /n/groups/datta/Dana/Ontogeny/raw_data/Ontogeny_males/18months_29042021/session_20210429094215/depth.avi
Submitted batch job 18607356
extracting: /n/groups/datta/Dana/Ontogeny/raw_data/Ontogeny_males/18months_29042021/session_20210429104908/depth.avi
Submitted batch job 18607357
extracting: /n/groups/datta/Dana/Ontogeny/raw_data/Ontogeny_males/18months_29042021/session_20210429104919/depth.avi
Submitted batch job 18607358
extracting: /n/groups/datta/Dana/Ontogeny/raw_data/Ontogeny_males/18months_29042021/session_20210429104926/depth.avi
Submitt

In [15]:
# Depth files whose path contains "12mon".
month_12 = [path for path in files if "12mon" in str(path)]

In [16]:
for f in tqdm(month_12):
    new_script = script.format(
        user_pth=f"{user[0]}/{user}", file_path=str(f.absolute())
    )
    with open("tmp.sh", "w") as f:
        f.write(new_script)

    !sbatch tmp.sh
!rm tmp.sh

  0%|          | 0/26 [00:00<?, ?it/s]

Submitted batch job 18607407
Submitted batch job 18607408
Submitted batch job 18607409
Submitted batch job 18607410
Submitted batch job 18607411
Submitted batch job 18607412
Submitted batch job 18607413
Submitted batch job 18607414
Submitted batch job 18607415
Submitted batch job 18607416
Submitted batch job 18607417
Submitted batch job 18607418
Submitted batch job 18607419
Submitted batch job 18607420
Submitted batch job 18607421
Submitted batch job 18607422
Submitted batch job 18607423
Submitted batch job 18607424
Submitted batch job 18607425
Submitted batch job 18607426
Submitted batch job 18607427
Submitted batch job 18607428
Submitted batch job 18607429
Submitted batch job 18607430
Submitted batch job 18607431
Submitted batch job 18607432
