In [1]:
import os
import subprocess
import sys
from queue import Queue
from threading import Thread

from deepclean.couplings import subtraction_problems

### Environment variables

In [2]:
# Root directory under which the container images, data and results live.
# Defaults to the original author's home directory; set the DEEPCLEAN_HOME
# environment variable to point the notebook elsewhere without editing it.
HOME = os.environ.get("DEEPCLEAN_HOME", "/home/chiajui.chou")
DEEPCLEAN_CONTAINER_ROOT = f"{HOME}/images/deepclean"
DATA_DIR = f"{HOME}/deepclean/data/CDC_test-120Hz"
RESULTS_DIR = f"{HOME}/deepclean/results"
# Interferometer and subtraction-problem keys used to index
# `subtraction_problems` in the configuration cell.
DEEPCLEAN_IFO = "H1"
DEEPCLEAN_PROBLEM = "120Hz"
# Exported as CUDA_VISIBLE_DEVICES by the training cell.
GPU_INDEX = 0

### Input to deepclean.tasks.Train

In [3]:
# luigi.cfg
# Built as one nested literal so each section's contents are visible at a
# glance (same keys and insertion order as before).
cfg = {
    'deepclean': {
        'ifo': DEEPCLEAN_IFO,
        'problem': DEEPCLEAN_PROBLEM,
        'strain_channel': "GDS-CALIB_STRAIN",
    },
    'core': {
        'local_scheduler': True,
        'module': "deepclean",
    },
}

# deepclean.config.deepclean
ifo = cfg['deepclean']['ifo']
# Wrapped in a list so the lookups below work over one entry per problem.
problem = [cfg['deepclean']['problem']]
strain_channel = f"{ifo}:{cfg['deepclean']['strain_channel']}"
container_root = DEEPCLEAN_CONTAINER_ROOT

# subtraction problems
# One coupling per requested problem, selected for this interferometer.
couplings = [subtraction_problems[name][ifo] for name in problem]
# Flatten the channels of every coupling into a single witness list.
witnesses = [
    channel for coupling in couplings for channel in coupling.channels
]
# Per-coupling band edges, kept as lists parallel to `couplings`.
freq_low = [coupling.freq_low for coupling in couplings]
freq_high = [coupling.freq_high for coupling in couplings]

# train.sh and Train task
image = "train.sif"
gpus = GPU_INDEX
data_fname = f"{DATA_DIR}/deepclean-1378402219-3072.hdf5"
train_config = f"{HOME}/deepcleanv2/projects/train/config.yaml"
output_dir = f"{RESULTS_DIR}/O4-CDC_120Hz_outlaw"
config = train_config
# Strain channel first, followed by every witness channel.
channels = [strain_channel] + witnesses

### stream_command

In [4]:
def read_stream(stream, process, q):
    """Forward every line of one of ``process``'s pipes onto ``q``.

    Args:
        stream: Name of the attribute on ``process`` holding the pipe to
            read, e.g. ``"stdout"`` or ``"stderr"``.
        process: Object exposing that attribute as a binary file-like object
            with a ``readline`` method.
        q: Queue that receives each line as ``str``. A single ``None`` is
            always put last, whether reading finished normally or failed,
            so a consumer waiting on the sentinel cannot block forever.
    """
    try:
        # Looked up inside the ``try`` so that even a bad stream name still
        # results in the sentinel being sent from the ``finally`` below.
        pipe = getattr(process, stream)
        # ``readline`` returns b"" at EOF, which ends the iteration.
        for raw_line in iter(pipe.readline, b""):
            # Undecodable bytes become U+FFFD instead of raising, which
            # would kill this reader and leave the pipe undrained.
            q.put(raw_line.decode(errors="replace"))
    finally:
        q.put(None)

def stream_process(process):
    """Echo ``process``'s stdout and stderr to ``sys.stdout`` until both end.

    One reader thread per stream runs ``read_stream``, which pushes lines
    onto a shared queue followed by a ``None`` sentinel. This function
    writes lines as they arrive and returns once every reader has finished.

    Args:
        process: Object whose ``stdout`` and ``stderr`` attributes are
            readable binary pipes (e.g. a ``subprocess.Popen`` started with
            ``stdout=PIPE, stderr=PIPE``).
    """
    q = Queue()
    streams = ["stdout", "stderr"]
    threads = [
        Thread(target=read_stream, args=(name, process, q))
        for name in streams
    ]
    for t in threads:
        t.start()

    # Every reader ends with exactly one None, so drain the queue once per
    # reader rather than a hard-coded number of times.
    for _ in threads:
        for line in iter(q.get, None):
            sys.stdout.write(line)

    # All sentinels have been seen, so the readers are finishing; join them
    # so no thread outlives this call.
    for t in threads:
        t.join()

def stream_command(command: list[str], check: bool = True):
    """Run ``command`` as a subprocess, echoing its output while it runs.

    Args:
        command: Program and arguments, passed to ``subprocess.Popen``
            as-is (no shell).
        check: When true (the default), a non-zero exit status raises
            instead of being ignored. Pass ``False`` to only get the exit
            status back.

    Returns:
        The exit status of the command.

    Raises:
        subprocess.CalledProcessError: If ``check`` is true and the command
            exits with a non-zero status.
    """
    # The context manager closes both pipes and waits for the child on
    # exit, so the process is always reaped and ``returncode`` is set.
    with subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        env=os.environ,
    ) as process:
        stream_process(process)

    if check and process.returncode:
        raise subprocess.CalledProcessError(process.returncode, command)
    return process.returncode

### Run training

In [None]:
# Set to True to actually launch training from this notebook. Left off by
# default so that running the cell only prints the command.
RUN_TRAINING = False

# stream_command hands os.environ to the child process, so the GPU selection
# is exported here; note that it also applies to this kernel from now on.
os.environ["CUDA_VISIBLE_DEVICES"] = str(gpus)
command = [
    "python",
    "-m",
    "train",
    "--config",
    train_config,
    "--data.fname",
    data_fname,
    "--data.channels",
    "[" + ",".join(channels) + "]",
    "--data.freq_low",
    str(freq_low),
    "--data.freq_high",
    str(freq_high),
    f"--trainer.logger.save_dir={output_dir}",
]
print(command)

if RUN_TRAINING:
    # Creates missing parent directories too and is a no-op when the
    # directory already exists.
    os.makedirs(output_dir, exist_ok=True)
    stream_command(command)

['python', '-m', 'train', '--config', '/home/chiajui.chou/deepcleanv2/projects/train/config.yaml', '--data.fname', '/home/chiajui.chou/deepclean/data/CDC_test-120Hz/deepclean-1378402219-3072.hdf5', '--data.channels', '[H1:GDS-CALIB_STRAIN,H1:PEM-CS_ACC_PSL_TABLE1_Z_DQ,H1:HPI-HAM4_BLND_L4C_Y_IN1_DQ,H1:IMC-WFS_A_Q_PIT_OUT_DQ,H1:IMC-WFS_B_Q_PIT_OUT_DQ,H1:IMC-DOF_1_Y_IN1_DQ,H1:PEM-CS_ACC_PSL_PERISCOPE_X_DQ,H1:IMC-DOF_4_Y_IN1_DQ,H1:HPI-HAM6_BLND_L4C_RX_IN1_DQ,H1:IMC-WFS_A_DC_YAW_OUT_DQ,H1:IMC-WFS_B_I_PIT_OUT_DQ,H1:IMC-WFS_B_Q_YAW_OUT_DQ,H1:IMC-WFS_A_Q_YAW_OUT_DQ,H1:PSL-PMC_HV_MON_OUT_DQ,H1:LSC-REFL_SERVO_ERR_OUT_DQ,H1:IMC-WFS_B_I_YAW_OUT_DQ,H1:PEM-CS_ACC_HAM2_PRM_Z_DQ,H1:PEM-CS_ACC_PSL_TABLE1_X_DQ,H1:LSC-MCL_IN1_DQ,H1:IMC-WFS_A_DC_PIT_OUT_DQ,H1:IMC-F_OUT_DQ,H1:LSC-REFL_SERVO_CTRL_OUT_DQ,H1:PSL-PMC_MIXER_OUT_DQ,H1:IMC-WFS_B_DC_YAW_OUT_DQ,H1:PEM-CS_ACC_PSL_PERISCOPE_Y_DQ,H1:IMC-DOF_2_Y_IN1_DQ,H1:LSC-MCL_OUT_DQ,H1:HPI-HAM1_BLND_L4C_RX_IN1_DQ,H1:HPI-HAM6_BLND_L4C_VP_IN1_DQ,H1:IMC-DOF_2_P_IN1_DQ

### Data Setup

In [5]:
import yaml

# Parse the training YAML that is also passed to the train command via
# --config. The encoding is pinned so that parsing does not depend on the
# locale of the machine running the kernel.
with open(config, 'r', encoding='utf-8') as file:
    config_dict = yaml.safe_load(file)

# Show only the `data` section, which the dataset cell below relies on.
print(config_dict['data'])

{'train_duration': 1024, 'test_duration': 2048, 'train_stride': 0.0625, 'valid_frac': 0.33, 'inference_sampling_rate': 2, 'start_offset': 0, 'clean_kernel_length': 1, 'clean_stride': 1, 'batch_size': 32, 'kernel_length': 8, 'filt_order': 8}


In [17]:
from train.data import DeepCleanDataset
from train.model import DeepClean
from train.architectures import Architecture
from train.callbacks import ModelCheckpoint, PsdPlotter
from train.metrics import OnlinePsdRatio, PsdRatio

# Take the data hyperparameters from the `data` section of the training YAML
# (loaded into `config_dict`) instead of re-typing them, so the notebook
# cannot drift from what the train command is given through --config.
# Assign a literal to any of the names below to override one for an experiment.
data_cfg = config_dict['data']

fname = data_fname
kernel_length = data_cfg['kernel_length']
batch_size = data_cfg['batch_size']
train_duration = data_cfg['train_duration']
train_stride = data_cfg['train_stride']
test_duration = data_cfg['test_duration']
valid_frac = data_cfg['valid_frac']
inference_sampling_rate = data_cfg['inference_sampling_rate']
clean_kernel_length = data_cfg['clean_kernel_length']
clean_stride = data_cfg['clean_stride']
start_offset = data_cfg['start_offset']
filt_order = data_cfg['filt_order']

# fname, channels and the band edges are the values the train command gets
# as --data.* arguments; the rest mirrors the YAML.
dc_dataset = DeepCleanDataset(
    fname=fname,
    channels=channels,
    kernel_length=kernel_length,
    freq_low=freq_low,
    freq_high=freq_high,
    batch_size=batch_size,
    train_duration=train_duration,
    test_duration=test_duration,
    valid_frac=valid_frac,
    train_stride=train_stride,
    inference_sampling_rate=inference_sampling_rate,
    clean_kernel_length=clean_kernel_length,
    clean_stride=clean_stride,
    start_offset=start_offset,
    filt_order=filt_order,
)
# NOTE(review): the classes themselves, not instances, are passed here;
# confirm that DeepClean accepts classes rather than constructed objects.
dc_model = DeepClean(
    arch=Architecture,
    loss=PsdRatio,
    metric=OnlinePsdRatio,
)

In [7]:
# Run the dataset's setup hook for the "fit" stage.
# NOTE(review): the execution counts show this cell (In [7]) was last run
# before the cell that creates `dc_dataset` (In [17]); re-run it after
# re-creating the dataset so that setup applies to the current object.
stage = "fit"
dc_dataset.setup(stage)

In [15]:
# NOTE(review): the recorded run of this cell failed with
# "AttributeError: 'NoneType' object has no attribute 'device_ids'".
# Presumably train_dataloader() reads device information from a trainer
# object that is not attached when the dataset is used on its own here;
# confirm against train.data before relying on this call.
dc_dataset.train_dataloader()

AttributeError: 'NoneType' object has no attribute 'device_ids'