# Process new strax data on dali

In [1]:
# common_init.py lives outside this notebook. Later cells use np, os and osp
# without importing them, so they are presumably defined there — TODO confirm.
%run ~/common_init.py

Active python: /home/aalbers/miniconda3/envs/py36/bin/python


### Select runs

In [144]:
# Start from all XENON1T runs
import strax
import straxen

# Context used for run selection in the cells below.
db_context = straxen.contexts.xenon1t_analysis(
    # No S3. Just datasets available on dali
    local_only=True)
# Only check availability of raw_records (the data type we select on)
db_context.context_config['check_available'] = ('raw_records',)
db_context.storage = db_context.storage[:1]   # Just the run db
# NOTE(review): rundb_runs is not referenced again in the visible cells;
# the later selection calls select_runs on db_context directly.
rundb_runs = db_context.select_runs(available='raw_records')

Fetching run info from MongoDB: 100%|██████████| 27083/27083 [00:01<00:00, 13943.96it/s]
Checking data availability: 100%|██████████| 1/1 [00:04<00:00,  4.05s/it]


In [194]:
# Select the runs to process: AmBe runs tagged sciencerun1 whose raw_records
# are available, excluding any tag matching '?ource*'.
runs = db_context.select_runs(
    #run_mode='kr83m_*', 
    run_mode='ambe*',
    available='raw_records',
    include_tags='sciencerun1', 
    exclude_tags='?ource*'
)
# Run duration in seconds: end - start is a timedelta in ns, hence the / 1e9
runs['livetime'] = (runs['end'] - runs['start']).values.astype(np.int64) / 1e9
runs['event_rate'] = runs['trigger.events_built'].values / runs['livetime'].values
# Keep only runs with more than 10 built events per second.
# .copy() makes the filtered frame independent of the unfiltered one, so that
# adding columns to `runs` later does not write to a slice
# (pandas SettingWithCopyWarning).
runs = runs[runs['event_rate'] > 10].copy()
#runs = runs.iloc[::10].copy()
strax.count_tags(runs)

Counter({'_sciencerun1_candidate': 1, '_sciencerun1': 1})

In [195]:
runs

Unnamed: 0,end,mode,name,number,reader.ini.name,start,tags,trigger.events_built,raw_records_available,livetime,event_rate
9229,2017-03-27 12:50:03,ambe_stable,170327_1149,8231,ambe_stable,2017-03-27 11:50:00,"_sciencerun1_candidate,_sciencerun1",44561.0,True,3603.0,12.367749


In [196]:
# Check explicitly that they are available on dali
# Read-only context on the existing dali strax data folder; used to verify
# that raw_records are actually present and, later, to locate them on disk.
chris_dali_context = strax.Context(
    storage=strax.DataDirectory('/dali/lgrandi/tunnell/strax_data/',
                                readonly=True),
    register=straxen.RecordsFromPax,
    **straxen.contexts.common_opts)

# One flag per selected run: are its raw_records stored in that folder?
runs['dali_available'] = [chris_dali_context.is_stored(run_id, 'raw_records')
                          for run_id in runs['name'].values]

In [197]:
# Every selected run must have its raw_records on dali; on failure, name the
# runs that do not so the problem can be fixed without extra digging.
assert len(runs) == runs['dali_available'].sum(), (
    "raw_records not stored on dali for runs: "
    + ", ".join(map(str, runs.loc[~runs['dali_available'].astype(bool), 'name'])))

### Copy run documents and link raw data

In [198]:
# Two writable contexts on separate folders. As used by link_data below:
#  - jelle_dali_context (strax_data/) receives the symlinks to the raw data;
#  - jelle_raw_context (strax_data_raw/) receives the copied run metadata.
jelle_dali_context = strax.Context(
    storage=strax.DataDirectory('/dali/lgrandi/aalbers/strax_data/'),
    register=straxen.RecordsFromPax,
    **straxen.contexts.common_opts)

jelle_raw_context = strax.Context(
    storage=strax.DataDirectory('/dali/lgrandi/aalbers/strax_data_raw/'),
    register=straxen.RecordsFromPax,
    **straxen.contexts.common_opts)

def link_data(run_id):
    """Register a run locally without copying its raw data.

    1. Fetch the run document from ``db_context``, drop its ``data`` and
       ``_id`` fields, and write it as run metadata to the first storage
       frontend of ``jelle_raw_context``.
    2. Symlink the folder holding the run's ``raw_records`` (as located by
       ``chris_dali_context``) into the first storage frontend of
       ``jelle_dali_context``, keeping the folder's basename.

    Safe to call again for the same run: if something already exists at the
    link location it is left untouched.

    :param run_id: run name, e.g. ``'170327_1149'``
    """
    # Copy run metadata
    rd = db_context.run_metadata(run_id)
    # pop() instead of del: do not fail if a document lacks one of the fields
    for field in ('data', '_id'):
        rd.pop(field, None)
    # NOTE(review): metadata is written via jelle_raw_context while the link
    # below is created under jelle_dali_context — confirm this is intended.
    jelle_raw_context.storage[0].write_run_metadata(run_id, rd)

    # Link dali raw data
    new_parent_path = jelle_dali_context.storage[0].path
    path = chris_dali_context.storage[0].find(
        chris_dali_context.key_for(run_id, 'raw_records'))[1]
    link_path = osp.join(new_parent_path, osp.basename(path))
    # os.symlink instead of a shell `ln -s`: no word-splitting on unusual
    # paths, and no surprise on re-runs. `ln -s SRC DST` with DST already a
    # link to a directory would create a new link *inside* the source folder.
    # lexists() also catches dangling links.
    if not osp.lexists(link_path):
        os.symlink(path, link_path)

In [199]:
for x in runs['name'].values:
    link_data(x)

### Start processing jobs

In [200]:
# Working directory for reprocessing: job scripts and job logs are written
# below it, and each job cd's into it before running straxer.
folder = '/dali/lgrandi/aalbers/reprocess'

# Batch script template; {folder} and {run_name} are filled in by str.format
# (see write_script).
#
# Memory request (--mem-per-cpu below):
# Consumes 2 GB on average when run on the login node
# So 4 GB should be a reasonable threshold
# and 6 GB is even more robust
# (6000 presumably means MB, i.e. the 6 GB choice — TODO confirm)
process_launcher = """#!/bin/bash
#SBATCH --partition dali
#SBATCH --qos dali
#SBATCH --account=pi-lgrandi
#SBATCH --ntasks=1
#SBATCH --output={folder}/job_logs/{run_name}_stdout.txt
#SBATCH --error={folder}/job_logs/{run_name}_stderr.txt
#SBATCH --cpus-per-task=1
#SBATCH --mem-per-cpu=6000
echo Processing job started
eval "$(/home/aalbers/miniconda3/bin/conda shell.bash hook)"
conda activate py36
export PATH=/home/aalbers/miniconda3/envs/py36/bin:$PATH
echo Environment activated
cd {folder}
straxer {run_name}
echo Processing job terminated
"""

# The --output/--error paths in the template point into job_logs/;
# make sure that directory exists before any job is submitted.
os.makedirs(osp.join(folder, 'job_logs'), exist_ok=True)

def write_script(fn, script, **kwargs):
    """Fill in a script template and save it as an executable file.

    :param fn: path of the file to write; overwritten if it already exists
    :param script: template text whose ``{name}`` placeholders are filled
        in with ``str.format``
    :param kwargs: values for the placeholders in ``script``
    :raises KeyError: if ``script`` has a placeholder with no matching kwarg;
        in that case ``fn`` is not created or modified
    """
    # Render first: if formatting fails we must not have opened (and thereby
    # truncated or created) the target file yet.
    content = script.format(**kwargs)
    with open(fn, mode='w') as f:
        f.write(content)
    make_executable(fn)

def make_executable(path):
    """Add execute permission to the file at ``path`` wherever it is readable.

    For each of user/group/other that has the read bit set, the matching
    execute bit is switched on (e.g. 0o644 becomes 0o755, 0o600 becomes
    0o700). All other permission bits are left as they were.

    :param path: path of an existing file
    """
    current = os.stat(path).st_mode
    # Read bits are 0o444 and execute bits 0o111: shifting right by two
    # turns each r bit into the x bit of the same class.
    readable = current & 0o444
    os.chmod(path, current | (readable >> 2))
    

In [201]:
# Write one batch script per selected run and submit it with sbatch.
# Run names listed in `skip` are passed over (e.g. to avoid resubmitting).
skip = []
script_folder = osp.join(folder, 'scripts')
os.makedirs(script_folder, exist_ok=True)

for run_name in runs['name']:
    if run_name in skip:
        continue

    # Fill the process_launcher template for this run and make it executable
    fn = osp.join(script_folder, f'process_{run_name}.sh')
    write_script(fn, process_launcher, folder=folder, run_name=run_name)
    
    print(f"Starting job for {run_name}")
    # Side effect: queues a job on the cluster each time this cell is run
    !sbatch $fn

Starting job for 170327_1149
Submitted batch job 61097284


### Check processed data

In [215]:
# Context on the strax_data folder inside the reprocessing directory
# (the jobs cd into `folder` before running straxer; presumably this is
# where straxer writes its output — TODO confirm).
path = folder + '/strax_data'
st = strax.Context(storage=path,
                   register=straxen.RecordsFromPax,
                   **straxen.contexts.common_opts)

In [234]:
# Count the distinct runs that have at least one data folder in the first
# storage frontend of `st`. Each folder name parses into
# (run id, data type, lineage hash); only the run id matters here.
sf = st.storage[0]
found = {
    run_id
    for run_id, _dtype, _lhash in map(sf._parse_folder_name, sf._subfolders())
}
len(found)

16

In [235]:
# Cross-check: number of entries the context reports as available for
# 'event_info'. Compare with the number of runs found on disk above.
available = st.list_available('event_info')
len(available)

16