### This notebook reads the DL1 data check information from night-wise hdf5 files, and uses it to select runs for analysis

Features:
- Finds the runs in which the telescope was pointing towards the source of interest
- Selects the desired range of zenith angle
- Excludes runs in which interleaved pedestals or flatfield events were missing 
- Excludes runs which have too high noise (std dev of charge in interleaved pedestals)
- Checks sun and moon position, to spot data taken in twilight or moon conditions
- Checks other quantities and applies custom cuts to remove suspicious runs

Please note that the cuts are custom cuts for the specific application of selecting very good quality Crab runs. The cut values will have to be adapted to the different sources, and also to the purpose of the analysis. Some of the runs rejected here may still be useful.


In [None]:
import glob
from datetime import datetime, timezone

import astropy.units as u
import matplotlib.pyplot as plt
import numpy as np
import tables
from astroplan.moon import moon_illumination
from astropy.coordinates import get_moon, get_sun, AltAz, SkyCoord
from astropy.table import Table, vstack
from astropy.time import Time
from ctapipe.io import read_table

from lstchain.reco.utils import location

%matplotlib inline

### Set the path to the DL1 datacheck files 

In [None]:
# Input files: all the available night-wise DL1_datacheck_YYYYMMDD.h5 files (or at least
# those of the nights which contain the data you intend to analyze).
#
# The files are quite light, 5MB per night on average, and are available in the IT cluster under the directories
# /fefs/aswg/data/real/OSA/DL1DataCheck_LongTerm/v0.9/YYYYMMDD/
# If you work on a local copy of the files, edit the pattern below accordingly.
datacheck_pattern = "/fefs/aswg/data/real/OSA/DL1DataCheck_LongTerm/v0.9/20*/DL1_datacheck_20*.h5"

# Sorted list of the paths matching the pattern:
datacheck_files = sorted(glob.glob(datacheck_pattern))
print(f'A total of {len(datacheck_files)} files will be read')

### Coordinates of the source we intend to analyze

In [None]:
# Get the sky position of the source from its name, and print it.
# NOTE(review): SkyCoord.from_name presumably queries an online name-resolving service,
# so this cell would need network access - confirm against the astropy documentation.
source_coordinates = SkyCoord.from_name("Crab Nebula")
print(source_coordinates)

### Reminder: contents of the night-wise DL1 datacheck hdf5 files

There are five different tables in each file containing datacheck info:

With one row per run:

- runsummary

- pixwise_runsummary


With one row per subrun (containing only events of a specific type):

- cosmics

- flatfield

- pedestals

### Create the tables and fill them from the files:
Note that files that have some missing table (typically flatfield or pedestals) will not be loaded, hence the corresponding runs won't be considered!

In [None]:
# Interleaved-event tables that a file must contain in order to be loaded:
required_tables = ("pedestals", "flatfield")

# One list per table type; each gets one table per loaded night-wise file.
# Run-wise tables:
dcheck_pixwise_runsummary = []
dcheck_runsummary = []
# Subrun-wise tables:
dcheck_cosmics = []
dcheck_flatfield = []
dcheck_pedestals = []

for filename in datacheck_files:

    # Check that the file contains the necessary info:
    with tables.open_file(filename) as h5file:
        missing_tables = [name for name in required_tables if name not in h5file.root]

    if missing_tables:
        # Only the first missing table is reported
        print(f"file {filename} does not contain the interleaved {missing_tables[0]} table... Skipping!!")
        continue

    dcheck_pixwise_runsummary.append(read_table(filename, "/pixwise_runsummary"))

    night_runsummary = read_table(filename, "/runsummary/table")
    # Set to 0 the number of flatfield events in nan (means none was found):
    num_flatfield = night_runsummary['num_flatfield']
    night_runsummary['num_flatfield'] = np.where(np.isnan(num_flatfield), 0, num_flatfield)
    dcheck_runsummary.append(night_runsummary)

    for table_list, table_path in ((dcheck_cosmics, "/cosmics/table"),
                                   (dcheck_flatfield, "/flatfield/table"),
                                   (dcheck_pedestals, "/pedestals/table")):
        table_list.append(read_table(filename, table_path))


# Merge the per-night tables into a single table of each type.
# Ignore metadata conflicts below. i.e. metadata_conflicts='silent'.
# It is just that columns which contain some nan are float64, while those which do not are float32
dcheck_runsummary = vstack(dcheck_runsummary, metadata_conflicts='silent')
dcheck_cosmics = vstack(dcheck_cosmics, metadata_conflicts='silent')

dcheck_pixwise_runsummary = vstack(dcheck_pixwise_runsummary)
dcheck_flatfield = vstack(dcheck_flatfield)
dcheck_pedestals = vstack(dcheck_pedestals)


### Exclude runs with issues in interleaved pedestals

In [None]:
# Flag as good the runs whose number of interleaved pedestal events is a finite number.
# This cell only builds the mask and lists the runs that fail it; the mask itself
# is applied in the selections further below.
ped_ok_mask = np.isfinite(dcheck_runsummary['num_pedestals'])
print('Removed runs:', np.array(dcheck_runsummary['runnumber'][~ped_ok_mask]))

In [None]:
# Have a look at the runsummary table columns
# (the bare expression at the end of the cell is what the notebook displays):
dcheck_runsummary

In [None]:
# Just a function to print out the run numbers that survive a certain set of cuts:

def print_runs(table, mask, by_date=False):
    """Print a summary of the runs selected by a mask.

    The number of selected runs, their total observation time and their
    run numbers are printed. Optionally the runs are also listed by date.

    Parameters
    ----------
    table : table-like
        Run-wise table. The columns 'runnumber' and 'elapsed_time' are read,
        and also 'time' if by_date is True. 'elapsed_time' is divided by 3600
        to report hours, and 'time' is interpreted as a unix timestamp.
    mask : boolean array
        Selection of rows of table (one entry per row).
    by_date : bool
        If True, also print one line per date with the runs of that date.
        Half a day is subtracted from the timestamp before taking the UTC date,
        so a run taken after midnight UTC is listed under the previous
        calendar day, together with the runs of the same evening.
    """
    selected_runs = np.array(table['runnumber'][mask])

    print(f"{mask.sum()} wobble runs for the selected source")
    print(f"Observation time: {table['elapsed_time'][mask].sum()/3600:.2f} hours")
    print()
    print(np.array2string(selected_runs, separator=', '))

    if not by_date:
        return

    print()
    print()

    # Group the runs by date in a single pass. tolist() turns the run numbers
    # into plain Python numbers, so that the printed lists read e.g. [10, 11]
    # whatever the numpy version.
    runs_by_date = {}
    for t, run in zip(table['time'][mask], selected_runs.tolist()):
        # Timezone-aware replacement of the deprecated datetime.utcfromtimestamp
        date = datetime.fromtimestamp(t - 0.5*86400, tz=timezone.utc).date()
        runs_by_date.setdefault(date, []).append(run)

    for i, date in enumerate(sorted(runs_by_date)):
        print(i+1, ":", date, ":", runs_by_date[date])

### Find the runs with pointing close to the source of interest:

In [None]:
# Sky position of the mean telescope pointing of each run
# (the 'mean_ra' and 'mean_dec' columns are taken as degrees):
telescope_pointing = SkyCoord(ra=dcheck_runsummary['mean_ra']*u.deg, dec=dcheck_runsummary['mean_dec']*u.deg)

In [None]:
# Angular distance between the source and the mean pointing of each run:
angular_distance = source_coordinates.separation(telescope_pointing)

# Distribution of the distances below 1 deg (one entry per run):
plt.hist(angular_distance.to_value(u.deg), bins=200, range=(0, 1))
plt.xlabel('Angular distance to the source (deg)')
plt.ylabel('Number of runs')
plt.show()

# Select wobble pointings at ~0.4 deg from the source:
source_mask = ((angular_distance > 0.35 * u.deg) &
               (angular_distance < 0.45 * u.deg))

print_runs(dcheck_runsummary, source_mask)

In [None]:
# Distributions of the mean pointing direction of the runs taken on the source
fig = plt.figure(figsize=(15,4))

# Left panel: number of runs vs. mean azimuth
fig.add_subplot(1, 2, 1)
plt.hist(np.rad2deg(dcheck_runsummary['mean_azimuth'][source_mask]), bins=30)
plt.xlabel('Mean azimuth (deg)')
plt.ylabel('Number of runs')

# Right panel: mean zenith. Each run is weighted by its duration in hours,
# so the bin contents are observation time, not a number of runs.
fig.add_subplot(1, 2, 2)
plt.hist(90-np.rad2deg(dcheck_runsummary['mean_altitude'][source_mask]),
         weights=dcheck_runsummary['elapsed_time'][source_mask]/3600,
         bins=30)
plt.xlabel('Mean zenith (deg)')
plt.ylabel('Observation time (hours)')
plt.show()

### Selection of zenith angle range
Define here the desired zenith angle range, e.g. for selecting runs that can be analyzed with an MC set with a given pointing

In [None]:
min_zenith = 0
max_zenith = 35 # degrees

# Zenith angle (deg) corresponding to the minimum altitude of each run:
zenith_at_min_altitude = 90 - np.rad2deg(dcheck_runsummary['min_altitude'])

# NOTE(review): both limits are applied to the zenith computed from 'min_altitude'.
# For min_zenith > 0 the lower limit should perhaps use the maximum altitude
# of the run instead - to be confirmed.
zenith_mask = (zenith_at_min_altitude < max_zenith) & (zenith_at_min_altitude > min_zenith)

print(f'With {min_zenith} < zenith < {max_zenith} degrees:')


In [None]:
# Runs on the source, passing the zenith selection and with a finite number
# of interleaved pedestals, also listed by date:
print_runs(dcheck_runsummary, source_mask & zenith_mask & ped_ok_mask, by_date=True)

In [None]:
# UTC date and time of each run, from the unix timestamps in the 'time' column.
# datetime.utcfromtimestamp is deprecated (Python 3.12), so the conversion is done
# with an explicit UTC timezone; tzinfo is then dropped to keep naive datetimes, as before.
utctime = np.array([datetime.fromtimestamp(x, tz=timezone.utc).replace(tzinfo=None)
                    for x in dcheck_runsummary['time']])

### Check sun position
In case you want e.g. to exclude twilight data

In [None]:
# Sun position in the local horizontal frame at the time of every run:
sun_gcrs = get_sun(Time(utctime))
altaz = AltAz(obstime=utctime, location=location)
sun_altaz = sun_gcrs.transform_to(altaz)

# Plot it vs. time for the runs selected so far:
mask = source_mask & zenith_mask & ped_ok_mask
selected_times = utctime[mask]
selected_sun_altitude = sun_altaz.alt[mask]

plt.figure(figsize=(15,4))
plt.scatter(selected_times, selected_sun_altitude)
plt.grid()
plt.ylabel('Sun altitude (deg)')
plt.show()

In [None]:
# Camera-averaged pedestal charge std dev vs. sun altitude, for the runs selected so far
mask = source_mask & zenith_mask & ped_ok_mask
selected_sun_altitude = sun_altaz.alt[mask]
selected_ped_std = dcheck_runsummary['ped_charge_stddev'][mask]

plt.figure(figsize=(15,4))
plt.scatter(selected_sun_altitude, selected_ped_std, s=8)
plt.xlabel('Sun altitude (deg)')
plt.ylabel('Pedestal charge std dev (p.e.)')
plt.grid()
plt.show()

In this case the sun is always well below horizon, there is no correlation with the camera-averaged pedestal charge std dev

### Check moon position

In [None]:
mask = source_mask & zenith_mask & ped_ok_mask

plt.figure(figsize=(15,4))

# Moon position in the local horizontal frame at the time of every run.
# NOTE(review): get_moon is reportedly deprecated in recent astropy releases in favour
# of get_body("moon", ...) - check against the astropy version in use.
moon_gcrs = get_moon(Time(utctime), location=location)
altaz = AltAz(obstime=utctime, location=location)
moon_altaz = moon_gcrs.transform_to(altaz)

# Moon altitude vs. time for the runs selected so far:
plt.scatter(utctime[mask], moon_altaz.alt[mask])
plt.grid()
plt.ylabel('Moon altitude (deg)')
plt.show()

# Moon below the horizon:
no_moon = moon_altaz.alt.to_value(u.deg) < 0

In [None]:
# Fraction of illuminated moon, evaluated at the time of each run
# (one value per entry of utctime):
moon_fraction = moon_illumination(Time(utctime))

In [None]:
# Selection shared by all the samples plotted in this cell:
base_mask = source_mask & zenith_mask & ped_ok_mask

moon_fraction_limit = 0.2
limit_percent = int(moon_fraction_limit*100)

# First figure: pedestal charge std dev vs. moon altitude, for three samples
plt.figure(figsize=(15,4))

mask = base_mask & no_moon
plt.scatter(moon_altaz.alt[mask], dcheck_runsummary['ped_charge_stddev'][mask], s=8, label='Moon below horizon')

mask = base_mask & ~no_moon & (moon_fraction < moon_fraction_limit)
plt.scatter(moon_altaz.alt[mask],
            dcheck_runsummary['ped_charge_stddev'][mask], s=8,
            label=f'Moon above horizon, <{limit_percent}% illuminated')

mask = base_mask & ~no_moon & (moon_fraction >= moon_fraction_limit)
# Raw f-string: "\g" is not a valid escape sequence in a normal string literal
plt.scatter(moon_altaz.alt[mask],
            dcheck_runsummary['ped_charge_stddev'][mask], s=8,
            label=rf'Moon above horizon, $\geq$ {limit_percent}% illuminated')

plt.xlabel('Moon altitude (deg)')
plt.ylabel('Pedestal charge std dev (p.e.)')
plt.legend(loc='upper left')
plt.grid()
plt.show()

# Second figure: pedestal charge std dev vs. run number, moon below / above the horizon
plt.figure(figsize=(15,4))
mask = base_mask & no_moon
plt.scatter(dcheck_runsummary['runnumber'][mask], dcheck_runsummary['ped_charge_stddev'][mask],
            label='Moon below horizon', s=5)
mask = base_mask & ~no_moon
plt.scatter(dcheck_runsummary['runnumber'][mask], dcheck_runsummary['ped_charge_stddev'][mask],
            label='Moon above horizon', s=5)
plt.grid()
plt.legend()
plt.xlabel('Run number')
plt.ylabel('Pedestal charge std dev (p.e.)')
plt.show()

### A cut in the camera-averaged pedestal charge standard deviation seems adequate to remove both moon runs and high-NSB runs. 
Note that the specific value for this cut will depend on the observed source (e.g. the FOV around Crab is brighter than for typical extragalactic sources)

In [None]:
# Upper limit on the camera-averaged pedestal charge std dev:
max_ped_std = 2 # p.e.
ped_charge_std = dcheck_runsummary['ped_charge_stddev']
ped_std_cut = ped_charge_std < max_ped_std

# Runs surviving all the cuts defined so far:
selection = source_mask & zenith_mask & ped_ok_mask & ped_std_cut
print_runs(dcheck_runsummary, selection)

### Check rate of cosmics
To remove too-low rate runs that probably indicate non-optimal weather or telescope issues

In [None]:
# Rate of cosmics of every run (number of cosmics events / elapsed time):
rate_cosmics = dcheck_runsummary['num_cosmics'] / dcheck_runsummary['elapsed_time']

# Plot it vs. the mean zenith angle, for the runs selected so far:
mask = source_mask & zenith_mask & ped_ok_mask & ped_std_cut
mean_zenith_deg = 90 - np.rad2deg(dcheck_runsummary['mean_altitude'][mask])

plt.figure(figsize=(15,4))
plt.scatter(mean_zenith_deg, rate_cosmics[mask])
plt.xlabel('Zenith angle (deg)')
plt.ylabel('Cosmics rate (/s)')
plt.ylim(0, 15000)
plt.grid()
plt.show()

In [None]:
# Now the cosmics rate vs. run number

# Rate of cosmics of every run (number of cosmics events / elapsed time):
rate_cosmics = dcheck_runsummary['num_cosmics'] / dcheck_runsummary['elapsed_time']

mask = source_mask & zenith_mask & ped_ok_mask & ped_std_cut
selected_run_numbers = dcheck_runsummary['runnumber'][mask]

plt.figure(figsize=(15,4))
plt.scatter(selected_run_numbers, rate_cosmics[mask])
plt.xlabel('Run number')
plt.ylabel('Cosmics rate (/s)')
plt.grid()
plt.show()

### We remove runs with too low cosmics rates. 
The specific value of the cut will be sample-dependent, and zenith-dependent... the value we use here of 3000 (evts/s) is somewhat arbitrary

In [None]:
# Minimum rate of cosmics (events/s) required for a run to be kept:
min_cosmics_rate = 3000
rate_mask = rate_cosmics > min_cosmics_rate

# Runs surviving all the cuts defined so far:
selection = source_mask & zenith_mask & ped_ok_mask & ped_std_cut & rate_mask
print_runs(dcheck_runsummary, selection)

In [None]:
# Let's see that expanded (now x-axis is just the order of each run in the list of survivors)

mask = source_mask & zenith_mask & ped_ok_mask & ped_std_cut & rate_mask
surviving_rates = rate_cosmics[mask]

plt.figure(figsize=(15,4))
plt.plot(surviving_rates, 'o')
plt.xlabel('Run index in list')
plt.ylabel('Cosmics rate (/s)')
plt.ylim(0,10000)
plt.grid()
plt.show()

The rate is not very stable, but hopefully most of the differences are due to near-threshold events, so that the data will still be usable for spectral calculations well above the trigger threshold

### Check mean number of pixels around stars
Note that the pedestal charge std dev in the DL1 check is calculated excluding those pixels, hence there is no correlation

In [None]:
# Mean number of pixels near stars, for the runs selected so far
# (the x-axis is the position of the run in the list of selected runs)
mask = source_mask & zenith_mask & ped_ok_mask & ped_std_cut & rate_mask
pixels_near_stars = dcheck_runsummary['mean_number_of_pixels_nearby_stars'][mask]

plt.figure(figsize=(15,4))
plt.plot(pixels_near_stars, 'o')
plt.xlabel('Run index in list')
plt.ylabel('Mean number of pixels near stars')
plt.grid()
plt.show()


There are different "populations" because of different wobble pointings

### Now check the average pixel rate of >10 and >30 pe pulses in cosmics
Note that also star-affected pixels were excluded in the DL1 check calculation of the average pixel rates. Low rates may indicate poor weather or telescope problems

In [None]:
# The run selection does not depend on the pulse threshold, so it is built only once:
mask = source_mask & zenith_mask & ped_ok_mask & ped_std_cut & rate_mask

# One figure per pulse charge threshold (in p.e.)
for npe in [10, 30]:
    # Fraction of pulses above the threshold, times the rate of cosmics:
    pulse_fraction = dcheck_runsummary[f'cosmics_fraction_pulses_above{npe}']
    rate_cosmics_pulses = (pulse_fraction *
                           dcheck_runsummary['num_cosmics'] / dcheck_runsummary['elapsed_time'])
    selected_rates = rate_cosmics_pulses[mask]

    plt.figure(figsize=(15,4))
    plt.plot(selected_rates, 'o', label=f'pulses of > {npe} p.e.')

    plt.xlabel('Run index in list')
    plt.ylabel('Rate (/s)')
    # Leave 20% of headroom above the highest point
    plt.ylim(0, 1.2*np.max(selected_rates))
    plt.legend()
    plt.grid()
    plt.show()

The high rate spikes might be due to car flashes, MAGIC LIDAR shots, satellites... I could not investigate it yet. If those are the reasons, the shower data might still be perfectly ok.

### Define minimum values for those rates
To remove outliers in the low-rate side

In [None]:
# Minimum values required for the rates of pulses above 10 and 30 p.e. (same definition
# of the rates as in the plots above: fraction of pulses times rate of cosmics)
min_rate_above10 = 25
min_rate_above30 = 4.5

cosmics_rate = dcheck_runsummary['num_cosmics'] / dcheck_runsummary['elapsed_time']

rate10_mask = dcheck_runsummary['cosmics_fraction_pulses_above10'] * cosmics_rate > min_rate_above10
rate30_mask = dcheck_runsummary['cosmics_fraction_pulses_above30'] * cosmics_rate > min_rate_above30

# A run must pass both cuts:
pix_rate_mask = rate10_mask & rate30_mask

In [None]:
# The cuts on the pixel rates of pulses above 10 and 30 pe remove for example 6 out of 9 runs of the 20201119 night,
# for which the Logbook reports passing clouds, low transmission and varying trigger rates.

# Selection before the pixel rate cuts...
before_pix_rate_cut = source_mask & zenith_mask & ped_ok_mask & ped_std_cut & rate_mask
# ... and after them:
after_pix_rate_cut = before_pix_rate_cut & pix_rate_mask

print_runs(dcheck_runsummary, before_pix_rate_cut, by_date=True)
print()
print("**********************")
print()
print_runs(dcheck_runsummary, after_pix_rate_cut, by_date=True)

### Check the rate of interleaved events 
It should be ~100 Hz, or ~50 Hz for the oldest LST1 data. The numbers are not exactly 50 and 100 because of dead time

In [None]:
# Rates of interleaved flatfield and pedestal events of every run:
rate_flatfield = dcheck_runsummary['num_flatfield'] / dcheck_runsummary['elapsed_time']
rate_pedestals = dcheck_runsummary['num_pedestals'] / dcheck_runsummary['elapsed_time']

# Plot them vs. run number for the runs selected so far:
mask = source_mask & zenith_mask & ped_ok_mask & ped_std_cut & rate_mask & pix_rate_mask
selected_run_numbers = dcheck_runsummary['runnumber'][mask]

plt.figure(figsize=(15,4))
plt.scatter(selected_run_numbers, rate_flatfield[mask], label='flatfield')
plt.scatter(selected_run_numbers, rate_pedestals[mask], s=5, label='pedestals')
plt.xlabel('Run number')
plt.ylabel('Interleaved rate (Hz)')
plt.ylim(0, 130)
plt.legend()
plt.grid()
plt.show()

### Muon ring rate and intensity vs. zenith angle:

In [None]:
mask = source_mask & zenith_mask & ped_ok_mask & ped_std_cut & rate_mask & pix_rate_mask

# Mean zenith angle (deg) of the selected runs, the x-axis of both figures:
mean_zenith_deg = 90 - np.rad2deg(dcheck_runsummary['mean_altitude'][mask])

# Rate of contained muon rings of every run:
rate_muons = dcheck_runsummary['num_contained_mu_rings'] / dcheck_runsummary['elapsed_time']

plt.figure(figsize=(15,4))
plt.scatter(mean_zenith_deg, rate_muons[mask])
plt.xlabel('Zenith angle (deg)')
plt.ylabel('Rate of contained muon rings (/s)')
plt.ylim(0, 5)
plt.grid()
plt.show()

# Average muon ring intensity:
plt.figure(figsize=(15,4))
plt.scatter(mean_zenith_deg, dcheck_runsummary['mu_intensity_mean'][mask])
plt.xlabel('Zenith angle (deg)')
plt.ylabel('Average muon ring intensity (p.e.)')
plt.ylim(0,2500)
plt.grid()
plt.show()

### Muon ring rate and intensity vs. run & date:

In [None]:
mask = source_mask & zenith_mask & ped_ok_mask & ped_std_cut & rate_mask & pix_rate_mask

# Average muon ring intensity vs. position of the run in the list of selected runs.
# A single, zoomed-in y range is set (an earlier 0-2500 range was immediately overridden).
plt.figure(figsize=(15,4))
plt.plot(dcheck_runsummary['mu_intensity_mean'][mask], 'o')
plt.xlabel('Run index in list')
plt.ylabel('Average muon ring intensity (p.e.)')
plt.ylim(1500, 2500)
plt.grid()
plt.show()


# The same quantity vs. the date of the run:
plt.figure(figsize=(15,4))
plt.scatter(utctime[mask], dcheck_runsummary['mu_intensity_mean'][mask], s=8)
plt.xlabel('Date (UTC)')
plt.ylabel('Average muon ring intensity (p.e.)')
plt.ylim(1500, 2500)
plt.grid()
plt.show()

Jump up in muon intensity between February and March 2021 probably related to the recovery of group 14 of mirrors:
https://www.lst1.iac.es/elog/LST+commissioning/1515
(although the increase in intensity is ~2.5%  and group 14 should be 9 mirrors out of 198, i.e. 4.8%)

In [None]:
mask = source_mask & zenith_mask & ped_ok_mask & ped_std_cut & rate_mask & pix_rate_mask

# One figure per quantity: (column name, y-axis label, upper limit of the y-axis)
muon_width_plots = [
    ('mu_width_mean', 'Average muon ring width (deg)', 0.1),
    ('mu_width_stddev', 'Muon ring width std dev (deg)', 0.03),
]

for column_name, y_label, y_max in muon_width_plots:
    plt.figure(figsize=(15,4))
    plt.plot(dcheck_runsummary[column_name][mask], 'o')
    plt.xlabel('Run index in list')
    plt.ylabel(y_label)
    plt.ylim(0, y_max)
    plt.grid()
    plt.show()

### Time resolution (from flatfield events)

In [None]:
# Std dev of the pixel time in flatfield events vs. run number, for the selected runs
mask = source_mask & zenith_mask & ped_ok_mask & ped_std_cut & rate_mask & pix_rate_mask
selected_run_numbers = dcheck_runsummary['runnumber'][mask]
ff_time_std = dcheck_runsummary['ff_rel_time_stddev'][mask]

plt.figure(figsize=(15,8))
plt.scatter(selected_run_numbers, ff_time_std)
plt.xlabel('Run number')
plt.ylabel('Camera-averaged std dev of pixel time (relative to rest of camera) in FF events')
plt.ylim(0, 0.6)
plt.grid()
plt.show()


Only a few runs have a larger value than the typical ~0.4 ns; we don't remove them since the value is still pretty good in absolute terms.

### Mean pixel charge in FF events

In [None]:
# Mean pixel charge in flatfield events vs. run number, for the selected runs
mask = source_mask & zenith_mask & ped_ok_mask & ped_std_cut & rate_mask & pix_rate_mask
plt.figure(figsize=(15,4))
plt.scatter(dcheck_runsummary['runnumber'][mask], dcheck_runsummary['ff_charge_mean'][mask])
# Axis labels, as in the other plots of this notebook
plt.xlabel('Run number')
plt.ylabel('Mean pixel charge in FF events')
plt.grid()
plt.ylim(0, 100)
plt.show()

No large deviations, no reason to remove any runs

### Final list of selected runs by date:

In [None]:
# Combination of all the cuts defined above: source pointing, zenith range, finite number
# of interleaved pedestals, pedestal charge std dev, cosmics rate and pixel pulse rates.
mask = source_mask & zenith_mask & ped_ok_mask & ped_std_cut & rate_mask & pix_rate_mask
# Print the selected runs, also listed by date:
print_runs(dcheck_runsummary, mask, by_date=True)


### Additional info: example of  how to look into subrun-wise info.  
Check variation of a subrun-wise calculated quantity within a run (using table dcheck_cosmics):

In [None]:
mask = source_mask & zenith_mask & ped_ok_mask & ped_std_cut & rate_mask & pix_rate_mask
# Run numbers of the runs selected so far:
run_list = np.array(dcheck_runsummary['runnumber'][mask])

# Cut to pick the so-far selected runs (with "mask") in the subrun-wise tables, like dcheck_cosmics.
# np.isin does the membership test in one vectorized call, and always returns a boolean array
# (one entry per row of dcheck_cosmics), also when the table is empty.
runselection = np.isin(np.asarray(dcheck_cosmics['runnumber']), run_list)

In [None]:
# Plot the subrun-wise values:

plt.figure(figsize=(15,4))

# Rate of cosmics of every subrun:
subrun_cosmics_rate = dcheck_cosmics['events'] / dcheck_cosmics['elapsed_time']

# One set of points per pulse charge threshold (in p.e.), all in the same figure
for npe in [10, 30]:
    pulse_fraction = dcheck_cosmics[f'fraction_pulses_above{npe}']
    rate_cosmics_pulses = pulse_fraction * subrun_cosmics_rate
    plt.plot(rate_cosmics_pulses[runselection], 'o', markersize=1,
             label=f'pulses of > {npe} p.e.')

plt.xlabel('Sub-run index in list')
plt.ylabel('Rate (/s)')
plt.yscale('log')

plt.legend()
plt.grid()
plt.show()


In [None]:
# Calculate and plot the run-wise standard deviation of the pixel rates
# std_npe gets one array per pulse threshold, with one value per run in run_list
std_npe = []

pulse_thresholds = [10, 30]  # p.e.
y_axis_maxima = [10, 1]      # upper limit of the y-axis for each threshold

for npe, maxy in zip(pulse_thresholds, y_axis_maxima):

    # Subrun-wise rate of pulses above the threshold:
    rate_cosmics_pulses = (dcheck_cosmics[f'fraction_pulses_above{npe}'] *
                           dcheck_cosmics['events'] / dcheck_cosmics['elapsed_time'])

    # Std dev of the subrun-wise rates within each of the selected runs:
    rate_std_per_run = np.array([np.std(rate_cosmics_pulses[dcheck_cosmics['runnumber'] == run])
                                 for run in run_list])
    std_npe.append(rate_std_per_run)

    plt.figure(figsize=(15,4))
    plt.plot(rate_std_per_run, 'o')
    plt.xlabel('Run index in list')
    plt.ylabel(f'std dev of rate of pulses of > {npe} p.e.')
    plt.ylim(0, maxy)
    plt.grid()
    plt.show()

In [None]:
# We might apply a cut of < 0.2 on the std dev of the rate of >30 p.e. pulses
# Perhaps this anomaly is produced by car flashes, or the MAGIC LIDAR...

# Create a mask that can be applied to the dcheck_runsummary table:

max_rate30_std = 0.2

# Runs of run_list whose std dev is not below the limit (this includes nan values).
# std_npe[1] holds the values for the 30 p.e. threshold, in the same order as run_list.
unstable_runs = run_list[~(std_npe[1] < max_rate30_std)]

# Boolean mask with one entry per row of dcheck_runsummary: False only for the runs above;
# runs which are not in run_list are left as True.
pulse30_std_cut = ~np.isin(np.asarray(dcheck_runsummary['runnumber']), unstable_runs)
