Fix NorthernTracks MC time simulation (#253)
mlincett committed Mar 27, 2023
1 parent a649b86 commit e550ab5
Showing 4 changed files with 348 additions and 45 deletions.
262 changes: 262 additions & 0 deletions examples/ipython_notebooks/NorthernTracks_MCtime_Benchmark.ipynb

Large diffs are not rendered by default.

29 changes: 19 additions & 10 deletions flarestack/core/time_pdf.py
@@ -159,8 +159,9 @@ def product_integral(self, t, source):

return f

def inverse_interpolate(self, source):
"""Calculates the values for the integral of the signal PDF within
def inverse_cumulative(self, source):
"""Calculates the inverse cumulative of the time PDF.
First, calculates the values for the integral of the signal PDF within
the season. Then rescales these values, such that the start of the
season yields 0, and the end of the season yields 1. Creates a
function to interpolate between these values. Then, for a number
@@ -172,10 +173,18 @@ def inverse_interpolate(self, source):
"""
max_int = self.product_integral(self.sig_t1(source), source)
min_int = self.product_integral(self.sig_t0(source), source)

logger.debug("Integral of the signal PDF:")
logger.debug(f"F({self.sig_t0(source)}) = {min_int}")
logger.debug(f"F({self.sig_t1(source)}) = {max_int}")

fraction = max_int - min_int

# number of grid points used for the interpolation
n_points = int(1e4)

t_range = np.linspace(
float(self.sig_t0(source)), float(self.sig_t1(source)), int(1e4)
float(self.sig_t0(source)), float(self.sig_t1(source)), n_points
)
cumu = (self.product_integral(t_range, source) - min_int) / fraction

@@ -196,7 +205,7 @@ def simulate_times(self, source, n_s):
:param n_s: Number of event times to be simulated
:return: Array of times in MJD for a given source
"""
f = self.inverse_interpolate(source)
f = self.inverse_cumulative(source)

sims = f(np.random.uniform(0.0, 1.0, n_s))
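
The rewrite above implements standard inverse transform sampling: build the rescaled cumulative of the time PDF on a grid, interpolate its inverse, and feed it uniform draws. A minimal standalone sketch of the same idea for a simple box PDF between hypothetical season bounds (illustrative values, not part of this diff):

import numpy as np
from scipy.interpolate import interp1d

t0, t1 = 55000.0, 55100.0  # hypothetical season bounds in MJD
n_points = int(1e4)  # interpolation grid, as in inverse_cumulative

t_range = np.linspace(t0, t1, n_points)
cumu = (t_range - t0) / (t1 - t0)  # cumulative of a box PDF, rescaled to [0, 1]

# Interpolating (cumu, t_range) yields the inverse cumulative, so uniform
# draws in [0, 1] map to event times in MJD.
inv_cdf = interp1d(cumu, t_range, kind="linear")
sims = inv_cdf(np.random.uniform(0.0, 1.0, 100))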

@@ -646,12 +655,6 @@ def __init__(self, t_pdf_dict, livetime_pdf=None):
self.livetime_to_mjd,
) = self.parse_list()

def parse_list(self):
t0 = min(self.on_off_list["start"])
t1 = max(self.on_off_list["stop"])
livetime = np.sum(self.on_off_list["length"])
return t0, t1, livetime

def f(self, t, source=None):
return self.season_f(t) / self.livetime
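
In other words, the detector time PDF is the on/off box function normalised by the total livetime: since season_f(t) is 1 while the detector is taking data and 0 otherwise, the integral of f over the season is livetime / livetime = 1, as a PDF requires.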

@@ -673,6 +676,12 @@ def get_livetime(self):
def get_mjd_conversion(self):
return self.mjd_to_livetime, self.livetime_to_mjd

def signal_integral(self, t, source=None):
return self.mjd_to_livetime(t) / self.get_livetime()

def inverse_cumulative(self, source=None):
return lambda t: self.livetime_to_mjd(t * self.get_livetime())
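
Because livetime_to_mjd only covers on-times, the inverse cumulative for on/off seasons needs no numerical inversion: a uniform draw u in [0, 1] corresponds to an accumulated livetime u * L, which maps back to a date inside a run. A minimal sketch assuming a toy season with a single one-day run (the real interpolators come from parse_list in ic_season.py below):

import numpy as np
from scipy.interpolate import interp1d

livetime_to_mjd = interp1d([0.0, 1.0], [55000.0, 55001.0])  # toy: one 1-day run
total_livetime = 1.0  # days

u = np.random.uniform(0.0, 1.0, 1000)  # uniform draws in [0, 1]
times_mjd = livetime_to_mjd(u * total_livetime)  # dates inside the run only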


@TimePDF.register_subclass("decay")
class DecayPDF(TimePDF):
27 changes: 20 additions & 7 deletions flarestack/data/icecube/ic_season.py
@@ -116,18 +116,25 @@ class IceCubeRunList(DetectorOnOffList):
"""

def parse_list(self):
"""Parses the GoodRunList to build a TimePDF
Returns:
t0, t1: min and max time of the GRL
full_livetime: livetime
season_f: interpolating function returning 1 or 0 as a function of time
mjd_to_livetime: function to convert a mjd to livetime [s]
livetime_to_mjd: function to convert a livetime [s] to mjd
"""
if list(self.on_off_list["run"]) != sorted(list(self.on_off_list["run"])):
logger.error("Error in ordering GoodRunList!")
logger.error("Runs are out of order!")
self.on_off_list = np.sort(self.on_off_list, order="run")

if self.t_dict.get("expect_gaps_in_grl", True):

mask = self.on_off_list["stop"][:-1] == self.on_off_list["start"][1:]

if np.sum(mask) > 0:

first_run = self.on_off_list["run"][:-1][mask][0]

logger.warning(
@@ -146,7 +153,6 @@ def parse_list(self):
)

while np.sum(mask) > 0:

index = list(mask).index(True)

self.on_off_list[index]["stop"] = self.on_off_list[index + 1][
@@ -170,7 +176,6 @@ def parse_list(self):
mask = self.on_off_list["stop"][:-1] < self.on_off_list["start"][1:]

if np.sum(~mask) > 0:

first_run = self.on_off_list["run"][:-1][~mask][0]

logger.error("The IceCube GoodRunList was not produced correctly.")
@@ -196,7 +201,6 @@ def parse_list(self):
logger.error("You have been warned!")

while np.sum(~mask) > 0:

index = list(~mask).index(True)

self.on_off_list[index]["stop"] = self.on_off_list[index + 1][
@@ -223,10 +227,14 @@ def parse_list(self):
step = 1e-12

t_range = [t0 - step]

f = [0.0]

# MJD timestamps marking the start and stop times of each run
mjd = [0.0]
# cumulative livetime at each timestamp [unit to be checked]
livetime = [0.0]
# cumulative sum of run lengths [unit to be checked]
total_t = 0.0

for i, run in enumerate(self.on_off_list):
@@ -236,6 +244,8 @@ def parse_list(self):
mjd.append(run["stop"])
livetime.append(total_t)

# Extends t_range and f with a box function of value 1 between the start and stop of the run,
# and adds zero values at times immediately adjacent to the start and stop of the run.
t_range.extend(
[run["start"] - step, run["start"], run["stop"], run["stop"] + step]
)
@@ -261,11 +271,15 @@ def parse_list(self):
f"Runs in GoodRunList are out of order for {self.on_off_list}. Check that!"
)

# end of the livetime function domain
mjd.append(1e5)

livetime.append(total_t)

season_f = interp1d(stitch_t, np.array(stitch_f), kind="linear")
# cumulative livetime [unit to be checked] as a function of the date [mjd]
mjd_to_livetime = interp1d(mjd, livetime, kind="linear")
# date [mjd] at which a given livetime [unit to be checked] is reached
livetime_to_mjd = interp1d(livetime, mjd, kind="linear")
return t0, t1, full_livetime, season_f, mjd_to_livetime, livetime_to_mjd
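
To make the mjd <-> livetime mapping concrete, here is a toy reconstruction of these interpolators for two hypothetical half-day runs (a sketch only; times in MJD and livetime in days here, while the unit in the real code is marked "to be checked" above):

import numpy as np
from scipy.interpolate import interp1d

runs = [(55000.0, 55000.5), (55001.0, 55001.5)]  # (start, stop) of each run in MJD

mjd, livetime, total_t = [0.0], [0.0], 0.0
for start, stop in runs:
    mjd.append(start)
    livetime.append(total_t)  # no livetime accumulates during the preceding gap
    total_t += stop - start
    mjd.append(stop)
    livetime.append(total_t)  # livetime grows linearly during the run

mjd_to_livetime = interp1d(mjd, livetime, kind="linear")
livetime_to_mjd = interp1d(livetime, mjd, kind="linear")

assert np.isclose(mjd_to_livetime(55001.5), 1.0)  # two half-day runs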

@@ -313,8 +327,7 @@ def load_data(self, path, **kwargs):

def build_time_pdf_dict(self):
"""Function to build a pdf for the livetime of the season. By
default, this is assumed to be uniform, spanning from the first to
the last event found in the data.
default, this exploits the good run list (GRL).
:return: Time pdf dictionary
"""
75 changes: 47 additions & 28 deletions flarestack/data/icecube/northern_tracks/__init__.py
@@ -50,50 +50,69 @@ def get_diffuse_binning(season):


class NTSeason(IceCubeSeason):
def get_background_model(self):
def get_background_model(self) -> dict:
"""Loads Monte Carlo dataset from file according to object path set in object properties.
Returns:
dict: Monte Carlo data set.
"""
mc = self.load_data(self.mc_path, cut_fields=False)
# According to the NT specifications (README),
# "conv" gives the weight for conventional atmospheric neutrinos;
# flarestack renames it to "weight".
mc = rename_fields(mc, {"conv": "weight"})
return mc

def simulate_background(self):
if isinstance(self.loaded_background_model, type(None)):
self.load_background_model()
rng = np.random.default_rng()

if self.loaded_background_model is None:
raise RuntimeError(
"Monte Carlo background is not loaded. Call `load_background_model` before `simulate_background`."
)

# base = self.get_background_model()
n_mc = len(self.loaded_background_model["weight"])

# Expected number of background events: sum of the MC background weights.
n_exp = np.sum(self.loaded_background_model["weight"])
# n_exp = np.sum(base["weight"])

# Simulates poisson noise around the expectation value n_inj.
n_bkg = np.random.poisson(n_exp)

# Creates a normalised array of OneWeights
# Creates a normalised array of atmospheric weights.
p_select = self.loaded_background_model["weight"] / n_exp
# p_select = base['weight'] / n_exp

# Creates an array with n_signal entries.
# Each entry is a random integer between 0 and no. of sources.
# The probability for each integer is equal to the OneWeight of
# the corresponding source_path.
ind = np.random.choice(
len(self.loaded_background_model["ow"]), size=n_bkg, p=p_select
)
# ind = np.random.choice(len(base['ow']), size=n_bkg, p=p_select)

# Selects the sources corresponding to the random integer array

# Simulates poisson noise around the expectation value n_exp.
n_bkg = rng.poisson(n_exp)

# Choose n_bkg from n_mc events according to background weight.
ind = rng.choice(n_mc, size=n_bkg, p=p_select)
sim_bkg = self.loaded_background_model[ind]
# sim_bkg = base[ind]
sim_bkg = sim_bkg[list(self.get_background_dtype().names)]
return sim_bkg

time_pdf = self.get_time_pdf()

# Simulates random times
sim_bkg["time"] = time_pdf.simulate_times(source=None, n_s=n_bkg)

# Check that the time pdf evaluates to 1 for all the simulated times.
pdf_sum = np.sum(time_pdf.season_f(sim_bkg["time"]))
if pdf_sum < n_bkg:
raise RuntimeError(
f"The time PDF does not evaluate to 1 for all generated event times.\n \
The sum of the PDF values over {n_bkg} events is {pdf_sum}.\n \
This means the sampling of background times is not reliable and must be fixed."
)

# Reduce the data to the relevant fields for analysis.
analysis_keys = list(self.get_background_dtype().names)
return sim_bkg[analysis_keys]
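
The core of the rewrite is weighted Poisson resampling of the MC: the expected background count is the sum of the per-event weights, the realised count fluctuates as a Poisson variable around it, and individual events are drawn with probability proportional to their weight. A self-contained sketch with made-up weights (hypothetical data, not part of the diff):

import numpy as np

rng = np.random.default_rng()
weights = rng.random(1000)  # stand-in for the "weight" column of the loaded MC

n_exp = weights.sum()  # expected number of background events
n_bkg = rng.poisson(n_exp)  # Poisson fluctuation around the expectation
ind = rng.choice(len(weights), size=n_bkg, p=weights / n_exp)  # weighted draw
sim_bkg = weights[ind]  # in flarestack, this indexes the full MC record array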


class NTSeasonNewStyle(NTSeason):
def get_background_model(self):
# in version 3 of the dataset the weights are given as rates in Hz
# In version >= 3 of the dataset the weights are given as rates in Hz
# instead of total events in the associated livetime.
# We deal with this by overwriting the MC set,
# which is possibly not the best course of action.
mc = super(NTSeasonNewStyle, self).get_background_model()
livetime = self.get_time_pdf().get_livetime()
mc["astro"] *= livetime * 86400.0
mc["weight"] *= livetime * 86400.0
mc["prompt"] *= livetime * 86400.0
for weight in ("astro", "weight", "prompt"):
mc[weight] *= livetime * 86400.0
return mc
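
As a worked example of this conversion (illustrative numbers only): a weight given as a rate of 1e-5 Hz over 3000 days of livetime becomes 1e-5 * 3000 * 86400 ≈ 2592 expected events, with the factor 86400 converting the livetime from days to seconds.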
