Fix NorthernTracks MC time simulation (#253)
mlincett committed Mar 27, 2023
1 parent a649b86 commit e550ab5
Showing 4 changed files with 348 additions and 45 deletions.
262 changes: 262 additions & 0 deletions examples/ipython_notebooks/NorthernTracks_MCtime_Benchmark.ipynb

Large diffs are not rendered by default.

29 changes: 19 additions & 10 deletions flarestack/core/time_pdf.py
@@ -159,8 +159,9 @@ def product_integral(self, t, source):

return f

def inverse_interpolate(self, source):
"""Calculates the values for the integral of the signal PDF within
def inverse_cumulative(self, source):
"""Calculates the inverse cumulative of the time PDF.
First, calculates the values for the integral of the signal PDF within
the season. Then rescales these values, such that the start of the
season yields 0, and the end of the season yields 1. Creates a
function to interpolate between these values. Then, for a number
@@ -172,10 +173,18 @@ def inverse_interpolate(self, source):
"""
max_int = self.product_integral(self.sig_t1(source), source)
min_int = self.product_integral(self.sig_t0(source), source)

logger.debug("Integral of the signal PDF:")
logger.debug(f"F({self.sig_t0(source)}) = {min_int}")
logger.debug(f"F({self.sig_t1(source)}) = {max_int}")

fraction = max_int - min_int

# number of grid points used for the interpolation
n_points = int(1e4)

t_range = np.linspace(
float(self.sig_t0(source)), float(self.sig_t1(source)), int(1e4)
float(self.sig_t0(source)), float(self.sig_t1(source)), n_points
)
cumu = (self.product_integral(t_range, source) - min_int) / fraction

@@ -196,7 +205,7 @@ def simulate_times(self, source, n_s):
:param n_s: Number of event times to be simulated
:return: Array of times in MJD for a given source
"""
f = self.inverse_interpolate(source)
f = self.inverse_cumulative(source)

sims = f(np.random.uniform(0.0, 1.0, n_s))
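
The rewrite above implements standard inverse transform sampling: build the rescaled cumulative of the time PDF on a grid, interpolate its inverse, and feed it uniform draws. A minimal standalone sketch of the same idea for a simple box PDF between hypothetical season bounds (illustrative values, not part of this diff):

import numpy as np
from scipy.interpolate import interp1d

t0, t1 = 55000.0, 55100.0  # hypothetical season bounds in MJD
n_points = int(1e4)  # interpolation grid, as in inverse_cumulative

t_range = np.linspace(t0, t1, n_points)
cumu = (t_range - t0) / (t1 - t0)  # cumulative of a box PDF, rescaled to [0, 1]

# Interpolating (cumu, t_range) yields the inverse cumulative, so uniform
# draws in [0, 1] map to event times in MJD.
inv_cdf = interp1d(cumu, t_range, kind="linear")
sims = inv_cdf(np.random.uniform(0.0, 1.0, 100))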

@@ -646,12 +655,6 @@ def __init__(self, t_pdf_dict, livetime_pdf=None):
self.livetime_to_mjd,
) = self.parse_list()

def parse_list(self):
t0 = min(self.on_off_list["start"])
t1 = max(self.on_off_list["stop"])
livetime = np.sum(self.on_off_list["length"])
return t0, t1, livetime

def f(self, t, source=None):
return self.season_f(t) / self.livetime
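
In other words, the detector time PDF is the on/off box function normalised by the total livetime: since season_f(t) is 1 while the detector is taking data and 0 otherwise, the integral of f over the season is livetime / livetime = 1, as a PDF requires.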

@@ -673,6 +676,12 @@ def get_livetime(self):
def get_mjd_conversion(self):
return self.mjd_to_livetime, self.livetime_to_mjd

def signal_integral(self, t, source=None):
return self.mjd_to_livetime(t) / self.get_livetime()

def inverse_cumulative(self, source=None):
return lambda t: self.livetime_to_mjd(t * self.get_livetime())
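
Because livetime_to_mjd only covers on-times, the inverse cumulative for on/off seasons needs no numerical inversion: a uniform draw u in [0, 1] corresponds to an accumulated livetime u * L, which maps back to a date inside a run. A minimal sketch assuming a toy season with a single one-day run (the real interpolators come from parse_list in ic_season.py below):

import numpy as np
from scipy.interpolate import interp1d

livetime_to_mjd = interp1d([0.0, 1.0], [55000.0, 55001.0])  # toy: one 1-day run
total_livetime = 1.0  # days

u = np.random.uniform(0.0, 1.0, 1000)  # uniform draws in [0, 1]
times_mjd = livetime_to_mjd(u * total_livetime)  # dates inside the run only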


@TimePDF.register_subclass("decay")
class DecayPDF(TimePDF):
27 changes: 20 additions & 7 deletions flarestack/data/icecube/ic_season.py
@@ -116,18 +116,25 @@ class IceCubeRunList(DetectorOnOffList):
"""

def parse_list(self):
"""Parses the GoodRunList to build a TimePDF
Returns:
t0, t1: min and max time of the GRL
full_livetime: livetime
season_f: interpolating function returning 1 or 0 as a function of time
mjd_to_livetime: function to convert a mjd to livetime [s]
livetime_to_mjd: function to convert a livetime [s] to mjd
"""
if list(self.on_off_list["run"]) != sorted(list(self.on_off_list["run"])):
logger.error("Error in ordering GoodRunList!")
logger.error("Runs are out of order!")
self.on_off_list = np.sort(self.on_off_list, order="run")

if self.t_dict.get("expect_gaps_in_grl", True):

mask = self.on_off_list["stop"][:-1] == self.on_off_list["start"][1:]

if np.sum(mask) > 0:

first_run = self.on_off_list["run"][:-1][mask][0]

logger.warning(
@@ -146,7 +153,6 @@ def parse_list(self):
)

while np.sum(mask) > 0:

index = list(mask).index(True)

self.on_off_list[index]["stop"] = self.on_off_list[index + 1][
@@ -170,7 +176,6 @@ def parse_list(self):
mask = self.on_off_list["stop"][:-1] < self.on_off_list["start"][1:]

if np.sum(~mask) > 0:

first_run = self.on_off_list["run"][:-1][~mask][0]

logger.error("The IceCube GoodRunList was not produced correctly.")
@@ -196,7 +201,6 @@ def parse_list(self):
logger.error("You have been warned!")

while np.sum(~mask) > 0:

index = list(~mask).index(True)

self.on_off_list[index]["stop"] = self.on_off_list[index + 1][
@@ -223,10 +227,14 @@ def parse_list(self):
step = 1e-12

t_range = [t0 - step]

f = [0.0]

# MJD timestamps marking the start and stop times of each run
mjd = [0.0]
# cumulative livetime at each timestamp [unit to be checked]
livetime = [0.0]
# cumulative sum of run lengths [unit to be checked]
total_t = 0.0

for i, run in enumerate(self.on_off_list):
@@ -236,6 +244,8 @@ def parse_list(self):
mjd.append(run["stop"])
livetime.append(total_t)

# Extends t_range and f with a box function of value 1 between the start and stop of the run,
# and adds zero values at times immediately adjacent to the start and stop of the run.
t_range.extend(
[run["start"] - step, run["start"], run["stop"], run["stop"] + step]
)
@@ -261,11 +271,15 @@ def parse_list(self):
f"Runs in GoodRunList are out of order for {self.on_off_list}. Check that!"
)

# end of the livetime function domain
mjd.append(1e5)

livetime.append(total_t)

season_f = interp1d(stitch_t, np.array(stitch_f), kind="linear")
# cumulative livetime [unit to be checked] as a function of the date [mjd]
mjd_to_livetime = interp1d(mjd, livetime, kind="linear")
# date [mjd] at which a given livetime [unit to be checked] is reached
livetime_to_mjd = interp1d(livetime, mjd, kind="linear")
return t0, t1, full_livetime, season_f, mjd_to_livetime, livetime_to_mjd
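
To make the mjd <-> livetime mapping concrete, here is a toy reconstruction of these interpolators for two hypothetical half-day runs (a sketch only; times in MJD and livetime in days here, while the unit in the real code is marked "to be checked" above):

import numpy as np
from scipy.interpolate import interp1d

runs = [(55000.0, 55000.5), (55001.0, 55001.5)]  # (start, stop) of each run in MJD

mjd, livetime, total_t = [0.0], [0.0], 0.0
for start, stop in runs:
    mjd.append(start)
    livetime.append(total_t)  # no livetime accumulates during the preceding gap
    total_t += stop - start
    mjd.append(stop)
    livetime.append(total_t)  # livetime grows linearly during the run

mjd_to_livetime = interp1d(mjd, livetime, kind="linear")
livetime_to_mjd = interp1d(livetime, mjd, kind="linear")

assert np.isclose(mjd_to_livetime(55001.5), 1.0)  # two half-day runs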

@@ -313,8 +327,7 @@ def load_data(self, path, **kwargs):

def build_time_pdf_dict(self):
"""Function to build a pdf for the livetime of the season. By
default, this is assumed to be uniform, spanning from the first to
the last event found in the data.
default, this exploits the good run list (GRL).
:return: Time pdf dictionary
"""
75 changes: 47 additions & 28 deletions flarestack/data/icecube/northern_tracks/__init__.py
@@ -50,50 +50,69 @@ def get_diffuse_binning(season):


class NTSeason(IceCubeSeason):
def get_background_model(self):
def get_background_model(self) -> dict:
"""Loads Monte Carlo dataset from file according to object path set in object properties.
Returns:
dict: Monte Carlo data set.
"""
mc = self.load_data(self.mc_path, cut_fields=False)
# According to the NT specifications (README),
# "conv" gives the weight for conventional atmospheric neutrinos;
# flarestack renames it to "weight".
mc = rename_fields(mc, {"conv": "weight"})
return mc

def simulate_background(self):
if isinstance(self.loaded_background_model, type(None)):
self.load_background_model()
rng = np.random.default_rng()

if self.loaded_background_model is None:
raise RuntimeError(
"Monte Carlo background is not loaded. Call `load_background_model` before `simulate_background`."
)

# base = self.get_background_model()
n_mc = len(self.loaded_background_model["weight"])

# Expected number of background events: sum of the MC background weights.
n_exp = np.sum(self.loaded_background_model["weight"])
# n_exp = np.sum(base["weight"])

# Simulates poisson noise around the expectation value n_inj.
n_bkg = np.random.poisson(n_exp)

# Creates a normalised array of OneWeights
# Creates a normalised array of atmospheric weights.
p_select = self.loaded_background_model["weight"] / n_exp
# p_select = base['weight'] / n_exp

# Creates an array with n_signal entries.
# Each entry is a random integer between 0 and no. of sources.
# The probability for each integer is equal to the OneWeight of
# the corresponding source_path.
ind = np.random.choice(
len(self.loaded_background_model["ow"]), size=n_bkg, p=p_select
)
# ind = np.random.choice(len(base['ow']), size=n_bkg, p=p_select)

# Selects the sources corresponding to the random integer array

# Simulates poisson noise around the expectation value n_exp.
n_bkg = rng.poisson(n_exp)

# Choose n_bkg from n_mc events according to background weight.
ind = rng.choice(n_mc, size=n_bkg, p=p_select)
sim_bkg = self.loaded_background_model[ind]
# sim_bkg = base[ind]
sim_bkg = sim_bkg[list(self.get_background_dtype().names)]
return sim_bkg

time_pdf = self.get_time_pdf()

# Simulates random times
sim_bkg["time"] = time_pdf.simulate_times(source=None, n_s=n_bkg)

# Check that the time pdf evaluates to 1 for all the simulated times.
pdf_sum = np.sum(time_pdf.season_f(sim_bkg["time"]))
if pdf_sum < n_bkg:
raise RuntimeError(
f"The time PDF does not evaluate to 1 for all generated event times.\n \
The sum of the PDF values over {n_bkg} events is {pdf_sum}.\n \
This means the sampling of background times is not reliable and must be fixed."
)

# Reduce the data to the relevant fields for analysis.
analysis_keys = list(self.get_background_dtype().names)
return sim_bkg[analysis_keys]
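
The core of the rewrite is weighted Poisson resampling of the MC: the expected background count is the sum of the per-event weights, the realised count fluctuates as a Poisson variable around it, and individual events are drawn with probability proportional to their weight. A self-contained sketch with made-up weights (hypothetical data, not part of the diff):

import numpy as np

rng = np.random.default_rng()
weights = rng.random(1000)  # stand-in for the "weight" column of the loaded MC

n_exp = weights.sum()  # expected number of background events
n_bkg = rng.poisson(n_exp)  # Poisson fluctuation around the expectation
ind = rng.choice(len(weights), size=n_bkg, p=weights / n_exp)  # weighted draw
sim_bkg = weights[ind]  # in flarestack, this indexes the full MC record array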


class NTSeasonNewStyle(NTSeason):
def get_background_model(self):
# in version 3 of the dataset the weights are given as rates in Hz
# In version >= 3 of the dataset the weights are given as rates in Hz
# instead of total events in the associated livetime.
# We deal with this by overwriting the MC set,
# which is possibly not the best course of action.
mc = super(NTSeasonNewStyle, self).get_background_model()
livetime = self.get_time_pdf().get_livetime()
mc["astro"] *= livetime * 86400.0
mc["weight"] *= livetime * 86400.0
mc["prompt"] *= livetime * 86400.0
for weight in ("astro", "weight", "prompt"):
mc[weight] *= livetime * 86400.0
return mc
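
As a worked example of this conversion (illustrative numbers only): a weight given as a rate of 1e-5 Hz over 3000 days of livetime becomes 1e-5 * 3000 * 86400 ≈ 2592 expected events, with the factor 86400 converting the livetime from days to seconds.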
