# This example notebook uses the provided OGIP files to plot the Spectral Energy Distribution and the Differential Spectrum

One can follow the tutorial from gammapy here https://docs.gammapy.org/0.18.2/tutorials/spectrum_analysis.html

# This notebook goes through the following steps:
## 1. Get the datasets from the OGIP files
## 2. Get some parameters (object name, energy binning) from the datasets
## 3. Get the pivot energy to use as reference energy for a Log Parabola model
## 4. Perform the Modeling and Fitting
## 5. Check some statistics and features of the Flux Points
## 6. Plot the various plots
## 7. Save the Flux Points Dataset and Models to separate files

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec

import pickle
from pathlib import Path

import astropy.units as u
from astropy.time import Time
from astropy.io import fits
from astropy.table import Table
import numpy as np

from gammapy.maps import MapAxis
from gammapy.modeling import Fit
from gammapy.modeling.models import (
    Models,
    PowerLawSpectralModel,
    LogParabolaSpectralModel,
    create_crab_spectral_model,
    SkyModel,
)
from gammapy.datasets import Datasets, SpectrumDataset, SpectrumDatasetOnOff, FluxPointsDataset

from gammapy.estimators import FluxPointsEstimator, FluxPoints

# 1. Get the OGIP files and make some selections if need be

In [None]:
# Location of the DL3 products. base_dir and dir_path are kept as plain strings
# because they are also concatenated into output file names.
base_dir="../data/"
dir_path="DL3/Crab_src_indep/" # DL3/BLLac_src_dep

ogip_path=Path(base_dir+dir_path+"OGIP/")
plot_path=Path(base_dir+dir_path+"plots/") # Optional directory to store plots

# Create the directories if they do not exist already.
# parents=True also creates any missing intermediate directory, so the cell
# does not fail with FileNotFoundError on a fresh checkout.
ogip_path.mkdir(parents=True, exist_ok=True)
plot_path.mkdir(parents=True, exist_ok=True)

In [None]:
# Using the full standard_dataset as provided
# True  -> use the hard-coded list of observation ids
# False -> collect the observation ids from the pha_obs*.fits files found in ogip_path
use_standard_data = True

In [None]:
# Build the list of observation ids to be analysed
if not use_standard_data:
    # Scan the OGIP directory for PHA files; the run number is the part of the
    # file name between the "pha_obs" prefix (7 chars) and the ".fits" suffix (5 chars)
    pha_files = list(ogip_path.glob("pha_obs*.fits"))
    obs_ids = np.sort(np.array([int(pha.name[7:-5]) for pha in pha_files]))
else:
    obs_ids = [2967, 2968, 2969, 2970, 2971, 2972, 2973, 2974, 2975, 2976, 2977] # For Crab
    # [5552, 5553, 5554, 5555, 5556, 5557, 5558, 5559] # for BL Lac

In [None]:
# Rebuild the Datasets container by reading one ON/OFF spectrum dataset
# per observation id from its OGIP PHA file
datasets = Datasets(
    [
        SpectrumDatasetOnOff.from_ogip_files(ogip_path / f"pha_obs{obs}.fits")
        for obs in obs_ids
    ]
)

# 2. Get some parameters to use from the datasets

In [None]:
# Collect the target name stored under the "OBJECT" key of the REGION HDU
# metadata of every PHA file
obj_names = []

for obs in obs_ids:
    file = ogip_path / f"pha_obs{obs}.fits"
    region_meta = Table.read(file, hdu="REGION").meta

    obj_names.append(region_meta["OBJECT"])
print(obs_ids)

# All observations are expected to point at the same target. If they do not,
# say so explicitly instead of silently picking one name.
unique_obj_names = np.unique(np.array(obj_names))
if len(unique_obj_names) > 1:
    print(
        f"WARNING: observations belong to several objects {list(unique_obj_names)}; "
        f"using '{unique_obj_names[0]}'"
    )

obj_name = unique_obj_names[0]

In [None]:
# Edges of the "energy" axis of the counts geometry of the first dataset
# (presumably the reconstructed-energy binning, going by the variable name)
e_reco_edges = datasets[0].counts.geom.axes["energy"].edges

In [None]:
# Energy range used for the flux point estimation.
# Different ranges/binnings can be tried to check the different spectral fits.
e_fit_min = 0.01 * u.TeV
e_fit_max = 40 * u.TeV

# Requested number of bins per decade of energy
e_fit_bin_p_dec = 5

# Number of bin edges needed to cover the range with that many bins per decade
log_e_fit_min = np.log10(e_fit_min.value)
log_e_fit_max = np.log10(e_fit_max.value)
e_fit_bin = int(round((log_e_fit_max - log_e_fit_min) * e_fit_bin_p_dec + 1, 0))

# Logarithmically spaced bin edges, in TeV
energy_fit_edges = np.logspace(log_e_fit_min, log_e_fit_max, e_fit_bin) * u.TeV

# 3. Get Pivot energy to fix the reference energy and define the Spectrum Model

In [None]:
# Find pivot (decorrelation) energy for a Power Law model to get the reference energy for Log Parabola model
def get_pivot_energy(datasets, e_ref, e_edges, obj_name):
    """
    Fit a Power Law spectral model to the stacked datasets and return the
    pivot (decorrelation) energy of the fitted model.

    Parameters
    ----------
    datasets : list of datasets or Datasets
        Datasets to be stacked (with ``stack_reduce``) and fitted.
    e_ref : Quantity
        Reference energy of the Power Law model used for the fit.
    e_edges : Quantity
        Currently NOT used: no energy mask is applied, so the fit uses
        whatever energy range the stacked dataset itself defines.
        The parameter is kept so that existing calls keep working.
    obj_name : str
        Name given to the sky model.

    Returns
    -------
    The ``pivot_energy`` of the fitted Power Law spectral model.
    """
    spectral_model = PowerLawSpectralModel(
        index=2, amplitude=2e-11 * u.Unit("cm-2 s-1 TeV-1"), reference=e_ref
    )
    # The model is local to this function, so it can be fitted directly
    # without making a copy first.
    model = SkyModel(spectral_model=spectral_model, name=obj_name)

    # Stacked dataset method
    stacked_dataset = Datasets(datasets).stack_reduce()
    stacked_dataset.models = model

    # Only the fitted model is needed afterwards; the fit result is not used
    Fit(stacked_dataset).run()

    return model.spectral_model.pivot_energy


In [None]:
%%time
# Pivot energy of a Power Law fit started with a reference energy of 0.4 TeV;
# it is used as the reference energy of the Log Parabola model
ref = get_pivot_energy(datasets, 0.4 * u.TeV, e_reco_edges, obj_name)
# Print the value converted to GeV
print(ref.to_value(u.GeV))

In [None]:
# Log Parabola spectral models, to be used for the spectral fit.
# Other spectral models can be tried as well, but take care to pick the
# model matching the source/dataset actually being analysed.

# Crab: the reference energy is set to `ref`
spectral_model_lp = LogParabolaSpectralModel(
    amplitude=5e-12 * u.Unit('cm-2 s-1 TeV-1'),
    reference=ref,
    alpha=2 * u.Unit(''),
    beta=0.1 * u.Unit(''),
)
model_lp = SkyModel(spectral_model=spectral_model_lp, name=obj_name)

# BL Lac: fixed reference energy of 0.1 TeV
spectral_model_lp_bllac = LogParabolaSpectralModel(
    amplitude=3e-8 * u.Unit('cm-2 s-1 TeV-1'),
    reference=0.1 * u.TeV,
    alpha=2 * u.Unit(''),
    beta=0.2 * u.Unit(''),
)
model_lp_bllac = SkyModel(spectral_model=spectral_model_lp_bllac, name=obj_name)

In [None]:
# Use the appropriate models, as per the selection of the source/dataset
params=model_lp.to_dict()['spectral']['parameters']
# params=model_lp_bllac.to_dict()['spectral']['parameters']
# Show the spectral parameters of the selected model as the cell output
params

# 4. Spectral Fitting
For a more comprehensive tutorial on modelling and fitting, see the gammapy tutorial: https://docs.gammapy.org/0.18.2/tutorials/modeling.html

In [None]:
%%time
# Using stacked analysis method, where we stack together all Datasets into 1 Dataset and add the model afterwards
stacked_dataset = Datasets(datasets).stack_reduce()
# Attach the Log Parabola model to the stacked dataset
stacked_dataset.models = model_lp

In [None]:
%%time
# Fitting the model to the dataset
fit = Fit([stacked_dataset])
result = fit.run()
# Copy of the model taken AFTER the fit, kept under a separate name.
# NOTE(review): the copy is made once the fit has run, so it does not protect
# model_lp from being changed by the fit; presumably the fit updates the
# parameters of model_lp in place and model_best snapshots them - confirm.
model_best = model_lp.copy()

In [None]:
%%time
# Estimate the flux points in the chosen energy bins, after the model fit.
# The estimator is used with few options here (reoptimize is switched off);
# its other attributes can be explored if needed.
fpe = FluxPointsEstimator(energy_edges=energy_fit_edges, reoptimize=False)
flux_points = fpe.run(datasets=stacked_dataset)

# Flag as upper limits all flux points whose TS value is below the threshold
ts_ul_threshold = 4
flux_points.table["is_ul"] = flux_points.table["ts"] < ts_ul_threshold

# Flux points together with the model they are compared to
flux_points_dataset = FluxPointsDataset(data=flux_points, models=model_best)

In [None]:
# Display the object returned by fit.run()
result

In [None]:
# Display the spectral parameters of model_best as a list of dicts
model_best.to_dict()['spectral']['parameters']

# 5. Check some features of the Flux points

In [None]:
# Check the Flux table (formatted version, displayed as the cell output)
flux_points.table_formatted

In [None]:
# Fit Statistic array of the flux points dataset (flux points vs. model_best)
print(flux_points_dataset.stat_array())

# Total statistics sum
print(flux_points_dataset.stat_sum())

In [None]:
# Display the parameters of model_best as a table
model_best.parameters.to_table()

# 6. Spectral plots

In [None]:
# Legend label of the reference MAGIC Log Parabola curve drawn on the plots;
# the "Crab" prefix is added when the analysed object is not named 'Crab'
ref_label = "MAGIC LP (JHEAp 2015)" if obj_name == 'Crab' else "Crab MAGIC LP (JHEAp 2015)"

In [None]:
%%time
# Axis limits and keyword arguments shared by the plotting cells
e_plot_min = 0.01 * u.TeV
e_plot_max = 50 * u.TeV

flux_plot_min = 7e-12
flux_plot_max = 2e-10

# Reference model drawn as an SED (flux multiplied by energy squared)
plot_kwargs = dict(
    energy_range=[e_plot_min, e_plot_max],
    energy_power=2,
    flux_unit="erg-1 cm-2 s-1",
)
# Reference model drawn as a differential spectrum: only the energy range is set
plot_en_kwargs = dict(energy_range=[e_plot_min, e_plot_max])
# Flux points drawn underneath the TS profiles
plot_ts_kwargs = dict(
    energy_power=2,
    flux_unit="erg-1 cm-2 s-1",
    color="darkorange",
)

In [None]:
# TS profile plot

# Rows whose "norm_scan" is entirely NaN are given the "norm_scan" of the first
# successful row instead. The replacement does not depend on the row being
# patched, so it is looked up once, outside the loop.
fp_table = flux_points.table
success_rows = np.where(fp_table["success"])[0]

if len(success_rows) > 0:
    fallback_norm_scan = fp_table["norm_scan"][success_rows[0]].copy()
    for i in range(len(fp_table)):
        if np.isnan(fp_table["norm_scan"][i]).all():
            fp_table["norm_scan"][i] = fallback_norm_scan
# With no successful row there is nothing to substitute: the table is left
# untouched instead of failing with an IndexError.

plt.figure(figsize=(8,5))
ax = flux_points.plot(**plot_ts_kwargs)

flux_points.to_sed_type("e2dnde").plot_ts_profiles(ax=ax)
plt.grid(which='both')
plt.title('TS Profiles')

In [None]:
# Fit model covariance matrix plot (drawn as a parameter correlation plot)
model_best.covariance.plot_correlation()

In [None]:
# SED (upper panel) with the residuals of the flux points (lower panel)
fig_sed = plt.figure(figsize=(8,8))

# 7-row grid: the first 5 rows hold the SED, the last 2 rows the residuals
gs2 = GridSpec(7, 1)
gs2.update(hspace=0.1)

fig_gs1 = fig_sed.add_subplot(gs2[:5,:])
fig_gs2 = fig_sed.add_subplot(gs2[5:,:])

# Upper panel: flux points with the model, plus the reference Crab spectrum
FluxPointsDataset(data=flux_points, models=model_best).plot_spectrum(ax=fig_gs1, label="LST-1 data")
create_crab_spectral_model("magic_lp").plot(ax=fig_gs1, label=ref_label, **plot_kwargs)

fig_gs1.set_title('SED')
fig_gs1.set_xlim(e_plot_min.value, e_plot_max.value)
#fig_gs1.set_ylim(5e-12, 5e-10)
# The x tick labels are hidden on the upper panel
fig_gs1.tick_params(labelbottom=False)
fig_gs1.grid(which='both')
fig_gs1.legend()

# Lower panel: residuals computed with the 'diff/model' method
flux_points_dataset.plot_residuals(ax=fig_gs2, method='diff/model')
fig_gs2.set_xlim(e_plot_min.value, e_plot_max.value)
fig_gs2.set_ylim(-1,1)
fig_gs2.grid(which='both')

In [None]:
# Excess counts of the stacked dataset (upper panel) and their residuals (lower panel)
fig = plt.figure(figsize=(8,7))

# 7-row grid: the first 5 rows hold the excess, the last 2 rows the residuals
gs = GridSpec(7, 1)
fig_gs1 = fig.add_subplot(gs[:5,:])
fig_gs2 = fig.add_subplot(gs[5:,:])

# Upper panel
stacked_dataset.plot_excess(fig_gs1)
fig_gs1.set_ylabel("Excess")
fig_gs1.grid(which="both")

# Lower panel: "diff" residuals, using the region of the counts geometry
kwargs_res = dict(method="diff", region=stacked_dataset.counts.geom.region)
stacked_dataset.plot_residuals_spectral(fig_gs2, **kwargs_res)
fig_gs2.set_ylabel("Residuals\n data-model")
fig_gs2.grid(which="both")

In [None]:
# Differential spectrum: flux points compared with the reference MAGIC Log Parabola
plt.figure(figsize=(8,5))

# Both plot calls are made without an explicit ax, after creating the figure above
flux_points.plot(label='Joint flux')
create_crab_spectral_model("magic_lp").plot(**plot_en_kwargs, label=ref_label)
plt.grid(which='both')
plt.legend()
plt.title('Differential spectrum')

# 7. Save the Flux Points Dataset and Model to separate files

In [None]:
# Common prefix of both output files: object name plus the names of the
# first and last dataset
out_prefix = base_dir + dir_path + f'{obj_name}_dataset_{datasets[0].name}_to_{datasets[-1].name}'

# Flux points, written as a FITS file (an existing file is overwritten)
flux_points.write(out_prefix + '_flux_pts.fits', overwrite=True)

# write the model to a dict and then to a file
# full_output is given as the boolean True (the string 'True' only worked
# because any non-empty string is truthy)
model_dict = model_best.to_dict(full_output=True)

# The with-block closes the file even if pickling fails.
# NOTE: pickle files must only be loaded back from trusted sources.
with open(out_prefix + '_flux_model_dict.dat', 'wb') as f:
    pickle.dump(model_dict, f)