Skip to content

Commit

Permalink
Merge 2f8e57a into 70f00d9
Browse files Browse the repository at this point in the history
  • Loading branch information
Waelthus committed Mar 30, 2022
2 parents 70f00d9 + 2f8e57a commit 9df9013
Show file tree
Hide file tree
Showing 6 changed files with 5,439 additions and 44 deletions.
Binary file modified docs/delta_extraction/data_model/data_model-Data.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/delta_extraction/data_model/data_model-Main.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
5,391 changes: 5,390 additions & 1 deletion docs/delta_extraction/data_model/data_model.drawio

Large diffs are not rendered by default.

26 changes: 22 additions & 4 deletions py/picca/delta_extraction/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@
"lambda max", "lambda max rest frame",
"lambda min", "lambda min rest frame",
"minimum number pixels in forest",
"out dir", "rejection log file"]
"out dir", "rejection log file",
"minimal snr",
"minimal snr pk1d","minimal snr bao3d", #these options are allowed but will be overwritten by minimal snr (only needed to allow running on a .config with default options)
]

defaults = {
"analysis type": "BAO 3D",
Expand All @@ -30,6 +33,8 @@
"lambda min rest frame": 1040.0,
"minimum number pixels in forest": 50,
"rejection log file": "rejection_log.fits.gz",
"minimal snr pk1d": 1,
"minimal snr bao3d": 0,
}

accepted_analysis_type = ["BAO 3D", "PK 1D"]
Expand Down Expand Up @@ -93,6 +98,8 @@ def __init__(self, config):
self.min_num_pix = None
self.out_dir = None
self.rejection_log_file = None
self.min_snr = None

self.__parse_config(config)

# rejection log arays
Expand Down Expand Up @@ -204,6 +211,18 @@ def __parse_config(self, config):
"should en with '.fits' or '.fits.gz'. Found "
f"'{self.rejection_log_file}'")

self.min_snr = config.getfloat("minimal snr")

if self.min_snr is None:
if self.analysis_type == "BAO 3D":
self.min_snr = config.getfloat("minimal snr bao3d")
elif self.analysis_type == "PK 1D":
self.min_snr = config.getfloat("minimal snr pk1d")
if self.min_snr is None:
raise DataError(
"Missing arguments 'minimal snr bao3d' (if 'analysis type' = 'BAO 3D') or ' minimal snr pk1d' (if 'analysis type' = 'Pk1d') required by Data")


def add_to_rejection_log(self, header, size, rejection_status):
"""Adds to the rejection log arrays.
In the log forest headers will be saved along with the forest size and
Expand Down Expand Up @@ -300,13 +319,12 @@ def filter_forests(self):
self.logger.progress(
f"Rejected forest with los_id {forest.los_id} "
"due to finding nan")
elif self.analysis_type=='PK 1D' and forest.mean_snr<1:
#TODO: add variable for the SNR cut, actually only required for constant weights...
elif forest.mean_snr < self.min_snr:
self.add_to_rejection_log(forest.get_header(), forest.flux.size,
f"low SNR ({forest.mean_snr})")
self.logger.progress(
f"Rejected forest with los_id {forest.los_id} "
"due to low SNR")
f"due to low SNR ({forest.mean_snr} < {self.min_snr})")
else:
continue

Expand Down
2 changes: 2 additions & 0 deletions py/picca/tests/delta_extraction/data/.config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ minimum number pixels in forest = 50
mode = spplate
best obs = False
keep bal = False
minimal snr pk1d = 1
minimal snr bao3d = 0

[corrections]
num corrections = 4
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,22 +132,32 @@
"- `type`: Contains the name of the class used to load your data. For example \"DesiData\" or \"SdssData\". Basically can be any data type stored in the folder `py/picca/delta_extraction/data/`. **Type: str, Required: yes**\n",
"- `module name`: Name of the file containing the data type. If the file is named following the convention this should normally not be necessary. **Type: str, Required: no**\n",
"\n",
"Other arguments will be passed to the constructor of the selected class. Let's review the arguments required by the available Data types\n",
"There are some arguments that are passed for any `type`:\n",
"\n",
"### SdssData\n",
"- `analysis type`: Selected analysis type. Current options are \"BAO 3D\" or \"PK 1D\" **Type: str, Required: no, Default: \"BAO 3D\"**\n",
"- `best obs`: If True, reads only the best observation for objects with repeated observations **Type: bool, Required: no, Default: False**\n",
"- `BI max`: Maximum value allowed for the Balnicity Index to keep the quasar **Type: float or None, Required: no**\n",
"- `drq catalogue`: Name of the quasar catalogue (in DRQ format) **Type: str, Required: yes** \n",
"- `input directory`: Directory to spectra files. **Type: str, Required: yes**\n",
"- `keep BAL`: If False, remove the quasars flagged as having a Broad Absorption Line. Ignored if bi_max is not None **Type: str, Required: no, Default: False**\n",
"- `max num spec`: Maximum number of spectra to read. None for no maximum. **Type: int or None, Required: no, Default: None**\n",
"- `lambda abs IGM`: Wavelength of the IGM absorber. Must be one of the keys of `ABSORBER_IGM` in `delta_extraction/utils.py`. Used only if `analysis type` is \"PK 1D\". **Type: str, Required: no, Default: LYA**\n",
"- `lambda max`: Upper limit on observed wavelength [Angstrom] **Type: float, Required: no, Default: 5500.0**\n",
"- `lambda max rest frame`: Upper limit on rest frame wavelength [Angstrom] **Type: float, Required: no, Default: 1200.0**\n",
"- `lambda min`: Lower limit on observed wavelength [Angstrom] **Type: float, Required: no, Default: 3600.0**\n",
"- `lambda min rest frame`: Lower limit on rest frame wavelength [Angstrom] **Type: float, Required: no, Default: 1040.0**\n",
"- `minimal snr`: Minimal S/N ratio required for forests to be accepted. **Type: float, Required: no, Default: 0 if `analysis type` is \"BAO 3D\", 1 if `analysis type` is \"PK 1D\"**\n",
"- `minimum number pixels in forest`: Minimum number of pixels in a forest. Forests with less pixels will be dropped. **Type: int, Required: no, Default: 50**\n",
"- `max num spec`: Maximum number of spectra to read. None for no maximum. **Type: int or None, Required: no, Default: None**\n",
"- `rejection log file`: Filename of the rejection log. Must have extension .fits or .fits.gz **Type: str, Required: no, Default: \"rejection_log.fits.gz\"**\n",
"- `z max`: Maximum redshift. Quasars with redshifts higher than or equal to z_max will be discarded. If not specified, it will be computed at runtime based on the values of `lambda max` and `lambda min rest frame` as $z_{\\rm max} = \\max\\left(0, \\lambda_{\\rm max} / \\lambda_{\\rm min, rf} - 1\\right)$\n",
"- `z min`: Minimum redshift. Quasars with redshifts lower than z_min will be discarded. If not specified, it will be computed at runtime based on the values of `lambda min` and `lambda max rest frame` as $z_{\\rm min} = \\max\\left(0, \\lambda_{\\rm min} / \\lambda_{\\rm max, rf} - 1\\right)$ **Type: float, Required: False**\n",
"\n",
"Other arguments will be passed to the constructor of the selected class. Let's review the arguments required by the available Data types\n",
"\n",
"### SdssData\n",
"\n",
"`SdssData` will always use `wave solution = log`.\n",
"\n",
"- `best obs`: If True, reads only the best observation for objects with repeated observations **Type: bool, Required: no, Default: False**\n",
"- `BI max`: Maximum value allowed for the Balnicity Index to keep the quasar **Type: float or None, Required: no**\n",
"- `drq catalogue`: Name of the quasar catalogue (in DRQ format) **Type: str, Required: yes** \n",
"- `keep BAL`: If False, remove the quasars flagged as having a Broad Absorption Line. Ignored if bi_max is not None **Type: str, Required: no, Default: False**\n",
"- `mode`: Reading mode. Currently supported reading modes are \"spplate\" and \"spec\". **Type: str, Required: no, Default: \"spplate\"**\n",
"- `rebin`: Rebin wavelength grid by combining this number of adjacent pixels (ivar weight). **Type: int, Required: no, Default: 3**\n",
"- `rejection log file`: Filename of the rejection log. Must have extension .fits or .fits.gz **Type: str, Required: no, Default: \"rejection_log.fits.gz\"**\n",
Expand All @@ -157,42 +167,27 @@
"\n",
"\n",
"### DesiHealpix\n",
"- `analysis type`: Selected analysis type. Current options are \"BAO 3D\" or \"PK 1D\" **Type: str, Required: no, Default: \"BAO 3D\"\n",
"\n",
"- `blinding`: Selected blinding strategy (\"none\" for no blinding). Available strategies listed in `py/picca/delta_extraction/utils.py` in variable `ACCEPTED_BLINDING_STRATEGIES`. For the moment, blinding strategy \"corr_yshift\" is automatically engaged for main survey data, and blinding strategy \"none\" is automatically engaged for SV data or mocks. **Type: str, Required: no, Default: \"corr_yshift\"**\n",
"- `catalogue`: Name of the z_truth catalogue. **Type: str, Required: yes**\n",
"- `delta lambda`: Variation of the wavelength (in Angs) between two pixels. Ignored if `wave solution` is \"log\" **Type: float, Required: no, Default: 1.0**\n",
"- `delta log lambda`: Variation of the logarithm of the wavelength (in Angs) between two pixels. Ignored if `wave solution` is \"lin\" **Type: float, Required: no, Default: 1e-4**\n",
"- `input directory`: Directory to spectra files. **Type: str, Required: yes**\n",
"- `keep surveys`: Only keep the entries in the catalogue whose \"SURVEY\" column matches one of the values specified here. Ignored if the \"SURVEY\" column is not present in the catalogue. Multiple values can be added, separated by white spaces. Specifying \"all\" is equivalent to specifying \"sv1 sv2 sv3 main\". Accepted values are \"sv1\", \"sv2\", \"sv3\", \"main\", \"all\" and \"special\" **Type: str, Required: no, Default: \"all\"**\n",
"- `max num spec`: Maximum number of spectra to read. None for no maximum. **Type: int or None, Required: no, Default: None**\n",
"- `lambda abs IGM`: Wavelength of the IGM absorber. Must be one of the keys of `ABSORBER_IGM` in `delta_extraction/utils.py`. Used only if `analysis type` is \"PK 1D\". **Type: str, Required: no, Default: LYA**\n",
"- `lambda max`: Upper limit on observed wavelength [Angstrom] **Type: float, Required: no, Default: 5500.0**\n",
"- `lambda max rest frame`: Upper limit on rest frame wavelength [Angstrom] **Type: float, Required: no, Default: 1200.0**\n",
"- `lambda min`: Lower limit on observed wavelength [Angstrom] **Type: float, Required: no, Default: 3600.0**\n",
"- `lambda min rest frame`: Lower limit on rest frame wavelength [Angstrom] **Type: float, Required: no, Default: 1040.0**\n",
"- `minimum number pixels in forest`: Minimum number of pixels in a forest. Forests with less pixels will be dropped. **Type: int, Required: no, Default: 50**\n",
"- `rejection log file`: Filelame of the rejection log. Must have extension .fits or .fits.gz **Type: str, Required: no, Default: \"rejection_log.fits.gz\"**\n",
"- `wave solution`: Use linear (lin) or logarithmic wavelength solution. **Type: str, Required: no, Default: \"lin\"**\n",
"- `z max`: Maximum redshift. Quasars with redshifts higher than or equal to z_max will be discarded. If not specified, it will be computed at runtime based on the values of `lambda max` and `lambda min rest frame` as $z_{\\rm max} = \\max\\left(0, \\lambda_{\\rm max} / \\lambda_{\\rm min, rf} - 1\\right)$\n",
"- `z min`: Minimum redshift. Quasars with redshifts lower than z_min will be discarded. If not specified, it will be computed at runtime based on the values of `lambda min` and `lambda max rest frame` as $z_{\\rm min} = \\max\\left(0, \\lambda_{\\rm min} / \\lambda_{\\rm max, rf} - 1\\right)$ **Type: float, Required: False**\n",
"- `delta lambda`: Variation of the wavelength (in Angs) between two pixels. Ignored if `wave solution` is \"log\" **Type: float, Required: no, Default: 1.0**\n",
"- `delta log lambda`: Variation of the logarithm of the wavelength (in Angs) between two pixels. Ignored if `wave solution` is \"lin\" **Type: float, Required: no, Default: 1e-4**\n",
"\n",
"\n",
"\n",
"### DesiTile\n",
"- `analysis type`: Selected analysis type. Current options are \"BAO 3D\" or \"PK 1D\" **Type: str, Required: no, Default: \"BAO 3D\"\n",
"\n",
"- `blinding`: Selected blinding strategy (\"none\" for no blinding). Available strategies listed in `py/picca/delta_extraction/utils.py` in variable `ACCEPTED_BLINDING_STRATEGIES`. For the moment, blinding strategy \"corr_yshift\" is automatically engaged for main survey data, and blinding strategy \"none\" is automatically engaged for SV data or mocks. **Type: str, Required: no, Default: \"corr_yshift\"**\n",
"- `catalogue`: Name of the z_truth catalogue. **Type: str, Required: yes**\n",
"- `delta lambda`: Variation of the wavelength (in Angs) between two pixels. Ignored if `wave solution` is \"log\" **Type: float, Required: no, Default: 1.0**\n",
"- `delta log lambda`: Variation of the logarithm of the wavelength (in Angs) between two pixels. Ignored if `wave solution` is \"lin\" **Type: float, Required: no, Default: 1e-4**\n",
"- `input directory`: Directory to spectra files. **Type: str, Required: yes**\n",
"- `keep surveys`: Only keep the entries in the catalogue whose \"SURVEY\" column matches one of the values specified here. Ignored if the \"SURVEY\" column is not present in the catalogue. Multiple values can be added, separated by white spaces. Specifying \"all\" is equivalent to specifying \"sv1 sv2 sv3 main\". Accepted values are \"sv1\", \"sv2\", \"sv3\", \"main\", \"all\" and \"special\" **Type: str, Required: no, Default: \"all\"**\n",
"- `max num spec`: Maximum number of spectra to read. None for no maximum. **Type: int or None, Required: no, Default: None**\n",
"- `lambda abs IGM`: Wavelength of the IGM absorber. Must be one of the keys of `ABSORBER_IGM` in `delta_extraction/utils.py`. Used only if `analysis type` is \"PK 1D\". **Type: str, Required: no, Default: LYA**\n",
"- `lambda max`: Upper limit on observed wavelength [Angstrom] **Type: float, Required: no, Default: 5500.0**\n",
"- `lambda max rest frame`: Upper limit on rest frame wavelength [Angstrom] **Type: float, Required: no, Default: 1200.0**\n",
"- `lambda min`: Lower limit on observed wavelength [Angstrom] **Type: float, Required: no, Default: 3600.0**\n",
"- `lambda min rest frame`: Lower limit on rest frame wavelength [Angstrom] **Type: float, Required: no, Default: 1040.0**\n",
"- `minimum number pixels in forest`: Minimum number of pixels in a forest. Forests with less pixels will be dropped. **Type: int, Required: no, Default: 50**\n",
"- `rejection log file`: Filelame of the rejection log. Must have extension .fits or .fits.gz **Type: str, Required: no, Default: \"rejection_log.fits.gz\"**\n",
"- `use all`: Rread using the all directory. **Type: bool, Required: no, Default: False**\n",
"- `use all`: Read using the all directory. **Type: bool, Required: no, Default: False**\n",
"- `use single nights`: Read using only nights specified within the cat **Type: bool, Required: no, Default: False**\n",
"- `wave solution`: Use linear (lin) or logarithmic wavelength solution. **Type: str, Required: no, Default: \"lin\"**\n",
"- `z max`: Maximum redshift. Quasars with redshifts higher than or equal to z_max will be discarded. If not specified, it will be computed at runtime based on the values of `lambda max` and `lambda min rest frame` as $z_{\\rm max} = \\max\\left(0, \\lambda_{\\rm max} / \\lambda_{\\rm min, rf} - 1\\right)$\n",
Expand All @@ -204,16 +199,7 @@
"- `catalogue`: Name of the z_truth catalogue. **Type: str, Required: yes**\n",
"- `delta lambda`: Variation of the wavelength (in Angs) between two pixels. Ignored if `wave solution` is \"log\" **Type: float, Required: no, Default: 1.0**\n",
"- `delta log lambda`: Variation of the logarithm of the wavelength (in Angs) between two pixels. Ignored if `wave solution` is \"lin\" **Type: float, Required: no, Default: 1e-4**\n",
"- `input directory`: Directory to spectra files. **Type: str, Required: yes**\n",
"- `keep surveys`: Only keep the entries in the catalogue that have a \"SURVEY\" specified in field. Ignored if \"SURVEY\" column is not present in the catalogue. Multiple values can be added spearated by white spaces. Specifying \"all\" is equivalent to specifying \"sv1 sv2 sv3 main\". Accepted values are \"sv1\", \"sv2\", \"sv3\", \"main\", \"all\" and \"special\" **Type: str, Required: no, Default: \"all\"**\n",
"- `max num spec`: Maximum number of spectra to read. None for no maximum. **Type: int or None, Required: no, Default: None**\n",
"- `lambda abs IGM`: Wavelength of the IGM absorber. Must be one of the keys of `ABSORBER_IGM` in `delta_extraction/utils.py`. Used only if `analysis type` is \"PK 1D\". **Type: str, Required: no, Default: LYA**\n",
"- `lambda max`: Upper limit on observed wavelength [Angstrom] **Type: float, Required: no, Default: 5500.0**\n",
"- `lambda max rest frame`: Upper limit on rest frame wavelength [Angstrom] **Type: float, Required: no, Default: 1200.0**\n",
"- `lambda min`: Lower limit on observed wavelength [Angstrom] **Type: float, Required: no, Default: 3600.0**\n",
"- `lambda min rest frame`: Lower limit on rest frame wavelength [Angstrom] **Type: float, Required: no, Default: 1040.0**\n",
"- `rejection log file`: Filelame of the rejection log. Must have extension .fits or .fits.gz **Type: str, Required: no, Default: \"rejection_log.fits.gz\"**\n",
"- `minimum number pixels in forest`: Minimum number of pixels in a forest. Forests with less pixels will be dropped. **Type: int, Required: no, Default: 50**\n",
"- `wave solution`: Use linear (lin) or logarithmic wavelength solution. **Type: str, Required: no, Default: \"lin\"**\n",
"- `z max`: Maximum redshift. Quasars with redshifts higher than or equal to z_max will be discarded. If not specified, it will be computed at runtime based on the values of `lambda max` and `lambda min rest frame` as $z_{\\rm max} = \\max\\left(0, \\lambda_{\\rm max} / \\lambda_{\\rm min, rf} - 1\\right)$\n",
"- `z min`: Minimum redshift. Quasars with redshifts lower than z_min will be discarded. If not specified, it will be computed at runtime based on the values of `lambda min` and `lambda max rest frame` as $z_{\\rm min} = \\max\\left(0, \\lambda_{\\rm min} / \\lambda_{\\rm max, rf} - 1\\right)$ **Type: float, Required: False**"
Expand Down Expand Up @@ -372,7 +358,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.5"
"version": "3.8.12"
},
"toc": {
"base_numbering": 1,
Expand Down

0 comments on commit 9df9013

Please sign in to comment.