## Data preparation notebook

An observing project with GMOS produces a large set of files, including science data as well as a series of auxiliary data and metadata. This notebook contains procedures for becoming familiar with those files and the relationships between them, and it also ensures that all data sources required for the project are available.

In [8]:
import bz2
import pandas as pd
from astroquery.gemini import Observations

In [2]:
# Base location prepended verbatim to every file path built in this notebook
# (e.g. f"{path}raw/"), so a non-empty value must end with a slash. The empty
# string makes all paths relative to the current working directory.
# path = "/gmos/gs-2022b-q-202/"
path = ""

#### Define utility functions

In [3]:
def download_files(filenames):
    """Download archive files into ``{path}raw/`` and unpack the bz2 ones.

    Each file is fetched with ``Observations.get_file``. When its name ends in
    ``.bz2`` it is then decompressed next to the downloaded archive, under the
    same name without that suffix.

    Parameters
    ----------
    filenames : iterable of str
        Archive file names, e.g. the ``filename`` column of a catalog.

    Notes
    -----
    A name without the ``.bz2`` suffix is left exactly as downloaded. Trying to
    decompress it would use the same path for input and output, and opening the
    output for writing would truncate the file that was just fetched.
    """
    import shutil  # local import: only this helper needs it

    raw_dir = f"{path}raw/"
    suffix = ".bz2"

    for file in filenames:
        Observations.get_file(filename=file, download_dir=raw_dir)

        if not file.endswith(suffix):
            print(f"{file} is not bz2-compressed; left as downloaded")
            continue

        # Strip only the trailing suffix, never an inner occurrence of ".bz2".
        decompressed_filename = file[: -len(suffix)]
        with bz2.open(f"{raw_dir}{file}", "rb") as compressed_file, open(
            f"{raw_dir}{decompressed_filename}", "wb"
        ) as decompressed_file:
            # Stream in chunks instead of holding the whole frame in memory.
            shutil.copyfileobj(compressed_file, decompressed_file)
        print(f"{file} decompressed to {decompressed_filename}")

In [4]:
def query_metadata(filename):
    """Return the archive metadata of one file as a plain dictionary.

    Parameters
    ----------
    filename : str
        File name passed to ``Observations.query_criteria``.

    Returns
    -------
    dict
        Column name -> value mapping of the first record returned by the query.

    Raises
    ------
    IndexError
        If the query returns no record for ``filename``.
    """
    records = Observations.query_criteria(filename=filename).to_pandas()
    # To keep only the columns of interest, restrict the frame here, e.g.:
    # records = records.loc[:,['name', 'exposure_time', 'ut_datetime', 'observation_id', 'data_label', 'object', 'observation_class', 'observation_type', 'central_wavelength']]
    if records.empty:
        # Same exception type as indexing an empty list, but it names the file.
        raise IndexError(f"No archive metadata found for {filename!r}")
    return records.to_dict(orient="records")[0]

#### Make data catalogs

In [None]:
# Read the list of file names, one per line.
with open(f"{path}files.txt", "r") as f:
    # Blank lines (such as a trailing empty line) are dropped so that no empty
    # file name is sent to the archive query.
    files = [line.strip() for line in f if line.strip()]

# One metadata query per file; the records are collected first and turned into
# a DataFrame in a single step, then cached on disk for the cells below.
downloaded_catalog = pd.DataFrame([query_metadata(file) for file in files])
downloaded_catalog.to_csv(f"{path}data_catalog.csv", index=False)

In [10]:
# Fetch every archive record of the program and normalise two column types:
# observation timestamps become datetimes, exposure times become floats.
program_catalog = (
    Observations.query_criteria(program_id="GS-2022B-Q-202")
    .to_pandas()
    .assign(
        ut_datetime=lambda cat: pd.to_datetime(cat["ut_datetime"]),
        exposure_time=lambda cat: cat["exposure_time"].astype(float),
    )
)

#### Explore science data

In [11]:
# Reload the catalog written to disk and parse its timestamps, which come back
# from the CSV as plain strings.
downloaded_catalog = pd.read_csv(f"{path}data_catalog.csv").assign(
    ut_datetime=lambda cat: pd.to_datetime(cat["ut_datetime"])
)

In [12]:
# Keep the rows whose observation class is either "science" or "partnerCal".
# reset_index() renumbers the rows and keeps the old labels in an "index" column.
science = program_catalog[
    program_catalog["observation_class"].isin(["science", "partnerCal"])
].reset_index()

In [13]:
# Overview of the selected rows, grouped visually by target name.
science[
    [
        "filename",
        "data_label",
        "object",
        "observation_type",
        "observation_class",
        "ut_datetime",
        "central_wavelength",
        "exposure_time",
    ]
].sort_values(by="object")

Unnamed: 0,filename,data_label,object,observation_type,observation_class,ut_datetime,central_wavelength,exposure_time
0,S20220918S0057.fits.bz2,GS-2022B-Q-202-7-001,CD-34 241,OBJECT,partnerCal,2022-09-18 02:44:13.200,0.575,300.0
2,S20220918S0059.fits.bz2,GS-2022B-Q-202-7-003,CD-34 241,OBJECT,partnerCal,2022-09-18 02:53:37.200,0.575,600.0
3,S20220918S0060.fits.bz2,GS-2022B-Q-202-7-005,CD-34 241,OBJECT,partnerCal,2022-09-18 03:05:47.200,0.605,600.0
22,S20220929S0064.fits.bz2,GS-2022B-Q-202-21-008,GCALflat,FLAT,partnerCal,2022-09-29 06:33:15.200,0.605,90.0
21,S20220929S0063.fits.bz2,GS-2022B-Q-202-21-007,GCALflat,FLAT,partnerCal,2022-09-29 06:30:37.200,0.575,90.0
18,S20220929S0060.fits.bz2,GS-2022B-Q-202-21-004,GCALflat,FLAT,partnerCal,2022-09-29 05:55:48.200,0.605,90.0
17,S20220929S0059.fits.bz2,GS-2022B-Q-202-21-003,GCALflat,FLAT,partnerCal,2022-09-29 05:53:10.200,0.575,90.0
11,S20220929S0050.fits.bz2,GS-2022B-Q-202-28-007,GCALflat,FLAT,partnerCal,2022-09-29 04:19:31.200,0.575,90.0
12,S20220929S0051.fits.bz2,GS-2022B-Q-202-28-008,GCALflat,FLAT,partnerCal,2022-09-29 04:22:09.200,0.605,90.0
7,S20220929S0046.fits.bz2,GS-2022B-Q-202-28-003,GCALflat,FLAT,partnerCal,2022-09-29 03:44:44.200,0.575,90.0


#### Prepare star files

In [14]:
# Partner-calibration rows observed before 2022-09-29.
program_catalog.loc[
    lambda cat: (cat["observation_class"] == "partnerCal")
    & (cat["ut_datetime"] < "2022-09-29"),
    [
        "filename",
        "data_label",
        "object",
        "observation_type",
        "ut_datetime",
        "central_wavelength",
        "exposure_time",
    ],
]

Unnamed: 0,filename,data_label,object,observation_type,ut_datetime,central_wavelength,exposure_time
5,S20220918S0057.fits.bz2,GS-2022B-Q-202-7-001,CD-34 241,OBJECT,2022-09-18 02:44:13.200,0.575,300.0
6,S20220918S0058.fits.bz2,GS-2022B-Q-202-7-002,GCALflat,FLAT,2022-09-18 02:50:18.200,0.575,90.0
7,S20220918S0059.fits.bz2,GS-2022B-Q-202-7-003,CD-34 241,OBJECT,2022-09-18 02:53:37.200,0.575,600.0
8,S20220918S0060.fits.bz2,GS-2022B-Q-202-7-005,CD-34 241,OBJECT,2022-09-18 03:05:47.200,0.605,600.0
9,S20220918S0061.fits.bz2,GS-2022B-Q-202-7-006,GCALflat,FLAT,2022-09-18 03:16:52.200,0.605,90.0


In [None]:
# Get filenames from catalog
star_filenames = (
    (
        program_catalog.loc[
            (program_catalog["observation_class"] == "partnerCal")
            & (program_catalog["ut_datetime"] < "2022-09-29"),
            ["filename"],
        ]
    )
    .reset_index()
    .iloc[1:, 1]
    .tolist()
)

# Download and decompress files
download_files(star_filenames)

In [15]:
# Select the bias frames for the star reduction: BIAS observations taken
# between 2022-09-12 and 2022-09-24 (exclusive) with 2x1 detector binning.
star_biases = downloaded_catalog.loc[
    (downloaded_catalog["observation_type"] == "BIAS")
    & (downloaded_catalog["ut_datetime"] > "2022-09-12")
    & (downloaded_catalog["ut_datetime"] < "2022-09-24")
    & (downloaded_catalog["detector_binning"] == "2x1")
]
print(f"Files selected: {star_biases.shape[0]}")

# regex=False makes ".fits" a literal string regardless of the pandas version;
# under regex semantics the dot would match any character.
biasfiles = (
    star_biases.loc[:, "name"].str.replace(".fits", "", regex=False).tolist()
)

# Write the list file: one frame name (without extension) per line.
with open(f"{path}redux/starbias.lis", "w") as f:
    f.writelines(f"{bias}\n" for bias in biasfiles)

Files selected: 30


#### Prepare lamp files

In [16]:
# All arc-lamp (ARC) rows of the program, with no restriction on date.
program_catalog.loc[
    program_catalog["observation_type"] == "ARC",
    [
        "filename",
        "data_label",
        "object",
        "observation_type",
        "ut_datetime",
        "central_wavelength",
        "exposure_time",
    ],
]

Unnamed: 0,filename,data_label,object,observation_type,ut_datetime,central_wavelength,exposure_time
10,S20220918S0170.fits.bz2,GS-2022B-Q-202-5-001,CuAr,ARC,2022-09-18 09:30:35.700,0.58,45.0
11,S20220918S0389.fits.bz2,GS-2022B-Q-202-5-002,CuAr,ARC,2022-09-18 16:12:58.200,0.575,45.0
12,S20220918S0390.fits.bz2,GS-2022B-Q-202-5-003,CuAr,ARC,2022-09-18 16:14:31.700,0.605,45.0
41,S20220929S0097.fits.bz2,GS-2022B-Q-202-29-001,CuAr,ARC,2022-09-29 09:59:15.700,0.575,45.0
42,S20220929S0098.fits.bz2,GS-2022B-Q-202-29-002,CuAr,ARC,2022-09-29 10:00:49.700,0.605,45.0
43,S20220929S0099.fits.bz2,GS-2022B-Q-202-22-001,CuAr,ARC,2022-09-29 10:02:23.700,0.575,45.0
44,S20220929S0100.fits.bz2,GS-2022B-Q-202-22-002,CuAr,ARC,2022-09-29 10:03:56.700,0.605,45.0


In [None]:
# File names of every ARC row in the program catalog.
lamp_filenames = program_catalog.loc[
    program_catalog["observation_type"] == "ARC", "filename"
].to_list()

# Fetch the files from the archive and unpack them
download_files(lamp_filenames)