# Lab #1: Images, optics, and the statistics of light
## Convert RAW files to FITS files

In [1]:
# Import necessary packages
import os
import math
import shutil
import struct
import numpy as np
import matplotlib.pyplot as plt
import glob
from astropy.io import fits
from scipy.stats import linregress

In [2]:
# Directory that is searched for the camera's *.raw frames.
raw_bias_directory = "new/raw"
# Directory in which the converted FITS files are collected.
fits_bias_directory = "new/fits"

In [3]:
def raw_to_fits(raw_file, shape=(1080, 1440), destination=None, ADC_bits=12, simu_artifacts=False):
    """
    Convert a headerless RAW camera frame into a FITS image.

    The RAW file must hold exactly rows * columns unsigned samples, stored either
    as 8-bit values or as little-endian 16-bit values; which of the two is in use
    is inferred from the file size.

    Inputs:
    - raw_file (string): the file path of the RAW image.
    - shape (tuple): a tuple with (# of rows, # of columns) for the image dimensions.
    - destination (string): the output filename for the FITS image. If None,
                            the '.raw' extension is replaced with '.fits'
                            ('.fits' is appended if the name has no '.raw').
    - ADC_bits (integer): the bit depth of the analog-to-digital conversion.
    - simu_artifacts (bool): whether to simulate artifacts (dark current, flat
                             field, hot/cold pixels, bias, read noise). The
                             exposure time is then read from the filename, which
                             must end in '_<microseconds>us.raw', and '_simu' is
                             inserted before the '.fits' extension of the output.

    Raises:
    - ValueError: if the file size matches neither an 8-bit nor a 16-bit image of
                  the given shape, or if simu_artifacts is set and the exposure
                  time cannot be parsed from the filename.
    """
    number_of_rows, number_of_columns = shape
    number_of_pixels = number_of_rows * number_of_columns

    with open(raw_file, 'rb') as raw:
        raw_img = raw.read()

    # The file carries no header, so the sample width is inferred from its size.
    filesize = len(raw_img)
    if filesize == number_of_pixels:  # 8-bit image
        sample_dtype = np.dtype("u1")
        file_bits = 8
    elif filesize == number_of_pixels * 2:  # 16-bit image, little-endian
        sample_dtype = np.dtype("<u2")
        file_bits = 16
    else:
        raise ValueError("The image shape provided does not match the length of the file")

    # Decode the whole buffer in one native pass (no per-pixel Python objects).
    # The copy to int64 makes the array writable and keeps the integer width that
    # np.array() of Python ints typically produced before.
    byte_array = (
        np.frombuffer(raw_img, dtype=sample_dtype)
        .reshape(number_of_rows, number_of_columns)
        .astype(np.int64)
    )

    # Scale the image if the file bit depth is greater than the ADC bit depth
    if file_bits > ADC_bits:
        byte_array = byte_array / 2 ** (file_bits - ADC_bits)

    if simu_artifacts:
        # The exposure time is only needed by the simulation, so files whose
        # names lack it can still be converted when no simulation is requested.
        integration_time = _exposure_seconds(raw_file)
        # The simulation works in place with fractional values; an integer
        # array (8-bit input, or no rescaling) would reject the in-place adds.
        byte_array = _add_simulated_artifacts(
            byte_array.astype(np.float64), integration_time, ADC_bits
        )

    # Determine the output FITS filename
    if destination is None:
        # Swap only the last '.raw' so directory names are never rewritten, and
        # never fall back to the input path itself (it would be overwritten).
        head, found, tail = raw_file.rpartition('.raw')
        fits_file = head + '.fits' + tail if found else raw_file + '.fits'
    else:
        fits_file = destination

    # If simulation of artifacts was enabled, insert "_simu" before the extension
    if simu_artifacts:
        head, found, tail = fits_file.rpartition(".fits")
        if found:
            fits_file = head + "_simu.fits" + tail

    # Write the image as the primary HDU, overwriting any existing file
    hdu = fits.PrimaryHDU(byte_array)
    hdu.writeto(fits_file, overwrite=True)


def _exposure_seconds(raw_file):
    """Return the exposure time in seconds parsed from a '..._<microseconds>us.raw' filename."""
    token = raw_file.split("us.raw")[0].split("_")[-1]
    try:
        return float(token) / 1e6
    except ValueError as err:
        raise ValueError(
            f"Cannot read the exposure time from {raw_file!r}: "
            "expected a name ending in '_<microseconds>us.raw'"
        ) from err


def _add_simulated_artifacts(image, integration_time, ADC_bits):
    """Apply the simulated detector artifacts to a float image in place and return the clipped result."""
    ny, nx = image.shape

    dark_current = 1.25  # Dark current in (ADU/s)
    image += dark_current * integration_time  # Dark signal accumulated over the exposure

    # Flat field: a broad Gaussian response centred on the middle of the sensor
    x_grid, y_grid = np.meshgrid(np.arange(nx), np.arange(ny))
    flat_field = np.exp(-0.5 * ((x_grid - nx // 2) ** 2 / (3 * nx) ** 2 + (y_grid - ny // 2) ** 2 / (3 * ny) ** 2))
    image *= flat_field

    # 1% of the pixels are drawn as hot and another 1% as cold
    bad_pixel_count = int(0.01 * nx * ny)

    # Seed the global generator so the bad-pixel pattern is the same for every
    # frame, and always put the caller's random state back, even on error.
    state = np.random.get_state()
    np.random.seed(0)
    try:
        # NOTE(review): randint's upper bound is exclusive, so with
        # high = nx * ny - 1 the last pixel can never be picked; kept as-is so
        # the seeded pattern does not change.
        hot_pix_indices = np.unravel_index(
            np.random.randint(0, high=nx * ny - 1, size=bad_pixel_count), (ny, nx)
        )
        # NOTE(review): the factor scales with the exposure time, so for
        # sub-second exposures it can dim a "hot" pixel - confirm the intended model.
        image[hot_pix_indices] *= integration_time * np.clip(
            30 * np.random.randn(bad_pixel_count), dark_current, 2 ** ADC_bits
        )

        cold_pix_indices = np.unravel_index(
            np.random.randint(0, high=nx * ny - 1, size=bad_pixel_count), (ny, nx)
        )
        image[cold_pix_indices] *= np.random.uniform(0, 0.5, size=bad_pixel_count)
    finally:
        np.random.set_state(state)

    # Add a constant bias value to the entire image
    image += 64  # Bias offset in ADU

    # Gaussian read noise; drawn after the state is restored, so it differs per call
    readnoise = 3  # Read noise level in ADU
    image += readnoise * np.random.randn(ny, nx)

    # Keep the values within the range spanned by the ADC bit depth
    return np.clip(image, 0, 2 ** ADC_bits)


In [4]:
def process_raw_to_fits(raw_directory, fits_directory, shape=(1080, 1440)):
    """
    Convert every '*.raw' file in a directory to FITS and collect the results.

    Each RAW file is converted with raw_to_fits, and the resulting FITS file is
    moved into fits_directory under the same base name. Files whose FITS
    counterpart is already present in fits_directory are skipped, so the
    function can be re-run without repeating finished conversions.

    Inputs:
    - raw_directory (string): directory that is searched for '*.raw' files.
    - fits_directory (string): directory that receives the FITS files; it is
                               created (including parents) if it does not exist.
    - shape (tuple): (# of rows, # of columns) passed on to raw_to_fits.
    """
    # Get a list of all .raw files in the directory
    raw_files = glob.glob(os.path.join(raw_directory, "*.raw"))
    print("Found raw files:\n", raw_files)

    # Make sure output directory exists
    os.makedirs(fits_directory, exist_ok=True)
    print("Found output directory:\n", fits_directory)

    for raw_file in raw_files:
        # The FITS file is first written next to the RAW file, then moved.
        fits_filename = os.path.splitext(raw_file)[0] + ".fits"
        new_fits_path = os.path.join(fits_directory, os.path.basename(fits_filename))

        # Finished conversions live in fits_directory, so that is where the
        # "already done" check has to look.
        if os.path.exists(new_fits_path):
            print(f"FITS file {new_fits_path} already exists. Skipping {raw_file}.")
            continue

        # Pass the caller's shape through, and name the output explicitly so
        # the move below does not depend on raw_to_fits' default naming.
        raw_to_fits(raw_file, shape=shape, destination=fits_filename)

        # Move the FITS file to the fits directory
        shutil.move(fits_filename, new_fits_path)
        print(f"Moved {fits_filename} to {new_fits_path}.\n")

In [5]:
# Convert every RAW frame found in raw_bias_directory and collect the
# resulting FITS files in fits_bias_directory.
process_raw_to_fits(raw_directory=raw_bias_directory,
                    fits_directory=fits_bias_directory,
                    shape=(1080, 1440))

Found raw files:
 ['new/raw/red_spot_1000us.raw', 'new/raw/ronchi_1000us.raw', 'new/raw/stars_1000us.raw', 'new/raw/jupiter1_1000000us.raw', 'new/raw/jupiter10_1000000us.raw', 'new/raw/jupiter11_1000000us.raw', 'new/raw/jupiter12_1000000us.raw', 'new/raw/jupiter13_1000000us.raw', 'new/raw/jupiter14_1000000us.raw', 'new/raw/jupiter15_1000000us.raw', 'new/raw/jupiter17_1000000us.raw', 'new/raw/jupiter18_1000000us.raw', 'new/raw/jupiter19_1000000us.raw', 'new/raw/jupiter2_1000000us.raw', 'new/raw/jupiter20_1000000us.raw', 'new/raw/jupiter3_1000000us.raw', 'new/raw/jupiter4_1000000us.raw', 'new/raw/jupiter5_1000000us.raw', 'new/raw/jupiter6_1000000us.raw', 'new/raw/jupiter7_1000000us.raw', 'new/raw/jupiter9_1000000us.raw']
Found output directory:
 new/fits
Moved new/raw/red_spot_1000us.fits to new/fits/red_spot_1000us.fits.

Moved new/raw/ronchi_1000us.fits to new/fits/ronchi_1000us.fits.

Moved new/raw/stars_1000us.fits to new/fits/stars_1000us.fits.

Moved new/raw/jupiter1_1000000us.fits