# Image Offsets: Process sequence

This notebook transforms the procedures developed in `Offsets_1.ipynb` into callable functions. 

These functions are used in a loop to process an entire sequence of images. Results are later examined in plots. 

The end product is a series of FITS tables, one per input image, that contain the star offsets in relation to the reference image. These tables should be used in a subsequent notebook to generate the actual arrays with pixel offsets that are used by drizzle to figure out the pixel mapping.

Originally this notebook was developed with the ISO 12800 data set as the test data. In the current version, it is configured to use the ISO 6400 data set. Statements specific to the original data set were comented out.

In [15]:
# %matplotlib widget

import time

import os, glob

import numpy as np
from matplotlib.pyplot import imshow
import matplotlib.pyplot as plt

from astropy.table import Table
from astropy.stats import SigmaClip
from astropy.convolution import Gaussian2DKernel, interpolate_replace_nans

from photutils import centroids
from photutils.aperture import CircularAperture
from photutils.detection import DAOStarFinder
from photutils.background import Background2D, MedianBackground, ModeEstimatorBackground

import rawpy
import exifread

from datapath import DATA

## Initialization

Define values to be used in the processing functions, and throughout the script.

In [2]:
# ISO 12800 data set

# # images to be drizzled 
# data_dirpath = os.path.join(DATA,'astrophotography_data/MilkyWayPrettyBoy/12800/light/')
# image_list = list(glob.glob(data_dirpath + '/*.ARW'))
# image_list.sort()

# # reference image - this will be the image which subsequent images will have their 
# # offsets computed against.
# ref_dirpath = os.path.join(DATA,'astrophotography_data/MilkyWayPrettyBoy/12800/light/')
# reference_fname = os.path.join(ref_dirpath, 'DSC03770.ARW')

# # since the reference image was included in the image list by the glob command, it
# # has to be removed here
# image_list = image_list[1:]

In [3]:
# ISO 6400 data set

# # images to be drizzled 
# data_dirpath = os.path.join(DATA,'astrophotography_data/MilkyWayPrettyBoy/6400/light/')
# image_list = list(glob.glob(data_dirpath + '/*.ARW'))
# image_list.sort()

# # reference image - this will be the image which subsequent images will have their 
# # offsets computed against. We want here that this be the same reference used for the
# # 12800 data set, so both data sets can be drizzled onto a common pixel grid.
# ref_dirpath = os.path.join(DATA,'astrophotography_data/MilkyWayPrettyBoy/12800/light/')
# reference_fname = os.path.join(ref_dirpath, 'DSC03770.ARW')

# # we need to process images in a given order: from the closest (in time) to the reference
# # image, to the most distant.
# image_list.reverse()

# image_list

In [4]:
# Andromeda data set

# images to be drizzled 
data_dirpath = os.path.join(DATA,'astrophotography_data/Andromeda_2022/135mm16s6400ISO')
image_list = list(glob.glob(data_dirpath + '/*.ARW'))
image_list.sort()

# reference image - this will be the image which subsequent images will have their 
# offsets computed against. We want here that this be the same reference used for the
# 12800 data set, so both data sets can be drizzled onto a common pixel grid.
ref_dirpath = os.path.join(DATA,'astrophotography_data/Andromeda_2022/135mm16s6400ISO')
reference_fname = os.path.join(ref_dirpath, 'DSC03798.ARW')

# since the reference image was included in the image list by the glob command, it
# has to be removed here
image_list = image_list[1:]

print(len(image_list))
image_list

899


['/Users/busko/Projects/astrophotography_data/Andromeda_2022/135mm16s6400ISO/DSC03799.ARW',
 '/Users/busko/Projects/astrophotography_data/Andromeda_2022/135mm16s6400ISO/DSC03800.ARW',
 '/Users/busko/Projects/astrophotography_data/Andromeda_2022/135mm16s6400ISO/DSC03801.ARW',
 '/Users/busko/Projects/astrophotography_data/Andromeda_2022/135mm16s6400ISO/DSC03802.ARW',
 '/Users/busko/Projects/astrophotography_data/Andromeda_2022/135mm16s6400ISO/DSC03803.ARW',
 '/Users/busko/Projects/astrophotography_data/Andromeda_2022/135mm16s6400ISO/DSC03804.ARW',
 '/Users/busko/Projects/astrophotography_data/Andromeda_2022/135mm16s6400ISO/DSC03805.ARW',
 '/Users/busko/Projects/astrophotography_data/Andromeda_2022/135mm16s6400ISO/DSC03806.ARW',
 '/Users/busko/Projects/astrophotography_data/Andromeda_2022/135mm16s6400ISO/DSC03807.ARW',
 '/Users/busko/Projects/astrophotography_data/Andromeda_2022/135mm16s6400ISO/DSC03808.ARW',
 '/Users/busko/Projects/astrophotography_data/Andromeda_2022/135mm16s6400ISO/DSC

In [5]:
# normalization factors to get the roundest star images (as per Offsets_1 notebook)
red_norm = 1.6  
blue_norm = 1.39

# parameters to control background subtraction
bkg_cell_footprint = (100, 100)
bkg_filter = (11, 11)

bkg_sigma_clip = SigmaClip(sigma=5.)
bkg_kernel = Gaussian2DKernel(x_stddev=1)
bkg_estimator = ModeEstimatorBackground()

# parameters for star finding
# dao_fwhm = 5.   # ISO 12800
# dao_fwhm = 6.0    # ISO 6400
dao_fwhm = 7.0    # Andromeda
# dao_threshold = 2000.   # ISO 12800
# dao_threshold = 1200.   # ISO 6400
dao_threshold = 600.   # Andromeda
sharplo = 0.1
sharphi = 1.0
roundlo = -0.9 
roundhi = 0.9

# proximity = 3.5   # ISO 12800
# proximity = 4.   # ISO 6400
proximity = 10.   # Andromeda

# tolerance to detect condition that search is stuck
tol = 2.

In [6]:
# read reference image - we need to read it here to get the camera color array specification.
raw = rawpy.imread(reference_fname)
ref_imarray = raw.raw_image_visible.astype(float)

In [7]:
# masks that isolate the RGB pixels - these are camera-dependent and work with all images
colors_array = raw.raw_colors_visible

red_mask = np.where(colors_array == 0, 1, 0)

green_mask_1 = np.where(colors_array == 1, 1, 0)
green_mask_2 = np.where(colors_array == 3, 1, 0)
green_mask = green_mask_1 | green_mask_2

blue_mask = np.where(colors_array == 2, 1, 0)

Using normalizations derived from passband spectral response is *much* better than using normalizations derived from minimization of sky background variance (contrary to the initial finding in notebook Offsets_1). The likely cause is that the spectral-based normalization creates more well-behaved star images. The high variance in sky background doesn't seem to get in the way of detecting stars.

The best run with smooth background on the ISO 12800 data generated 240 detections with a complete data set. The same run but with spectral-based color band normalizations resulted in 550 detections.

## Processing functions

In [8]:
# computes position offsets between two tables. 
def get_offsets(sources, sources_prev):

    sources.add_column(np.nan, name='xoffset')
    sources.add_column(np.nan, name='yoffset')
    sources.add_column(0.0, name='xoffset_prev')
    sources.add_column(0.0, name='yoffset_prev')
    sources.add_column(0, name='ref_row')
    sources.add_column(0, name='prev_row')

    # loop over rows in previous table
    for row_index_prev in range(len(sources_prev)):
        # index in reference table
        ref_row = sources_prev[row_index_prev]['ref_row']

        # if previous table does not contain a pointer to 
        # the reference table, ignore.
        if ref_row == 0:
            continue

        # get position in previous table
        x_prev = sources_prev[row_index_prev]['xcentroid']
        y_prev = sources_prev[row_index_prev]['ycentroid']

        # loop over rows in current table
        for row_index in range(len(sources)):
            x = sources[row_index]['xcentroid']
            y = sources[row_index]['ycentroid']

            # offsets in relation to previous table - these are the ones to check for proximity
            x_off_previous = x - x_prev
            y_off_previous = y - y_prev

            # check for proximity, and store relevant info if found
            if abs(x_off_previous) <= proximity and abs(y_off_previous) <= proximity:

                # offsets in relation to reference table
                sources[row_index]['xoffset'] = x - sources_ref[ref_row]['xcentroid']
                sources[row_index]['yoffset'] = y - sources_ref[ref_row]['ycentroid']

                # offsets in relation to previous table
                sources[row_index]['xoffset_prev'] = x_off_previous
                sources[row_index]['yoffset_prev'] = y_off_previous

                # store pointers to rows in reference and previous tables
                sources[row_index]['ref_row'] = ref_row
                sources[row_index]['prev_row'] = row_index_prev
                
                #TODO 
                # instead of breaking, do an estimate of where the centroid would be,
                # given the current position, and the offsets from the previous table.
                # In other words, repeat the offset from the previous table. See if this
                # will cause the finding algorithm to pick up in the next image.

                break # if there is another star that matches the criterion, just ignore it
                
    return sources

In [9]:
def subtract_background(imarray, red_norm=1.0, blue_norm=1.0):

    # red_norm and blue_norm are normalization parameters applied to the R and B bands (assume
    # G=1) in order to make the star images as well-behaved as possible, in terms of being 
    # well represented, on average, by the daofind Gaussian. Ideally a different normalization 
    # should be applied to each star, depending on its color index, but this will be left as
    # a possible (but not very likely) future improvement. For now, we assume that an average,
    # frame-wide single normalization should suffice (statistically).
    
    # separate color bands
    red_array = imarray * red_mask
    green_array = imarray * green_mask
    blue_array = imarray * blue_mask
    
    # interpolate over the masked pixels in each band, so the background estimator 
    # is presented with a smooth array entirely filled with valid data
    red_array[red_array == 0.0] = np.nan
    green_array[green_array == 0.0] = np.nan
    blue_array[blue_array == 0.0] = np.nan

    red_array = interpolate_replace_nans(red_array, bkg_kernel)
    green_array = interpolate_replace_nans(green_array, bkg_kernel)
    blue_array = interpolate_replace_nans(blue_array, bkg_kernel)

    red_array[np.isnan(red_array)] = 0.
    green_array[np.isnan(green_array)] = 0.
    blue_array[np.isnan(blue_array)] = 0.
    
    # fit background model to each smoothed-out color band
    red_bkg = Background2D(red_array, bkg_cell_footprint, filter_size=bkg_filter, sigma_clip=bkg_sigma_clip, bkg_estimator=bkg_estimator)
    green_bkg = Background2D(green_array, bkg_cell_footprint, filter_size=bkg_filter, sigma_clip=bkg_sigma_clip, bkg_estimator=bkg_estimator)
    blue_bkg = Background2D(blue_array, bkg_cell_footprint, filter_size=bkg_filter, sigma_clip=bkg_sigma_clip, bkg_estimator=bkg_estimator)

    # subtract background from each masked color array
    subtracted = imarray - red_bkg.background * red_mask - \
                           green_bkg.background * green_mask - \
                           blue_bkg.background * blue_mask

    # after background subtraction, apply color band normalization. This has to be done separately
    # from step above for the background on each band to remain zero on average.
    subtracted = (subtracted * red_mask * red_norm) + \
                 (subtracted * green_mask) + \
                 (subtracted * blue_mask * blue_norm)

    return subtracted

In [10]:
class CentroidFinder():
    def __init__(self, x_estimate, y_estimate, size, sky, threshold):
        self.x_estimate = x_estimate
        self.y_estimate = y_estimate
        self.sky = sky
        self.threshold = threshold
        self.size = int(size)
        
        self.result = Table()
        
    def find(self, image):
#         try:
        x, y = centroids.centroid_sources(image, self.x_estimate, self.y_estimate, 
                                          box_size=self.size, centroid_func=centroids.centroid_com) 
        self.result['xcentroid'] = x
        self.result['ycentroid'] = y
#         except ValueError as e:
#             raise ValueError(str(e))

        return self.result                

In [11]:
# creates a table with star positions, given a path to an image file
def find_stars(path, sources_prev=None):

    with rawpy.imread(path) as raw:
        imarray = raw.raw_image_visible.astype(float)

        subtracted = subtract_background(imarray, red_norm=red_norm, blue_norm=blue_norm)
        
        global_median = np.median(subtracted)
        
        # find stars
        if sources_prev is None:
            daofind = DAOStarFinder(fwhm=dao_fwhm, sky=global_median, threshold=dao_threshold,
                                    sharplo=sharplo, sharphi=sharphi,
                                    roundlo=roundlo, roundhi=roundhi) 
            sources = daofind(subtracted)
        else:
            # offsets are added in reverse, to generate an estimate further away from the current position.
            x_estimate = sources_prev['xcentroid'] - sources_prev['xoffset_prev'] 
            y_estimate = sources_prev['ycentroid'] - sources_prev['yoffset_prev'] 

#             positions = [(x,y) for x,y in zip(x_estimate, y_estimate)]
#             daofind = DAOStarFinder(xycoords=np.array(positions), fwhm=dao_fwhm, 
#                                     sky=global_median,
#                                     threshold=dao_threshold,
#                                     sharplo=sharplo, sharphi=sharphi,
#                                     roundlo=roundlo, roundhi=roundhi) 
#             sources = daofind(subtracted)

            try:
                finder = CentroidFinder(x_estimate, y_estimate, dao_fwhm*2+1, global_median, dao_threshold)
                sources = finder.find(subtracted)
            except ValueError:
                # should prevent errors of type "xpos, ypos values contains point(s) outside of input data"
                x_estimate = sources_prev['xcentroid'] 
                y_estimate = sources_prev['ycentroid']  
                finder = CentroidFinder(x_estimate, y_estimate, dao_fwhm*2+1, global_median, dao_threshold)
                sources = finder.find(subtracted)
                
    return sources, subtracted, global_median

In [12]:
# keep only the NaN-free entries
def clean_nans(sources):
    has_nan = np.zeros(len(sources), dtype=bool)
    xoff = np.array(sources['xoffset'])
    has_nan |= np.isnan(xoff)
    return sources[~has_nan]

## Process sequence

In [13]:
# find stars in reference image
sources_ref, subtracted_ref, global_median = find_stars(reference_fname)

# positions storage
positions_tables = {}
positions_tables[reference_fname] = sources_ref

# array for stacking subtracted images
image_stack = np.zeros_like(subtracted_ref)

# add default offset columns to reference table
sources_ref.add_column(0., name='xoffset')
sources_ref.add_column(0., name='yoffset')
sources_ref.add_column(0., name='xoffset_prev')
sources_ref.add_column(0., name='yoffset_prev')

# in ref table, rows point to themselves
sources_ref.add_column(sources_ref['id']-1, name='ref_row')
sources_ref.add_column(sources_ref['id']-1, name='prev_row')

# force reference image to be the "previous" image
sources_prev = sources_ref

sources_ref

id,xcentroid,ycentroid,sharpness,roundness1,roundness2,npix,sky,peak,flux,mag,xoffset,yoffset,xoffset_prev,yoffset_prev,ref_row,prev_row
int64,float64,float64,float64,float64,float64,int64,float64,float64,float64,float64,float64,float64,float64,float64,int64,int64
1,3197.886240924124,5.291328188932814,0.47155291021284007,0.5161881021919031,-0.029754574660080704,49,-1.9810120464872512,182.55239719240097,98.54912012026618,-4.984131877763439,0.0,0.0,0.0,0.0,0,0
2,877.9791021899408,10.176976753860721,0.5762592869447409,0.13605874037724716,-0.5789813362190048,49,-1.9810120464872512,162.4166248054408,98.07365861648528,-4.978880942158345,0.0,0.0,0.0,0.0,1,1
3,2570.1453534865386,12.664067366154343,0.5332830478775942,0.5423652347170727,0.24834541367167368,49,-1.9810120464872512,255.2250901602162,98.61393191472577,-4.984845688120385,0.0,0.0,0.0,0.0,2,2
4,3375.5356151707624,29.465429785078435,0.386584788660393,0.643745344214697,-0.16110808253244824,49,-1.9810120464872512,920.6149343982052,103.79958530686149,-5.040489046129327,0.0,0.0,0.0,0.0,3,3
5,3049.9439896539798,34.252751510047986,0.42269386034000456,0.4809723776199077,-0.26377259410266535,49,-1.9810120464872512,3838.6501090487977,122.28027042954902,-5.218390976303164,0.0,0.0,0.0,0.0,4,4
6,1863.8887120560842,36.24260024583736,0.5363665373657461,0.6805741360661374,-0.20300859717232894,49,-1.9810120464872512,1127.9015127952075,103.5186916305645,-5.0375469358130855,0.0,0.0,0.0,0.0,5,5
7,3496.4910074425748,38.153155514248475,0.4267036724139981,0.1965633251756823,-0.718612305211364,49,-1.9810120464872512,225.52787367127465,98.62746463902101,-4.984994672755417,0.0,0.0,0.0,0.0,6,6
8,1132.444945837144,40.09549852759685,0.486796877969361,0.6632697174111248,0.15372460922864017,49,-1.9810120464872512,150.50572647238084,98.13155159723897,-4.979521664257023,0.0,0.0,0.0,0.0,7,7
9,2658.0468153716865,47.86099788892208,0.3525914251317225,0.4488858135452292,-0.23884272612654192,49,-1.9810120464872512,1039.2987837407754,104.62135603471975,-5.0490508619340355,0.0,0.0,0.0,0.0,8,8
10,2671.333609048228,47.46870657342031,0.7383058584883517,0.34933349613753195,-0.2642402662821323,49,-1.9810120464872512,438.1421021703466,99.11877626744426,-4.9903898288705575,0.0,0.0,0.0,0.0,9,9


In [14]:
sources_prev = sources_ref

# loop over list of images to be drizzled
for file_path in image_list:
    
    # find stars
#     t1 = time.time()
    sources, subtracted, global_median = find_stars(file_path, sources_prev=sources_prev)
#     t2 = time.time()
#     print("find stars: " , (t2-t1), " sec.")
    
    # compute offsets
#     t1 = time.time()
    sources_current = get_offsets(sources, sources_prev)
#     t2 = time.time()
#     print("get offsets: " , (t2-t1), " sec.")
    
    sources_current_no_nan = clean_nans(sources_current)
    
    positions_tables[file_path] = sources_current_no_nan
    
    # check that sources_current_no_nan has different offsets from sources_prev. If no
    # differences, break out of loop.
    if len(sources_current_no_nan) == len(sources_prev):
        xoff_c = np.array(sources_current_no_nan['xoffset'])
        yoff_c = np.array(sources_current_no_nan['yoffset'])
        xoff_p = np.array(sources_prev['xoffset'])
        yoff_p = np.array(sources_prev['yoffset'])
        if np.all(np.isclose(xoff_c, xoff_p, atol=tol)) and np.all(np.isclose(yoff_c, yoff_p, atol=tol)):
            print("Identical solution: search is stuck")
            break
    
    # for next iteration, current table becomes previous
    sources_prev = sources_current_no_nan
    
    # update image stack
    image_stack += subtracted
    
    print(file_path, len(sources_current_no_nan), global_median)

/Users/busko/Projects/astrophotography_data/Andromeda_2022/135mm16s6400ISO/DSC03799.ARW 917 -2.0601816953196135
/Users/busko/Projects/astrophotography_data/Andromeda_2022/135mm16s6400ISO/DSC03800.ARW 419 -2.107278056813948
/Users/busko/Projects/astrophotography_data/Andromeda_2022/135mm16s6400ISO/DSC03801.ARW 262 -2.050386786880665
/Users/busko/Projects/astrophotography_data/Andromeda_2022/135mm16s6400ISO/DSC03802.ARW 154 -1.9663120970596126


KeyboardInterrupt: 

In [None]:
sources_current_no_nan

In [None]:
# positions = [(x,y) for x,y in zip(sources_current_no_nan['xcentroid'], sources_current_no_nan['ycentroid'])]
# positions_ref = [(x,y) for x,y in zip(sources_ref['xcentroid'], sources_ref['ycentroid'])]

# apertures = CircularAperture(positions, r=5.)
# apertures_ref = CircularAperture(positions_ref, r=5.)

# plt.figure(figsize=[9, 6])
# plt.imshow(image_stack, vmin=-10, vmax=10000, cmap='binary')
# plt.colorbar()
# _ = apertures.plot(color='red')
# _ = apertures_ref.plot(color='yellow')

In [None]:
plt.figure(figsize=[9, 6])
plt.imshow(image_stack, vmin=-10, vmax=30000, cmap='binary')
plt.colorbar()

for file_path in list(positions_tables.keys()):
    positions_t = positions_tables[file_path]
    positions = [(x,y) for x,y in zip(positions_t['xcentroid'], positions_t['ycentroid'])]
    apertures = CircularAperture(positions, r=1.)
    _ = apertures.plot(color='red')

## Plot only complete sequence stars

Stars with a complete sequence of measured centroids are the ones that make to the last processed image/table in the sequence. Thus, starting from the end image and going backwards (in processing order), we ensure we pick only the complete sequence stars.

In [None]:
# start from last processed table. 
file_path_last = list(positions_tables.keys())[-1]
positions_last = positions_tables[file_path_last]
refrow_last = positions_last['ref_row']

file_path_ref = list(positions_tables.keys())[0]  # ref table
positions_ref = positions_tables[file_path_ref]

plt.figure(figsize=[9, 6])
plt.imshow(image_stack, vmin=-10, vmax=30000, cmap='binary')
plt.colorbar()

for i, row in enumerate(refrow_last):
    ref_row = positions_ref[row]
    
    x0 = ref_row['xcentroid']
    y0 = ref_row['ycentroid']
    x1 = positions_last['xcentroid'][i]
    y1 = positions_last['ycentroid'][i]
    
    plt.plot([x0,x1], [y0,y1], 'r', linewidth=1, markersize=1)

## Write tables

In [None]:
from astropy.io import fits

keys = list(positions_tables.keys())

for key in keys:
    dirname = os.path.dirname(key)
    fname = os.path.basename(key)
    imagename = fname.split('.')[0]
    tablename = os.path.join(dirname, imagename + '.offsets_table.fits')
    
    table = positions_tables[key]
    
    table.write(tablename, overwrite=True)

    print(tablename)