### Download image cutouts

Images are downloaded from the FIRST cutout server.

Catalogs used: FRICAT, FRIICAT, CoNFIG, Proctor (2011), Garon (2017)

Each catalog was downloaded from VizieR and saved as a tsv file (fields separated by `|`). Source coordinates are loaded from these tsv files.

Web scraping is done using chromedriver. The chromedriver executable needs to be downloaded and placed on the system PATH, and its version needs to match the version of Chrome being used.

Currently the notebook is set to just view the source GIF instead of downloading it, to demonstrate how the code works. To download FITS files instead, change the labelled `ImageType` index (0 = view gif, 2 = download FITS file) in the `FIRST_download` function.

In [11]:
import os
import sys
import time

import numpy as np
import pandas as pd
from astropy.io import fits
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

In [27]:
# Functions for downloading images

def update_progress(process, progress):
    '''
    Redraw a one-line text progress bar on stdout.
    process = label printed after the percentage
    progress = fraction completed (0 to 1); shown as a 10-character bar and as a percentage
    '''
    bar_width = 10
    filled = int(round(bar_width * progress))
    bar = "#" * filled + "-" * (bar_width - filled)
    # Leading '\r' returns to the start of the line so each call overwrites the previous bar
    sys.stdout.write("\rProgress: [{}] {:.3f}% {}".format(bar, progress * 100, process))
    sys.stdout.flush()
    
def FIRST_download(ra, dec, download_path=None):
    '''
    Request an image cutout from the FIRST cutout server for every (ra, dec) pair,
    by driving the server's web form through Chrome (selenium + chromedriver).

    ra, dec = equal-length sequences of coordinate strings; each pair is written into
              the form's 'RA' field separated by a tab
    download_path = optional image download path, if not specified downloads to default download directory

    The 'ImageType' option that gets clicked is set by image_type_index below
    (0 = view gif, 2 = download FITS file). It is currently 0, i.e. the gif is only viewed.

    Prints a progress bar while running, then a summary and the list of indices for which
    a form element could not be found. Returns None.
    '''
    cutout_url = "https://third.ucllnl.org/cgi-bin/firstcutout"
    # Set image size for download (4.5' = 150x150)
    size_arcmin = 4.5
    # 0 = view gif, 2 = download FITS file
    image_type_index = 0

    if download_path:
        # Only override Chrome's download directory when a path was actually supplied
        # NOTE(review): Chrome is understood to expect an absolute directory for this
        # preference, hence abspath -- confirm against the chromedriver documentation
        chrome_options = webdriver.ChromeOptions()
        prefs = {'download.default_directory' : os.path.abspath(download_path)}
        chrome_options.add_experimental_option('prefs', prefs)
        driver = webdriver.Chrome(options=chrome_options)
    else:
        driver = webdriver.Chrome()
    driver.maximize_window()

    def load_form(wait=0):
        # Open the cutout form (optionally pausing after the page load) and set the image size field
        driver.get(cutout_url)
        if wait:
            time.sleep(wait)
        imagesize = driver.find_element(By.NAME, 'ImageSize')
        driver.execute_script("arguments[0].value=arguments[1]",imagesize,'{}'.format(size_arcmin))

    # Go to FIRST cutout server website
    load_form()

    # Request all sources in ra, dec arrays
    n_sources = len(ra)
    missing_indices = []
    start_time = time.time()
    for i in range(n_sources):
        try:
            radec = driver.find_element(By.NAME, 'RA')
            driver.execute_script("arguments[0].value=arguments[1]",radec,"{}\t{}".format(ra[i],dec[i]))
            fits_file = driver.find_elements(By.NAME, 'ImageType')[image_type_index]
            fits_file.click()
            submit = driver.find_element(By.NAME, '.submit')
            submit.click()
        except NoSuchElementException:
            # Exception raised if the input ra and dec are out of range of the database
            # Resets the webpage and continues loop through remaining values
            print ('Could not find element {}'.format(i))
            missing_indices.append(i)
            load_form(wait=3)
            continue
        update_progress('downloading images', (i+1)/n_sources)

    # NOTE(review): the driver is never quit, so the browser window stays open after the loop.
    # Quitting here might interrupt a download that is still in progress -- confirm before adding driver.quit()
    print('\n{}/{} images downloaded in {:.3f} seconds'.format(n_sources-len(missing_indices), n_sources, time.time()-start_time))
    print('Missing images:', missing_indices)

In [29]:
# Read data (i.e. source coords) from a '|'-separated catalog file

# Choose catalog to download
# 0: ConFIG, 1: FRICAT, 2: FRIICAT, 3: garon_combined, 4: Proctor
catalog = 4

catalog_files = {
    0: './Catalogs/CoNFIG.tsv',
    1: './Catalogs/FR1CAT.tsv',
    2: './Catalogs/FR2CAT.tsv',
    3: './Catalogs/garon_combined.tsv',
    4: './Catalogs/proctor_2011.tsv',
}

# Fail loudly on an unknown choice instead of silently leaving ra/dec undefined (or stale from an earlier run)
if catalog not in catalog_files:
    raise ValueError('Unknown catalog {!r}, expected one of {}'.format(catalog, sorted(catalog_files)))


def _split_packed_coords(coords, dec_start):
    '''
    Convert packed coordinate strings into the space-separated format passed to the FIRST cutout server.
    coords = array of strings; the first character is skipped, characters 1 to dec_start are
             sliced into ra as 2 + 2 + remainder, and the characters from dec_start on are
             sliced into dec as 3 + 2 + remainder
    dec_start = index of the first dec character within each string
    Returns (ra, dec) arrays with the same shape and dtype as coords.
    '''
    ra_out, dec_out = np.empty_like(coords), np.empty_like(coords)
    for i, coord in enumerate(coords):
        ra_out[i] = coord[1:3]+' '+coord[3:5]+' '+coord[5:dec_start]
        dec_out[i] = coord[dec_start:dec_start+3]+' '+coord[dec_start+3:dec_start+5]+' '+coord[dec_start+5:]
    return ra_out, dec_out


# Every catalog file is read the same way: no header row, first 3 lines skipped
filepath = catalog_files[catalog]
data = pd.read_csv(filepath, sep='|', header=None, skiprows=3)
data = np.array(data)

if catalog == 0:
    # ra and dec are read from columns 3 and 4 (an earlier assignment from columns 0 and 1
    # was immediately overwritten by this one, so it has been dropped)
    ra, dec = data[:,3], data[:,4]
    print('Example (ra, dec) coords to pass into server:', ra[0], dec[0])

elif catalog in (1,2):
    # Packed coordinate string is in column 0; dec part starts at character 10
    coords = data[:,0]
    ra, dec = _split_packed_coords(coords, dec_start=10)
    print('Example (ra, dec) coords to pass into server:', ra[0], dec[0])

elif catalog == 3:
    # Packed coordinate string is in column 2; dec part starts at character 9
    coords = data[:,2]
    ra, dec = _split_packed_coords(coords, dec_start=9)
    print('Example (ra, dec) coords to pass into server:', ra[0], dec[0])

elif catalog == 4:
    ra, dec = data[:,1], data[:,2]

    # Separate into tables 1-14 for each morph type
    # NOTE(review): assumes column 8 holds the table label and rows are grouped by it -- confirm
    # A new table starts wherever the label differs from the previous row. Comparing consecutive
    # rows (rather than a wrapped-around roll) keeps every boundary even if the first and last
    # rows share a label.
    labels = data[:,8]
    table_ind = np.concatenate(([0], np.flatnonzero(labels[1:] != labels[:-1]) + 1))
    tables = np.split(data, table_ind[1:])
    ra, dec = np.split(ra, table_ind[1:]), np.split(dec, table_ind[1:])
    print('Example (ra, dec) coords to pass into server:', ra[0][0], dec[0][0])

Example (ra, dec) coords to pass into server: 00 03 31.218 +00 28 04.25


In [30]:
# Download source images from cutout server
# Only download WATs and NATs from Proctor catalog

# For the Proctor catalog (4) ra and dec are lists of per-table arrays, so only the first table is passed on;
# for every other catalog the full coordinate arrays are used
if catalog == 4:
    selected_ra, selected_dec = ra[0], dec[0]
else:
    selected_ra, selected_dec = ra, dec

FIRST_download(selected_ra, selected_dec, download_path=None)

Progress: [##########] 100.000% downloading images
412/412 images downloaded in 571.230 seconds
Missing images: []
