In [None]:
# Notebook metadata (read by tooling that indexes the notebooks)
__author__ = 'Knut Olsen <kolsen@noao.edu>' # single string; emails in <>
__version__ = '20180104' # yyyymmdd; version datestamp of this notebook
# Datasets actually used below: SMASH DR1 (catalog queries) and DES DR1 (SIA image cutouts)
__datasets__ = ['smash_dr1','des_dr1']  # enter used datasets by hand
__keywords__ = ['tutorial','query','image cutout']

# Welcome to the NOAO Data Lab
*Knut Olsen, NOAO Data Lab Team*

## Table of contents
* [Goals](#goals)
* [Summary](#summary)
* [Disclaimer & attribution](#attribution)
* [Imports & setup](#import)
* [Authentication](#auth)
* [Basic info about database tables](#basic)
* [A simple database query](#query)
* [An image cutout](#image)
* [Resources and references](#resources)

<a class="anchor" id="goals"></a>
# Goals
Learn how to:
* Import standard Data Lab modules
* Set up the Simple Image Access (SIA) Service to let you create image cutouts
* Discover the datasets available in the database
* Issue a simple query to the database
* Retrieve image cutouts
* Create a color image


<a class="anchor" id="summary"></a>
# Summary

If you've gotten this far, you're planning to use the Data Lab Jupyter notebook environment to access, explore, and analyze datasets available in the Data Lab.  This notebook aims to provide you with a very quick overview of how to use some common Data Lab services.  For more detailed explanations of data access, the full range of services, or for complete science examples, check out the other notebooks in this directory tree.

<a class="anchor" id="attribution"></a>
# Disclaimer & attribution
If you use this notebook for your published science, please acknowledge the following:

* Data Lab concept paper: Fitzpatrick et al., "The NOAO Data Laboratory: a conceptual overview", SPIE, 9149, 2014, http://dx.doi.org/10.1117/12.2057445

* Data Lab disclaimer: http://datalab.noao.edu/disclaimers.php


<a class="anchor" id="import"></a>
# Imports and setup
To use the Data Lab, you'll generally want to import common packages such as NumPy and matplotlib.  From the datalab package, you'll at minimum need the authClient module to get an authorization token (even if using Data Lab anonymously) and the queryClient module to issue a query against the catalog database.

For storing results in virtual storage and myDB, you'll need the storeClient module *and* log in as an authenticated user.  

The helpers module has many convenience functions.  See the <a href="http://datalab.noao.edu/docs/manual/UsingTheNOAODataLab/ClientInterfaces/Helpers/Helpers.html">online documentation</a> for a summary.

Use of the image cutout service (SIA) requires the external PyVO package, and the URL of the SIA service that you will use.  Data Lab has a general SIA service containing all available images from the NOAO Science Archive, as well as a number of survey-specific services.  See the SIA service HowTo for examples.

In [None]:
import numpy as np
import pylab as plt
import matplotlib
from astropy import utils, io
from getpass import getpass
from astropy.visualization import make_lupton_rgb

%matplotlib inline

# Datalab and related imports
# You'll need at least these for authenticating and for issuing database queries
from dl import authClient as ac, queryClient as qc
# You'll want storeClient if you plan to use virtual storage or myDB
#from dl import storeClient as sc
# Get helpers for various convenience function
from dl.helpers.utils import convert

# To get image cutouts, you'll need the VO-based SIA package, and define which SIA service
# to use
from pyvo.dal import sia
# Base URL of the SIA endpoint that serves the DES DR1 images
DEF_ACCESS_URL = "http://datalab.noao.edu/sia/des_dr1" # DES SIA service URL
# Service object bound to that URL; the image searches later in the notebook go through it
svc = sia.SIAService(DEF_ACCESS_URL)


<a class="anchor" id="auth"></a>
# Authentication
For the purposes of this notebook, there is no need to log in with your username and password inside the notebook. As an anonymous user, you can issue queries to the database or retrieve image cutouts, but not store your results in virtual storage or myDB.  If you need these things, you would use the *authClient* module to log in.  You only need to do this once (unless you log out through authClient), as the authentication token is stored on the server and automatically detected. 

In [None]:
# As an anonymous user, you can issue queries to the database or retrieve image cutouts, but
# not store your results in virtual storage or myDB

# Get token for an authenticated user if you wish to use virtual storage or myDB.
# Uncomment the next line to be prompted for credentials (getpass does not echo the password).
#token = ac.login(input("Enter user name: "),getpass("Enter password: "))

# Last expression of the cell, so its return value is displayed
# (expected to identify the current user -- TODO confirm against the authClient docs)
ac.whoAmI()

<a class="anchor" id="basic"></a>
# Basic info about database tables

### What datasets are available?
The queryClient has a <tt>schema</tt> method to give you information about available databases, tables, and columns.  If we call <tt>qc.schema()</tt> with an empty first argument, we'll get information on the available datasets and a one-line description for most of them.

In [None]:
# No argument: print the overview of the available datasets
print(qc.schema())

### Get list of tables
If we call qc.schema() with a specific dataset name, we'll see what tables are available for that dataset.  Here's what's available for SMASH DR1:

In [None]:
# Dataset name as argument: print the tables available in SMASH DR1
print(qc.schema('smash_dr1'))

### Get list of columns
We can also use qc.schema() to get column names and descriptions for a specific table.  Here's what's available for the SMASH DR1 object table.  (Note that not all datasets have column descriptions for every column).

In [None]:
# dataset.table as argument: print the columns of the SMASH DR1 object table
print(qc.schema('smash_dr1.object'))

### Getting statistics for tables
You'll often want to get some basic information about a given table, e.g. the number of rows.  You might be tempted to use COUNT(\*) in a query for this--DON'T DO THIS.  You'll be scanning the entire table, which for some tables will mean scanning billions of rows.  Instead, the special table <tt>tbl_stat</tt> contains this information for each dataset.  Query this table instead:

In [None]:
# A direct row count scans the whole table, so it is kept only as a counter-example:
#query="SELECT COUNT(ra) FROM phat_v2.phot_mod" # SLOW
# Read the precomputed statistics for smash_dr1.object from tbl_stat instead (fast)
query = (
    "SELECT * FROM tbl_stat "
    "WHERE schema='smash_dr1' and tbl_name='object'"
)

In [None]:
%%time
# Submit the SQL held in `query`; the %%time cell magic above reports how long the cell takes
info = qc.query(sql=query) # by default the result is a CSV formatted string

In [None]:
# Show the text returned by the tbl_stat query
print(info)

<a class="anchor" id="query"></a>
# A simple query
In the above section you already saw a basic query of the <tt>tbl_stat</tt> table.  Here we will retrieve the first 10 rows from the <tt>smash_dr1.object</tt> table:

In [None]:
%%time
# LIMIT 10 keeps the result to ten rows; note that `query` is overwritten here
query="SELECT * FROM smash_dr1.object LIMIT 10"
result = qc.query(sql=query) # by default the result is a CSV formatted string

You'll notice that the result is returned as a long string:

In [None]:
# Report the type of the query result, then preview its first 99 characters
print(type(result))
print(result[:99])

You'll generally want to convert this string into a table or an array.  The <tt>helpers</tt> module makes it easy.  Here we convert the string result into a Pandas dataframe:

In [None]:
# Turn the CSV-formatted string into a pandas DataFrame with the Data Lab helper
df = convert(result,'pandas')
# Display only the first few rows
df.head()

You could store these results to VOSpace or to myDB if you logged in as an authenticated user.  See the HowTo notebooks for examples of this.

<a class="anchor" id="image"></a>
# An image cutout
Using the image cutout service is a two-step process.  First, you need to specify a position and the size of the image that you want to retrieve, and then call the SIA service for all of the images that overlap that point on the sky:

In [None]:
# Position of the cutout and its size (presumably degrees, i.e. 1 arcmin -- confirm against the SIA docs)
ra, dec = 35., -52.
fov = 1./60

# The RA extent is divided by cos(dec) so the cutout keeps the same on-sky width at this declination
cutout_size = (fov/np.cos(dec*np.pi/180), fov)

# Ask the SIA service for every image overlapping the position and convert the VOTable to a table
imgTable = svc.search((ra,dec), cutout_size, verbosity=2).votable.to_table()

The result is a VOTable that we convert to an astropy Table on the fly.  The table contains many columns of data describing the parameters of each image, including a URL for the cutout itself.  Note, however, that data quality images such as masks or weight maps can also appear in the list:

In [None]:
# Report the table type, then display the full image list as the cell output
print(type(imgTable))
imgTable

The next step is to identify the image that you want from the list of available images.  Here we will limit the list to g-band image Stacks, and select the <tt>image</tt> product type (rather than weights or masks).

In [None]:
# Row masks: one per criterion, compared as str so the column's stored string type does not matter
sel0 = (imgTable['obs_bandpass'].astype(str) == 'g')       # g-band only
is_stack = (imgTable['proctype'].astype(str) == 'Stack')    # stacked images only
is_image = (imgTable['prodtype'].astype(str) == 'image')    # science image, not weight/mask

# Combine the criteria and keep the matching rows
sel = sel0 & (is_stack & is_image)
Table = imgTable[sel]
Table

The final step is to extract the URL and download the image:

In [None]:
# Take the first matching image
row = Table[0]
url = row['access_url'] # get the download URL
# The column may hold bytes or str depending on the astropy/pyvo version;
# decode only when needed so the cell works with both.
if isinstance(url, bytes):
    url = url.decode()
print(url)


In [None]:
# Fetch the cutout to a local file (cache=True, no progress bar, 120 s timeout) ...
cutout_path = utils.data.download_file(url, cache=True, show_progress=False, timeout=120)
# ... and read its image data from the FITS file
gimage = io.fits.getdata(cutout_path)


We'll do a quick display here:

In [None]:
# Quick look at the g-band cutout using the reversed grayscale colormap
plt.imshow(gimage,cmap=matplotlib.cm.gray_r)

### Want to make a color image?
Let's write a quick function that performs the SIA query and the download, use it to fetch two more bands (r and i), and combine them with the g image.

In [None]:
# a little function to download the deepest stacked images
# adapted from R. Nikutta
def download_deep_stack(ra,dec,fov=0.1,band='g',service=None):
    """Download the stacked image with the longest exposure time at a position.

    Parameters
    ----------
    ra, dec : float
        Center of the search position. `dec` is treated as degrees
        (it is converted to radians for the cos(dec) term).
    fov : float
        Size of the search region; the RA extent is fov/cos(dec).
    band : str
        Value the 'obs_bandpass' column must equal.
    service : object, optional
        SIA service to query (anything offering a pyvo-style ``search``).
        Defaults to the notebook-level ``svc``.

    Returns
    -------
    The image data returned by ``io.fits.getdata`` for the selected image,
    or None when no row matches band / 'Stack' / 'image'.
    """
    if service is None:
        service = svc  # notebook-level SIA service created in the setup cell

    imgTable = service.search((ra,dec), (fov/np.cos(dec*np.pi/180), fov), verbosity=2).votable.to_table()
    print("The full image list contains", len(imgTable), "entries")

    sel0 = imgTable['obs_bandpass'].astype(str)==band
    sel = sel0 & ((imgTable['proctype'].astype(str)=='Stack') & (imgTable['prodtype'].astype(str)=='image')) # basic selection
    stacks = imgTable[sel] # select

    # Nothing matched: report it and let the caller decide what to do
    if len(stacks) == 0:
        print ('No image available.')
        return None

    # getdata() yields the underlying values for masked and unmasked columns alike
    exptimes = np.ma.getdata(stacks['exptime']).astype('float')
    row = stacks[np.argmax(exptimes)] # pick image with longest exposure time

    url = row['access_url'] # get the download URL
    # bytes or str depending on the astropy/pyvo version; decode only when needed
    if isinstance(url, bytes):
        url = url.decode()

    print ('downloading deepest stacked image...')
    image = io.fits.getdata(utils.data.download_file(url,cache=True,show_progress=False,timeout=120))
    print(url)
    return image

In [None]:
# Fetch the deepest r- and i-band stacks at the same position and size as the g-band cutout.
# download_deep_stack returns None when no image matches, which would make the next cell fail.
rimage=download_deep_stack(ra,dec,fov,band='r')
iimage=download_deep_stack(ra,dec,fov,band='i')


In [None]:
# Build an RGB composite: i-band -> red, r-band -> green, g-band -> blue
color_image = make_lupton_rgb(iimage, rimage, gimage, stretch=30)
plt.imshow(color_image)