# Importing *Kepler* data into LSD

In [2]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from astropy.table import Table
import pandas as pd
import sqlite3

In [3]:
from lsd import DB
from lsd.bounds import beam, rectangle
from astropy.table import Table, Column

## Set environment variables

In [4]:
import os

# One scratch directory is used for the LSD database, cache and temp files,
# so it is named once instead of repeating the literal three times.
# NOTE(review): absolute, user-specific path -- point this at your own
# directory before re-running the notebook.
LSD_ROOT = '/astro/users/jlustigy/Courses/big_data/lsdtemp/'

# Environment variables must be strings, hence '8' rather than 8.
os.environ['NWORKERS'] = '8'
os.environ['LSD_DB'] = LSD_ROOT
os.environ['LSD_CACHEDIR'] = LSD_ROOT
os.environ['LSD_TEMPDIR'] = LSD_ROOT

# Echo the values back from the environment, in the same order as before,
# to confirm they were applied. Single-argument print(...) produces the
# same output on Python 2 and Python 3.
for key in ('NWORKERS', 'LSD_DB', 'LSD_CACHEDIR', 'LSD_TEMPDIR'):
    print(os.environ[key])

8
/astro/users/jlustigy/Courses/big_data/lsdtemp/
/astro/users/jlustigy/Courses/big_data/lsdtemp/
/astro/users/jlustigy/Courses/big_data/lsdtemp/


## Get and inspect the Kepler data

Here I am interested in both [Kepler Lightcurves](https://archive.stsci.edu/pub/kepler/lightcurves/) and [Kepler Target Pixel Files](https://archive.stsci.edu/pub/kepler/target_pixel_files/). I will write an LSD table schema for each of the two file types.

In [5]:
# Read one Kepler lightcurve FITS file into an astropy Table.
# `t_llc` is reused by the inspection cells that follow.
t_llc = Table.read('kplr000757076-2009131105131_llc.fits')



In [6]:
# Read one Kepler Target Pixel File (gzip-compressed FITS) into an astropy Table.
# `t_lpd` is reused by the inspection cells that follow.
t_lpd = Table.read('kplr001429092-2009259160929_lpd-targ.fits.gz')



In [7]:
# List every lightcurve column as "(name, dtype)". Names are lower-cased
# to match the lower-case column names used in the kplr_llc.yaml schema.
for column in t_llc.dtype.names:
    # Single-argument print(...) gives the same output on Python 2 and 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    print("(%s, %s)" % (column.lower(), t_llc[column].dtype))

(time, >f8)
(timecorr, >f4)
(cadenceno, >i4)
(sap_flux, >f4)
(sap_flux_err, >f4)
(sap_bkg, >f4)
(sap_bkg_err, >f4)
(pdcsap_flux, >f4)
(pdcsap_flux_err, >f4)
(sap_quality, >i4)
(psf_centr1, >f8)
(psf_centr1_err, >f4)
(psf_centr2, >f8)
(psf_centr2_err, >f4)
(mom_centr1, >f8)
(mom_centr1_err, >f4)
(mom_centr2, >f8)
(mom_centr2_err, >f4)
(pos_corr1, >f4)
(pos_corr2, >f4)


In [8]:
# List every target-pixel-file column as "(name, dtype)". Names are
# lower-cased to match the lower-case column names used in the
# kplr_lpd.yaml schema.
for column in t_lpd.dtype.names:
    # Single-argument print(...) gives the same output on Python 2 and 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    print("(%s, %s)" % (column.lower(), t_lpd[column].dtype))

(time, >f8)
(timecorr, >f4)
(cadenceno, >i4)
(raw_cnts, >i4)
(flux, >f4)
(flux_err, >f4)
(flux_bkg, >f4)
(flux_bkg_err, >f4)
(cosmic_rays, >f4)
(quality, >i4)
(pos_corr1, >f4)
(pos_corr2, >f4)
(rb_level, >f4)


In [9]:
# Display only the first row of the lightcurve table; the rendered output
# also shows each column's unit and dtype.
t_llc[:1]

TIME,TIMECORR,CADENCENO,SAP_FLUX,SAP_FLUX_ERR,SAP_BKG,SAP_BKG_ERR,PDCSAP_FLUX,PDCSAP_FLUX_ERR,SAP_QUALITY,PSF_CENTR1,PSF_CENTR1_ERR,PSF_CENTR2,PSF_CENTR2_ERR,MOM_CENTR1,MOM_CENTR1_ERR,MOM_CENTR2,MOM_CENTR2_ERR,POS_CORR1,POS_CORR2
BJD - 2454833,d,Unnamed: 2_level_1,e-/s,e-/s,e-/s,e-/s,e-/s,e-/s,Unnamed: 9_level_1,pix,pix,pix,pix,pix,pix,pix,pix,pixels,pixels
float64,float32,int32,float32,float32,float32,float32,float32,float32,int32,float64,float32,float64,float32,float64,float32,float64,float32,float32,float32
120.53932244,0.00117002,568,304538.0,16.7787,5315.84,3.10501,315591.0,17.3685,0,,,,,23.3980294029,8.30202e-05,101.427953607,9.69935e-05,0.00776424,0.0114545


In [10]:
# Display only the first row of the target pixel table; the column headers
# in the rendered output include the per-row array shape, e.g. "FLUX [4,6]".
t_lpd[:1]

TIME,TIMECORR,CADENCENO,"RAW_CNTS [4,6]","FLUX [4,6]","FLUX_ERR [4,6]","FLUX_BKG [4,6]","FLUX_BKG_ERR [4,6]","COSMIC_RAYS [4,6]",QUALITY,POS_CORR1,POS_CORR2,"RB_LEVEL [4,5]"
BJD - 2454833,d,Unnamed: 2_level_1,ct,e-/s,e-/s,e-/s,e-/s,e-/s,Unnamed: 9_level_1,pix,pix,sigma
float64,float32,int32,int32,float32,float32,float32,float32,float32,int32,float32,float32,float32
169.520777866,0.00330245,2965,421483 .. 425883,-10.6894 .. 319.626,1.46168 .. 1.55922,176.151 .. 175.997,0.119934 .. 0.115563,nan .. nan,256,0.0757977,0.22162,0.807526 .. 0.175194


In [11]:
# Read a second Target Pixel File (a different target) so its column
# shapes can be compared with those of `t_lpd`.
t = Table.read('kplr001429153-2009259160929_lpd-targ.fits.gz')

In [12]:
# Display only the first row of the second target pixel table; compare the
# array shapes in the column headers with those shown for `t_lpd`.
t[:1]

TIME,TIMECORR,CADENCENO,"RAW_CNTS [5,6]","FLUX [5,6]","FLUX_ERR [5,6]","FLUX_BKG [5,6]","FLUX_BKG_ERR [5,6]","COSMIC_RAYS [5,6]",QUALITY,POS_CORR1,POS_CORR2,"RB_LEVEL [5,5]"
BJD - 2454833,d,Unnamed: 2_level_1,ct,e-/s,e-/s,e-/s,e-/s,e-/s,Unnamed: 9_level_1,pix,pix,sigma
float64,float32,int32,int32,float32,float32,float32,float32,float32,int32,float32,float32,float32
169.520776917,0.0033015,2965,-- .. --,nan .. nan,nan .. nan,nan .. nan,nan .. nan,nan .. nan,256,0.0757166,0.221369,0.807526 .. 0.324147


### Note: The RAW_CNTS, FLUX, FLUX_ERR, etc. columns hold differently sized arrays in different FITS files :( Therefore, the schema below for target pixel files is appropriate only for this particular FITS file.

# Import *Kepler* lightcurves into LSD

## Inspect lightcurve Table Schema `kplr_llc.yaml`

In [13]:
# Print the lightcurve table schema file so it is recorded in the notebook.
! cat kplr_llc.yaml

# Schema for Kepler lightcurves
filters: {complevel: 5, complib: blosc}
schema:
  common:
    primary_key: obj_id
    spatial_keys: [obj_id, cadenceno]
    columns:
    - [obj_id, u8]
    - [time, f8]
    - [timecorr, f4]
    - [cadenceno, i4]
  photometry: 
    columns:
    - [sap_flux, f4]
    - [sap_flux_err, f4]
    - [sap_bkg, f4]
    - [sap_bkg_err, f4]
    - [pdcsap_flux, f4]
    - [pdcsap_flux_err, f4]
    - [sap_quality, i4]
    - [psf_centr1, f8]
    - [psf_centr1_err, f4]
    - [psf_centr2, f8]
    - [psf_centr2_err, f4]
  telemetry:
    columns:
    - [mom_centr1, f8]
    - [mom_centr1_err, f4]
    - [mom_centr2, f8]
    - [mom_centr2_err, f4]
    - [pos_corr1, f4]
    - [pos_corr2, f4]
    

## Create empty LSD table

In [14]:
# (Re)create the LSD table `kplr_llc` from kplr_llc.yaml; --drop-existing
# drops any existing table of that name first, so its rows are discarded.
! lsd-admin create table --drop-existing --schema kplr_llc.yaml kplr_llc

Table 'kplr_llc' dropped.

-------- committing 20160313020454.693762 [kplr_llc] ---------
[kplr_llc] Updating tablet catalog: [256 el.]::::::::::::::::::::>  0.25 sec
[kplr_llc] Updating neighbors: Already up to date.
[kplr_llc] Updating tablet catalog: [256 el.]::::::::::::::::::::>  0.26 sec
[kplr_llc] Updating stats: [0 el.]>  0.00 sec
[kplr_llc] Marking tablets read-only...
----------- success 20160313020454.693762 [kplr_llc] ---------

Table 'kplr_llc' created.


## Import the FITS file into the new LSD table

In [15]:
# Import the lightcurve FITS file into the `kplr_llc` table created above.
! lsd-import fits kplr_llc kplr000757076-2009131105131_llc.fits

Importing from 1 pieces:
  ===> Imported kplr000757076-2009131105131_llc.fits                                   [1/1, 100.00%] +    476/476           476 (0/0 min.)
done

-------- committing 20160313020458.436753 [kplr_llc] ---------
[kplr_llc] Updating tablet catalog: [256 el.]::::::::::::::::::::>  0.27 sec
[kplr_llc] Updating neighbors: [49 el.]::::::::::::::::::::[203 el.]++++++++++++++++++++>  2.60 sec
           Total 4237 cached objects in 203 cells
[kplr_llc] Updating tablet catalog: [256 el.]::::::::::::::::::::>  0.33 sec
[kplr_llc] Updating stats: [203 el.]::::::::::::::::::::>  0.06 sec [203 el.]::::::::::::::::::::>  0.25 sec
[kplr_llc] Marking tablets read-only...
----------- success 20160313020458.436753 [kplr_llc] ---------



# Import *Kepler* Target Pixel File into LSD

## Inspect Target Pixel File Table Schema `kplr_lpd.yaml`

In [16]:
# Print the target pixel file table schema so it is recorded in the notebook.
! cat kplr_lpd.yaml

# Schema for Kepler target pixel files
filters: {complevel: 5, complib: blosc}
schema:
  common:
    primary_key: obj_id
    spatial_keys: [obj_id, cadenceno]
    columns:
    - [obj_id, u8]
    - [time, f8]
    - [timecorr, f4]
    - [cadenceno, i4]
    - [cosmic_rays, '(4,6)f4']
    - [quality, i4]
    - [pos_corr1, f4]
    - [pos_corr2, f4]
    - [rb_level, '(4,5)f4']
  photometry: 
    columns:
    - [raw_cnts, '(4,6)i4']
    - [flux, '(4,6)f4']
    - [flux_err, '(4,6)f4']
    - [flux_bkg, '(4,6)f4']
    - [flux_bkg_err, '(4,6)f4']
    

## Create empty LSD table

In [17]:
# (Re)create the LSD table `kplr_lpd` from kplr_lpd.yaml; --drop-existing
# drops any existing table of that name first, so its rows are discarded.
! lsd-admin create table --drop-existing --schema kplr_lpd.yaml kplr_lpd

Table 'kplr_lpd' dropped.

-------- committing 20160313020509.990818 [kplr_lpd] ---------
[kplr_lpd] Updating tablet catalog: [256 el.]::::::::::::::::::::>  0.24 sec
[kplr_lpd] Updating neighbors: Already up to date.
[kplr_lpd] Updating tablet catalog: [256 el.]::::::::::::::::::::>  0.24 sec
[kplr_lpd] Updating stats: [0 el.]>  0.00 sec
[kplr_lpd] Marking tablets read-only...
----------- success 20160313020509.990818 [kplr_lpd] ---------

Table 'kplr_lpd' created.


## Import the FITS file into the new LSD table

In [18]:
# Import the target pixel FITS file into the `kplr_lpd` table created above.
# The kplr_lpd.yaml schema hard-codes (4,6)/(4,5) array shapes, which match
# this particular file.
! lsd-import fits kplr_lpd kplr001429092-2009259160929_lpd-targ.fits.gz

Importing from 1 pieces:
  ===> Imported kplr001429092-2009259160929_lpd-targ.fits.gz                           [1/1, 100.00%] +   4354/4354         4354 (0/0 min.)
done

-------- committing 20160313020513.713489 [kplr_lpd] ---------
[kplr_lpd] Updating tablet catalog: [256 el.]::::::::::::::::::::>  0.30 sec
[kplr_lpd] Updating neighbors: [49 el.]::::::::::::::::::::[203 el.]++++++++++++++++++++>  2.40 sec
           Total 39888 cached objects in 203 cells
[kplr_lpd] Updating tablet catalog: [256 el.]::::::::::::::::::::>  0.40 sec
[kplr_lpd] Updating stats: [203 el.]::::::::::::::::::::>  0.06 sec [203 el.]::::::::::::::::::::>  0.32 sec
[kplr_lpd] Marking tablets read-only...
----------- success 20160313020513.713489 [kplr_lpd] ---------

