Written by Nick Easton for the Zooniverse LSST Project. 
<br> July, 2017

Create a script to query the PTF database for sources matching the given constraints.
Then compile light curves and phase-folded light curves for the returned sources.

In [2]:
import numpy as np
from astropy.table import Table as tbl
import urllib.request
import urllib.parse
import subprocess
import matplotlib.pyplot as plt
from cesium import featurize
%matplotlib inline
import sqlite3

## Query for the given objects

In [None]:
# Endpoint of the IRSA Gator query service used by every request in this notebook.
url = (
    "http://irsa.ipac.caltech.edu"
    "/cgi-bin/Gator/nph-query?"
)

In [None]:
# Form fields for the object query: catalog to search, columns to return and
# the row constraints. Key order is kept as-is because it determines the order
# of the url-encoded fields.
values = dict(
    catalog='ptf_objects',
    spatial='None',
    outfmt='1',
    selcols='ra,dec,oid',
    constraints='(bestchisq>100)and(ngoodobs>500)',
)

In [None]:
# Ask Gator for the constrained PTF object list with curl and save the reply
# to objects.tbl.
# NOTE: the row filter has to be sent in the form field named "constraints"
# (the same key the urllib request and the light-curve query use); a
# misspelled field name is presumably ignored by the service, so no filter is
# applied.
# The command is passed as an argument list (no shell) so the parentheses and
# ">" in the constraint expression reach curl verbatim.
subprocess.call([
    'curl',
    '-F', 'catalog=ptf_objects',
    '-F', 'spatial=None',
    '-F', 'outfmt=1',
    '-F', 'selcols=ra,dec,oid',
    '-F', 'constraints=(bestchisq>100)and(ngoodobs>500)',
    'http://irsa.ipac.caltech.edu/cgi-bin/Gator/nph-query?',
    '-o', 'objects.tbl',
])

I'm not sure why subprocess.call doesn't seem to work for this specific case. However, the urllib code below does the job.

In [None]:
%%timeit
# Send the form fields to Gator (supplying a request body makes urllib issue a
# POST) and save the raw response bytes to objects.tbl.
data = urllib.parse.urlencode(values).encode('utf-8')
req = urllib.request.Request(url, data)
# Context managers close the HTTP response and the output file even when the
# read or the write raises.
with urllib.request.urlopen(req) as resp:
    respdata = resp.read()
with open('objects.tbl', 'wb') as saveFile:
    saveFile.write(respdata)

### Read that data file in

In [3]:
# Load the saved query result (IPAC table format) into an astropy Table.
objects = tbl.read(
    '/home/nke2/NUREU17/LSST/VariableStarClassification/scripts/ptf_query/objects.tbl',
    format='ipac',
)

In [4]:
# Open the SQLite feature database and create the cursor that the cells below
# pass to saveFeat and use for their own queries.
conn = sqlite3.connect(
    '/home/nke2/NUREU17/LSST/VariableStarClassification/features.db'
)
cur = conn.cursor()

In [5]:
def saveFeat(lc, tName, cur, conn):
    """Featurize each new object of a light-curve table and store the result.

    For every distinct ``oid`` in ``lc`` that is not yet present in table
    ``tName``, the observations with ``mag_autocorr > 0`` are handed to
    cesium's ``featurize_time_series`` and the resulting row of values is
    inserted into ``tName``. Objects already in the table are reported with a
    printed message and skipped. The connection is committed once, after all
    objects have been processed.

    Parameters
    ----------
    lc : table-like
        Must support ``lc[column]`` and boolean-mask row selection, and
        provide the columns ``oid``, ``obsmjd``, ``mag_autocorr`` and
        ``magerr_auto``.
    tName : str
        Destination table name. It is spliced into the SQL text (table names
        cannot be bound as ``?`` parameters), so it must come from trusted
        code, never from user input.
    cur : sqlite3 cursor
        Cursor used for the lookup and the inserts.
    conn : sqlite3 connection
        Connection that is committed at the end.
    """
    feats_to_use = [
        'amplitude',
        'flux_percentile_ratio_mid20',
        'flux_percentile_ratio_mid35',
        'flux_percentile_ratio_mid50',
        'flux_percentile_ratio_mid65',
        'flux_percentile_ratio_mid80',
        'max_slope',
        'maximum',
        'median',
        'median_absolute_deviation',
        'minimum',
        'percent_amplitude',
        'percent_beyond_1_std',
        'percent_close_to_median',
        'percent_difference_flux_percentile',
        'period_fast',
        'qso_log_chi2_qsonu',
        'qso_log_chi2nuNULL_chi2nu',
        'skew',
        'std',
        'stetson_j',
        'stetson_k',
        'weighted_average',
        'fold2P_slope_10percentile',
        'fold2P_slope_90percentile',
        'freq1_amplitude1',
        'freq1_amplitude2',
        'freq1_amplitude3',
        'freq1_amplitude4',
        'freq1_freq',
        'freq1_lambda',
        'freq1_rel_phase2',
        'freq1_rel_phase3',
        'freq1_rel_phase4',
        'freq1_signif',
        'freq2_amplitude1',
        'freq2_amplitude2',
        'freq2_amplitude3',
        'freq2_amplitude4',
        'freq2_freq',
        'freq2_rel_phase2',
        'freq2_rel_phase3',
        'freq2_rel_phase4',
        'freq3_amplitude1',
        'freq3_amplitude2',
        'freq3_amplitude3',
        'freq3_amplitude4',
        'freq3_freq',
        'freq3_rel_phase2',
        'freq3_rel_phase3',
        'freq3_rel_phase4',
        'freq_amplitude_ratio_21',
        'freq_amplitude_ratio_31',
        'freq_frequency_ratio_21',
        'freq_frequency_ratio_31',
        'freq_model_max_delta_mags',
        'freq_model_min_delta_mags',
        'freq_model_phi1_phi2',
        'freq_n_alias',
        'freq_signif_ratio_21',
        'freq_signif_ratio_31',
        'freq_varrat',
        'freq_y_offset',
        'linear_trend',
        'medperc90_2p_p',
        'p2p_scatter_2praw',
        'p2p_scatter_over_mad',
        'p2p_scatter_pfold_over_mad',
        'p2p_ssqr_diff_over_var',
        'scatter_res_raw',
    ]

    # One placeholder per feature plus one for the 'oid' meta feature. Deriving
    # the count from the list keeps the statement in sync if features change.
    placeholders = ", ".join("?" * (len(feats_to_use) + 1))
    insert_sql = "insert into " + tName + " values (" + placeholders + ")"

    cur.execute("""select oid from {:}""".format(tName))
    # fetchall() returns 1-tuples; unpack them into a set of scalars so the
    # membership test below is a plain hash lookup instead of comparing a
    # scalar against tuples.
    known_oids = {row[0] for row in cur.fetchall()}

    for oid in np.unique(lc['oid']):
        if oid in known_oids:
            print('Database already contains a ', oid)
            continue

        # Keep this object's rows only, dropping non-positive magnitudes.
        mask = np.logical_and(lc['oid'] == oid, lc['mag_autocorr'] > 0)
        selected = lc[mask]

        fset = featurize.featurize_time_series(
            selected['obsmjd'],
            selected['mag_autocorr'],
            selected['magerr_auto'],
            meta_features={'oid': str(oid)},
            features_to_use=feats_to_use,
        )

        # `.values` replaces `get_values()`, which newer pandas releases
        # removed; both give the same underlying array of row values.
        cur.execute(insert_sql, fset.values[0])

    conn.commit()

In [9]:
# Remove every row from feats2. execute() hands back the cursor, so the
# fetchall() (an empty list after a DELETE) can be chained for the cell output.
cur.execute("delete from feats2").fetchall()

[]

If reading the data in for the first time, use this cell. It loops over each oid among the objects queried above and executes a query for that source's light curve. Additionally, it reads each returned data file back in for later use.

In [None]:
#curves = {}

# Sample up to 1000 distinct catalog rows at random. The population size is
# taken from the table itself rather than a hard-coded row count, so the cell
# keeps working when the object query returns a different number of rows.
n_objects = len(objects)
for i in np.random.choice(n_objects, min(1000, n_objects), replace = False):
    oid = objects['oid'][i]
    fname = 'curves_oid{:_>17}.tbl'.format(oid)
    # Argument list (no shell): the constraint expression is passed to curl
    # verbatim without any quoting.
    cmd = [
        'curl',
        '-F', 'catalog=ptf_lightcurves',
        '-F', 'spatial=None',
        '-F', 'constraints=(oid={:})'.format(oid),
        '-F', 'outfmt=1',
        '-F', 'selcols=oid,obsmjd,mag_autocorr,magerr_auto,fid,ra,dec',
        'http://irsa.ipac.caltech.edu/cgi-bin/Gator/nph-query?',
        '-o', fname,
    ]
    # A non-zero exit status means curl did not complete the transfer; skip
    # the object instead of parsing a missing or stale file.
    if subprocess.call(cmd) != 0:
        print('curl failed for oid', oid)
        continue
    curves = tbl.read(fname, format = 'ipac')
    saveFeat(curves, 'feats2', cur, conn)
    
    #%run '/home/nke2/NUREU17/LSST/VariableStarClassification/saveFeat.py' (curves, 'feats2', cur, conn)    
    #curves[i] = tbl.read('curves_oid{:_>17}.tbl'.format(objects['oid'][i]), format = 'ipac')
    

  out_dict['signif'] = lprob2sigma(np.log(prob))
  f = 0.5 * np.log(2. / np.pi) - 0.5 * sigma**2 - np.log(sigma) - lprob
























Same as above. However, if the sources have already been queried, this just reads in the existing files to save some time.

In [9]:
#curves = {}
for i in range(3):
    # Re-use the light-curve file already on disk for this object.
    lc_file = 'curves_oid{:_>17}.tbl'.format(objects['oid'][i])
    curves = tbl.read(lc_file, format = 'ipac')
    saveFeat(curves, 'feats2', cur, conn)

Database already contains a  51892000005737
Database already contains a  51892060016842
Database already contains a  51892060003794


In [7]:
# List the oid of every object currently stored in feats2. execute() hands
# back the cursor, so the fetch is chained onto it.
cur.execute("select oid from feats2").fetchall()


[(32752090001847,),
 (1000132100007797,),
 (1000262000006855,),
 (1000032070007194,),
 (1010032020002143,),
 (34782080005344,),
 (1000092000005037,),
 (1000132040002603,),
 (35782020007679,),
 (1000012060016462,),
 (1000202110003387,),
 (34782080005633,),
 (1000372000006008,),
 (1000372000002359,),
 (1000202110000785,),
 (1010032020002995,),
 (35782020008950,),
 (1000202110005260,),
 (1000102020002012,),
 (1000202110004534,),
 (33762090006717,),
 (1010032020001985,),
 (34782080010069,),
 (1000202110001550,),
 (1000132090002078,),
 (1010032070006672,),
 (1000202110008938,),
 (1000132040001070,),
 (227202100010200,),
 (32752090001468,),
 (1000132100003481,),
 (1000202110007828,),
 (1000372000004661,),
 (32752090008077,),
 (1000202115000795,),
 (32752090010856,),
 (35782020007075,),
 (32752090004973,),
 (33772070003836,),
 (35782020002180,),
 (1000202110005039,),
 (1000202110000283,),
 (1000132100004686,),
 (1000132090003034,),
 (227202105019755,),
 (35782020006032,),
 (1000202110008605,)

In both of the above cases, the loop has been shortened so that the script doesn't exceed memory or storage limits (and so I could interact with it much more quickly).

Plots the raw light curves together in one figure. Some data has been masked; there seems to be a series of observations whose magnitudes were substantially inaccurate.

In [None]:
# One panel per object for the first three queried objects.
fig = plt.figure(figsize = (12, 20))
for i in range(0,3):
    # Read this object's own light-curve file. `curves` holds a single table
    # (the last one read), so indexing it with i would select a row of that
    # table rather than the i-th object.
    lc = tbl.read('curves_oid{:_>17}.tbl'.format(objects['oid'][i]), format = 'ipac')
    ax = fig.add_subplot(3,1,i+1)
    # Drop observations with non-positive magnitudes before plotting.
    masked = np.where(lc['mag_autocorr'] > 0 )
    ax.errorbar(lc['obsmjd'][masked], lc['mag_autocorr'][masked], yerr = lc['magerr_auto'][masked], fmt = 'bo')
    ax.set_xlabel('Time(days)')
    ax.set_ylabel('Observed magnitude')

plt.tight_layout()

### Phase folded curves
Each light curve has been phase folded with the first frequency that Lomb-Scargle detects. It is no surprise that the first 3 sources are not periodic.

In [None]:
feats_to_use = ["freq1_freq"]
# Read the first object's own light-curve file; `curves` holds a single table,
# so curves[0] would be one row of it rather than a light curve.
lc = tbl.read('curves_oid{:_>17}.tbl'.format(objects['oid'][0]), format = 'ipac')
want = np.where(lc['mag_autocorr'] > 0)
# The cesium module is imported as `featurize` in this notebook.
fset = featurize.featurize_time_series(times = lc['obsmjd'][want], values = lc['mag_autocorr'][want], errors = lc['magerr_auto'][want], features_to_use = feats_to_use)

# freq1_freq is a frequency, so the folding period is its reciprocal
# (presumably in days, since obsmjd is an MJD time stamp -- confirm).
freq1 = fset['freq1_freq'][0][0]
per1 = 1.0 / freq1
print(per1)

plt.errorbar((lc['obsmjd'][want]%per1)/per1, lc['mag_autocorr'][want], yerr = lc['magerr_auto'][want], fmt = 'bo')
plt.xlabel('Phase')
plt.ylabel('Observed magnitude')

In [None]:
feats_to_use = ["freq1_freq"]
# Read the second object's own light-curve file; `curves` holds a single
# table, so curves[1] would be one row of it rather than a light curve.
lc = tbl.read('curves_oid{:_>17}.tbl'.format(objects['oid'][1]), format = 'ipac')
want = np.where(lc['mag_autocorr'] > 0)
# The cesium module is imported as `featurize` in this notebook.
fset = featurize.featurize_time_series(times = lc['obsmjd'][want], values = lc['mag_autocorr'][want], errors = lc['magerr_auto'][want], features_to_use = feats_to_use)

# freq1_freq is a frequency, so the folding period is its reciprocal
# (presumably in days, since obsmjd is an MJD time stamp -- confirm).
freq1 = fset['freq1_freq'][0][0]
per1 = 1.0 / freq1
print(per1)

plt.errorbar((lc['obsmjd'][want]%per1)/per1, lc['mag_autocorr'][want], yerr = lc['magerr_auto'][want], fmt = 'bo')
plt.xlabel('Phase')
plt.ylabel('Observed magnitude')

In [None]:
feats_to_use = ["freq1_freq"]
# Read the third object's own light-curve file; `curves` holds a single table,
# so curves[2] would be one row of it rather than a light curve.
lc = tbl.read('curves_oid{:_>17}.tbl'.format(objects['oid'][2]), format = 'ipac')
want = np.where(lc['mag_autocorr'] > 0)
# The cesium module is imported as `featurize` in this notebook.
fset = featurize.featurize_time_series(times = lc['obsmjd'][want], values = lc['mag_autocorr'][want], errors = lc['magerr_auto'][want], features_to_use = feats_to_use)

# freq1_freq is a frequency, so the folding period is its reciprocal
# (presumably in days, since obsmjd is an MJD time stamp -- confirm).
freq1 = fset['freq1_freq'][0][0]
per1 = 1.0 / freq1
print(per1)

plt.errorbar((lc['obsmjd'][want]%per1)/per1, lc['mag_autocorr'][want], yerr = lc['magerr_auto'][want], fmt = 'bo')
plt.xlabel('Phase')
plt.ylabel('Observed magnitude')

It would be nice if cesium returned a periodogram somewhere. This would be helpful to determine how well we can trust the reported period. Additionally, as discussed, it may prove useful to show Zooniverse users said periodogram.