In [None]:
%matplotlib inline
import pandas as pd
from astroquery.mast import Observations, Catalogs
import matplotlib.pyplot as plt
import numpy as np
from astropy.table import Table, join, vstack
from astropy.io import ascii
import requests

## Notebook Goals:
- 1) gets/opens sector tic id lists
- 2) downloads tic catalog based on tic ids (temp cuts, get ra/dec)
- 3) uses ra/dec for mast query to get data urls
- 4) creates shell script to download raw lc files 

- 5) merges/matches pipeline stats into tic catalog

- 6) downloads Villanova Kepler EB catalog lcs

### Warning: Before executing this notebook, download necessary data files (directions below) and change all placeholder 'Filepath' & 'filename' for your system.

### Useful Function

In [None]:
# unique item finder

def uniquefinder(mylist):
    """Count how often each item occurs and list the distinct items.

    Parameters
    ----------
    mylist : iterable of hashable
        Items to scan (e.g. TIC IDs); may contain duplicates.

    Returns
    -------
    seen : dict
        Maps each distinct item to the number of times it occurs in ``mylist``.
    uniq_tics : list
        The distinct items, in order of first appearance.

    Also prints the number of distinct items.
    """
    seen = {}
    uniq_tics = []
    for x in mylist:
        if x in seen:
            seen[x] += 1
        else:
            # first sighting: remember the order in which new items appear
            seen[x] = 1
            uniq_tics.append(x)
    print('there are {} unique tics. Use the first output dictionary'.format(len(uniq_tics)),
          'to see number of occurrences for duplicates')
    return seen, uniq_tics

# ONE

Go to https://tess.mit.edu 
- under top menu item "Observations"-->"Target Lists" 
- download .csv file for desired sector(s)

In [None]:
# open target list
sector = 14  # enter relevant sector
# Zero-pad the sector to three digits. The previous 'S0{}' pattern only produced
# a three-digit tag (e.g. S014) for two-digit sectors; sector 5 gave 'S05' and
# sector 100 gave 'S0100'.
sector_file = pd.read_csv('Filepath/all_targets_S{:03d}_v1.csv'.format(sector), skiprows=5)  # overlaps kepler
sector_file

In [None]:
# extract the TIC IDs of the full sample as a numpy array
sector_ticids = sector_file.loc[:, 'TICID'].to_numpy()
len(sector_ticids)

# TWO

In [None]:
## query the TIC catalog for the sector targets so the Teff cut can be done before any lc download
catalog_data_sector = Catalogs.query_criteria(ID=sector_ticids, catalog='Tic')
# the TIC 'ID' column becomes 'target_name', the name used in the MAST file,
# which makes the later matching easier
catalog_data_sector.rename_column('ID', 'target_name')
catalog_data_sector

In [None]:
# temperature cut: keep only the rows whose Teff is below 6500 (cool stars only)
is_cool = catalog_data_sector['Teff'] < 6500
tempcut_sector = catalog_data_sector[is_cool]
# TIC IDs (strings) of the targets that survive the cut
tics_tempcut = list(tempcut_sector['target_name'])

print('Sector-{} full sample size:{}, size after temp cut:{}'.format(
    sector, len(catalog_data_sector), len(tempcut_sector)))

# THREE

In [None]:
# determine RA/DEC bounds for mast url query

ra_sector = tempcut_sector['ra']
dec_sector = tempcut_sector['dec']
# .format(sector) is required here; without it the label prints a literal '{}'
print('RA sector-{} (min/max):'.format(sector), min(ra_sector), max(ra_sector))
print('DEC sector-{} (min/max):'.format(sector), min(dec_sector), max(dec_sector))

# plot for confirmation
plt.plot(ra_sector, dec_sector)
plt.xlabel('RA')
plt.ylabel('DEC')
plt.title('Sector-{}'.format(sector));

Go to https://mast.stsci.edu/portal/Mashup/Clients/Mast/Portal.html
- under top menu click 'Advanced Search'
- query ra/dec bounds & check 'TESS' under 'Filters'

![image](images/mast_step1.png)
![image](images/mast_step2.png)

In [None]:
# read the MAST search-result file; it provides the urls used further down
mast_file = pd.read_csv(
    'Filepath/filename.csv',  # example filename: MAST_2020-08-06T2317
    skiprows=4,
)
print('Total number of mast files:', mast_file.shape[0])


In [None]:
## match mast file with tempcut tic ids

mastid = mast_file['target_name'].to_numpy()
# convert types for merge: make the table a DataFrame and force the join key to
# str on both sides. pd.merge raises when one key column is object (strings) and
# the other is integer, which happens if the CSV key column is parsed as numbers.
tempcut_sector_df = tempcut_sector.to_pandas()
tempcut_sector_df['target_name'] = tempcut_sector_df['target_name'].astype(str)
mast_keyed = mast_file.assign(target_name=mast_file['target_name'].astype(str))
# merge ---use this table in FOUR
mast_ticcat_merge = pd.merge(tempcut_sector_df, mast_keyed, how='left', on='target_name')
print('SEC-{} before merge:'.format(sector), len(tempcut_sector_df), ' after merge:', len(mast_ticcat_merge), 'targets')
# test for how many unique tics merged. A left merge keeps every catalog row even
# when the MAST file has no entry for it, so only rows that actually received a
# dataURL are counted as matched.
has_product = mast_ticcat_merge['dataURL'].notna()
dupes, uniques = uniquefinder(mast_ticcat_merge.loc[has_product, 'target_name'].to_numpy())

print('These two better match:', len(tempcut_sector_df), len(uniques))  # otherwise some targets have no lc datafiles


# FOUR

In [None]:
## create shell text strings from mast url's

# pieces shared by every command, built once instead of on every iteration
firststr = 'curl -C - -L -o '
webaddy = "https://mast.stsci.edu/api/v0.1/Download/file/?uri="

# The merge above is a left merge, so targets without a MAST product carry NaN in
# obs_id/dataURL. Skip them; otherwise commands that save 'nan_lc.fits' from
# '...?uri=nan' end up in the script.
downloadable = mast_ticcat_merge.dropna(subset=['obs_id', 'dataURL'])

# one command per product: curl -C - -L -o <obs_id>_lc.fits <download url>
curlscript = np.array([
    firststr + str(obs_id) + '_lc.fits ' + webaddy + str(data_url)
    for obs_id, data_url in zip(downloadable['obs_id'], downloadable['dataURL'])
])
curlscript.shape

In [None]:
## create the shell script that holds one curl command per line for the chosen sector

# The shebang only has an effect as the very first line of the file, so it is
# written once instead of before every curl command.
with open('Filepath/dl_sec{}.sh'.format(sector), 'w') as rsh:
    rsh.write('#! /bin/bash\n')
    for script in curlscript:
        rsh.write('{}\n'.format(script))





# FIVE
###### *Requires all cells up to section THREE to have been run, plus the results file from 'run_LS.py'*

In [None]:
# 1/2
# open data
# (the path has no '{}' placeholder, so the former .format(sector) call had no effect)
stats_sec = pd.read_csv('Filepath/ls_stats.csv')  # output from run_LS.py

# prepare catalogs for merge
# rename_column changes tempcut_sector in place; rename only while the old name
# is still present so that re-running this cell does not raise a KeyError
if 'target_name' in tempcut_sector.colnames:
    tempcut_sector.rename_column('target_name', 'TIC')  # change colname to match stats df
stats_sec.insert(1, "Sector", np.repeat(sector, len(stats_sec)), True)  # add sector so know where came from in final table
stats_sec_table = Table.from_pandas(stats_sec)  # change df to table to match tic catalog
stats_sec_table['TIC'] = stats_sec_table['TIC'].astype(str)  # change datatype to match tic catalog

# do merge
result_sec = join(stats_sec_table, tempcut_sector, keys='TIC')
# check result
print(len(result_sec))
result_sec[0:5]

In [None]:
# 2/2
# write the merged table to a csv file (generalized for any ONE sector above);
# an existing file at this path is overwritten
ascii.write(
    result_sec,
    'Filepath/ls_ticcat_table.csv',
    format='csv',
    overwrite=True,
)

# SIX

Go to http://keplerebs.villanova.edu for Kepler Eclipsing Binary Catalog
- choose 'File: comma-separated values'
- Click 'Download Catalog'
- open file in a text editor and remove the # in front of column names line

![image](images/ebs.png)

In [None]:
# load the EB catalog file (note: if you changed the filename, change it here as well)
KeplerEB_file = pd.read_csv('Filepath/keplerebs.villanova.edu', header=7)
# period bounds for the cut -- only periods within TESS easily measureable range
lower_period = .5
upper_period = 15
# keep the rows whose period lies strictly between the two bounds
eb_periods = KeplerEB_file['period']
in_period_range = eb_periods.gt(lower_period) & eb_periods.lt(upper_period)
KeplerEB_periodcut = KeplerEB_file[in_period_range]

print('column names:', list(KeplerEB_periodcut.columns))
print(
    'all EBs:', len(KeplerEB_file),
    '; EBs within {}-{} period range:'.format(lower_period, upper_period),
    len(KeplerEB_periodcut),
)

In [None]:
# Downloads data

# downloads every 3rd to get diverse sample (original table sorted by periods low-high)
n = 3  # only dl every 3rd lc
dl_count = 0
failed_kics = []  # KICs whose request failed, kept so they can be retried
for kic in KeplerEB_periodcut['KIC'].iloc[::n]:  # positions 0, n, 2n, ...
    url = 'http://keplerebs.villanova.edu/data/?k={}.00&cadence=lc&data=data'.format(kic)
    try:
        # the timeout keeps a stalled connection from hanging the notebook
        r = requests.get(url, allow_redirects=True, timeout=60)
        # do not save an HTTP error page as if it were a light curve
        r.raise_for_status()
    except requests.RequestException as err:
        print('skipping KIC {}: {}'.format(kic, err))
        failed_kics.append(kic)
        continue
    filename = 'Filepath/{}_lc.csv'.format(kic)
    # the context manager closes the file even if the write fails
    with open(filename, 'wb') as lc_file:
        lc_file.write(r.content)
    dl_count += 1
print('Total files downloaded (.5< per <15):', dl_count)
if failed_kics:
    print('Failed downloads:', len(failed_kics))