# Creating filepaths for project lightcurves

## Part 1: The XRP lookup

In [2]:
import os
os.chdir('../../')

import pandas as pd
import json

In [3]:
#data = pd.read_csv('/storage/astro2/phrdhx/tesslcs/fulllookup.csv')

In [None]:
with open('colnames.json', 'r', encoding='utf-8') as f:
    check = f.read()
    columns = json.loads(check)
    columns = columns['column_names']

In [None]:
data['TIC_ID'] = data['TIC_ID'].astype(str).str.zfill(16)

# Convert sector to a 4-digit string with leading zeros
data['Sector_leading_zeros'] = data['Sector'].astype(str).str.zfill(4)

# Create the new_path column
data['new_path'] = (
    's' + data['Sector_leading_zeros'] + '/' +
    data['TIC_ID'].str[0:4] + '/' +
    data['TIC_ID'].str[4:8] + '/' +
    data['TIC_ID'].str[8:12] + '/' +
    data['TIC_ID'].str[12:16] + '/' +
    'hlsp_gsfc-eleanor-lite_tess_ffi_s' + data['Sector_leading_zeros'] + '-' + data['TIC_ID'] + '_tess_v1.0_lc.fits'
)

common_string = '/storage/astro2/phrdhx/eleanor-lite/'

data['new_path'] = common_string + data['new_path']
#data.to_csv('xrplookup.csv')

---

## Part 2: Using `project_lookup` to produce file paths of for Sector-by-Sector copies

In [1]:
import pandas as pd
import os
os.chdir('../../')

In [2]:
#s1 = pd.read_csv('s0001.csv')
lookup = pd.read_csv('project_lookup.csv')
lookup = lookup.loc[:, ~lookup.columns.str.contains('^Unnamed')]

In [4]:
export_s22 = lookup[lookup.Sector == 21].new_path.to_list()
len(export_s22)

241100

### Reading up the `eleanor-lite` lookup that I generated

In [15]:
tic = pd.read_csv('tic.txt',sep='\t')
tic.head()

Unnamed: 0,tic_id,Tmag
0,269273552,-1.745
1,175934060,-1.679
2,459832522,-1.254
3,245873777,-1.2
4,272314138,-1.043


In [28]:
eleanor_lite_s21 = pd.read_csv('s21-eleanor-lite.txt',header=None,names=['tic_id'])
print(len(eleanor_lite_s21))
eleanor_lite_s21 = eleanor_lite_s21.drop_duplicates(subset=['tic_id'])
print(str(len(eleanor_lite_s21)) + " after dropping duplicates")

236276
236276 after dropping duplicates


The `eleanor-lite` lightcurves met for our projects:

In [20]:
project_eleanor_lite_s21 = eleanor_lite_s21[eleanor_lite_s21.tic_id.isin(tic.tic_id)]

This does not include the lightcurves that exist in the old lookup file. To do this, we can just query the lightcurves that were not present in our `project_eleanor_lite_s21`

In [4]:
with open('s0024.txt', 'w') as file:
        for item in export_s22:
            file.write(str(item) + '\n')

---

---

### Part 2.5: For TICs that did not exist in `old`, make the paths for those and export

In [1]:
import os
os.chdir('../../')
import pandas as pd

In [2]:
tic = pd.read_csv('tic.txt',sep='\t')
tic.head()

Unnamed: 0,tic_id,Tmag
0,269273552,-1.745
1,175934060,-1.679
2,459832522,-1.254
3,245873777,-1.2
4,272314138,-1.043


In [3]:
lookup = pd.read_csv('project_lookup.csv')
lookup = lookup.loc[:, ~lookup.columns.str.contains('^Unnamed')]

In [9]:
s21 = lookup[lookup.Sector == 1]
s21.head()

Unnamed: 0,Filename,RA,Dec,TIC_ID,Sector,Camera,CCD,Magnitude,Sector_leading_zeros,new_path
0,tesslcs_sector_1_104/2_min_cadence_targets/tes...,318.057138,-40.269362,159670453,1,1,1,5.4117,1,/storage/astro2/phrdhx/eleanor-lite/s0001/0000...
1,tesslcs_sector_1_104/2_min_cadence_targets/tes...,312.371213,-41.7115,389450629,1,1,1,7.8065,1,/storage/astro2/phrdhx/eleanor-lite/s0001/0000...
2,tesslcs_sector_1_104/2_min_cadence_targets/tes...,318.093446,-41.931732,159671133,1,1,1,9.5973,1,/storage/astro2/phrdhx/eleanor-lite/s0001/0000...
3,tesslcs_sector_1_104/2_min_cadence_targets/tes...,312.316831,-41.644755,389450601,1,1,1,10.1857,1,/storage/astro2/phrdhx/eleanor-lite/s0001/0000...
4,tesslcs_sector_1_104/2_min_cadence_targets/tes...,316.741183,-40.591502,115115510,1,1,1,8.838,1,/storage/astro2/phrdhx/eleanor-lite/s0001/0000...


In [10]:
len(s21)

286208

I need to know how many TICs were not in the old sector target lists

---

## Part 3: Getting Project TIC IDs from Catalog
- Using the TIC catalogue, I want to create file paths for the TICs that did not exist in the old `project_lookup.csv` file.

In [4]:
import pandas as pd

## Import TIC Catalog for stars up to 13th mag

In [11]:
tic = pd.read_csv('../../tic.txt',sep='\t')
tic.head()

Unnamed: 0,tic_id,Tmag
0,269273552,-1.745
1,175934060,-1.679
2,459832522,-1.254
3,245873777,-1.2
4,272314138,-1.043


In [9]:
# s1 = pd.read_csv('target_lists/s0001.csv',header=None,names=['TIC_ID','RA','DEC'])
# s2 = pd.read_csv('target_lists/s0002.csv',header=None,names=['TIC_ID','RA','DEC'])
# s3 = pd.read_csv('target_lists/s0003.csv',header=None,names=['TIC_ID','RA','DEC'])
# s4 = pd.read_csv('target_lists/s0004.csv',header=None,names=['TIC_ID','RA','DEC'])
# s5 = pd.read_csv('target_lists/s0005.csv',header=None,names=['TIC_ID','RA','DEC'])
# s6 = pd.read_csv('target_lists/s0006.csv',header=None,names=['TIC_ID','RA','DEC'])
# s7 = pd.read_csv('target_lists/s0007.csv',header=None,names=['TIC_ID','RA','DEC'])
s8 = pd.read_csv('../../target_lists/s0008.csv',header=None,names=['TIC_ID','RA','DEC'])

  exec(code_obj, self.user_global_ns, self.user_ns)


#### This gives all the TIC IDs from each sector of the `eleanor-lite` data that are in the TIC catalog. They do _not_ include the data from the old lookup that still exist.

In [12]:
# s1_project = tic[tic.tic_id.isin(s1.TIC_ID)] 
# s2_project = tic[tic.tic_id.isin(s2.TIC_ID)]
# s3_project = tic[tic.tic_id.isin(s3.TIC_ID)]
# s4_project = tic[tic.tic_id.isin(s4.TIC_ID)]
# s5_project = tic[tic.tic_id.isin(s5.TIC_ID)]
# s6_project = tic[tic.tic_id.isin(s6.TIC_ID)]
# s7_project = tic[tic.tic_id.isin(s7.TIC_ID)]
s8_project = tic[tic.tic_id.isin(s8.TIC_ID)]


In [34]:
old = pd.read_csv('project_lookup.csv')


In [52]:
old[old.Sector == 7]

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,Unnamed: 0.1.1.1,Filename,RA,Dec,TIC_ID,Sector,Camera,CCD,Magnitude,Sector_leading_zeros,new_path
1936547,1936547,1936547,8148314,0,tesslcs_sector_7_104/2_min_cadence_targets/tes...,101.978718,-27.145716,172929247,7,2,1,11.833300,7,/storage/astro2/phrdhx/eleanor-lite/s0007/0000...
1936548,1936548,1936548,8148315,1,tesslcs_sector_7_104/2_min_cadence_targets/tes...,98.434691,-27.903746,172192984,7,2,1,10.769400,7,/storage/astro2/phrdhx/eleanor-lite/s0007/0000...
1936549,1936549,1936549,8148317,3,tesslcs_sector_7_104/2_min_cadence_targets/tes...,126.193348,-14.070762,409735202,7,2,3,10.500300,7,/storage/astro2/phrdhx/eleanor-lite/s0007/0000...
1936550,1936550,1936550,8148320,6,tesslcs_sector_7_104/2_min_cadence_targets/tes...,100.072854,-22.770491,48609355,7,2,1,8.173400,7,/storage/astro2/phrdhx/eleanor-lite/s0007/0000...
1936551,1936551,1936551,8148321,7,tesslcs_sector_7_104/2_min_cadence_targets/tes...,129.805157,7.573233,458680694,7,1,3,6.045500,7,/storage/astro2/phrdhx/eleanor-lite/s0007/0000...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2796904,2796904,2796904,12142374,3994060,tesslcs_sector_7_104/tesslcs_tmag_9_10/tesslc_...,105.956329,-21.191585,80710138,7,2,1,9.826899,7,/storage/astro2/phrdhx/eleanor-lite/s0007/0000...
2796905,2796905,2796905,12142375,3994061,tesslcs_sector_7_104/tesslcs_tmag_9_10/tesslc_...,108.221134,-21.013277,99508260,7,2,1,9.360800,7,/storage/astro2/phrdhx/eleanor-lite/s0007/0000...
2796906,2796906,2796906,12142376,3994062,tesslcs_sector_7_104/tesslcs_tmag_9_10/tesslc_...,112.508601,-20.151139,5223004,7,2,4,9.842490,7,/storage/astro2/phrdhx/eleanor-lite/s0007/0000...
2796907,2796907,2796907,12142377,3994063,tesslcs_sector_7_104/tesslcs_tmag_9_10/tesslc_...,113.761884,-21.426656,348598616,7,2,2,9.973210,7,/storage/astro2/phrdhx/eleanor-lite/s0007/0000...


In [57]:
print(len(s1_project)/len(old[old.Sector == 1])*100)
print(len(s2_project)/len(old[old.Sector == 2])*100)
print(len(s3_project)/len(old[old.Sector == 3])*100)
print(len(s4_project)/len(old[old.Sector == 4])*100)
print(len(s5_project)/len(old[old.Sector == 5])*100)
print(len(s6_project)/len(old[old.Sector == 6])*100)
print(len(s7_project)/len(old[old.Sector == 7]))

1.0039970930232558
0.9652264246947083
0.9463078291814947
0.9621716981833657
0.9443494236378088
0.9893406926067172
1.0026035552476749


---

### Part 3.5: add paths to new project lightcurves
- Note: I did not need to do this for S1-7, because of the download scripts.
- S8 needs a bit of work because the download script is out, but the target list isn't.
Step 1: Add leading zeroes

In [14]:
# s1_project['tic_leading_zeroes'] = s1_project.tic_id.astype(str).str.zfill(16)
# s2_project['tic_leading_zeroes'] = s2_project.tic_id.astype(str).str.zfill(16)
# s3_project['tic_leading_zeroes'] = s3_project.tic_id.astype(str).str.zfill(16)
# s4_project['tic_leading_zeroes'] = s4_project.tic_id.astype(str).str.zfill(16)
# s5_project['tic_leading_zeroes'] = s5_project.tic_id.astype(str).str.zfill(16)
# s6_project['tic_leading_zeroes'] = s6_project.tic_id.astype(str).str.zfill(16)
# s7_project['tic_leading_zeroes'] = s7_project.tic_id.astype(str).str.zfill(16)
s8_project['tic_leading_zeroes'] = s8_project.tic_id.astype(str).str.zfill(16)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Step 2: Make the new paths now

In [9]:
s1_project['new_path'] = (
    's0001' + '/' +
    s1_project['tic_leading_zeroes'].str[0:4] + '/' +
    s1_project['tic_leading_zeroes'].str[4:8] + '/' +
    s1_project['tic_leading_zeroes'].str[8:12] + '/' +
    s1_project['tic_leading_zeroes'].str[12:16] + '/' + 'hlsp_gsfc-eleanor-lite_tess_ffi_s' + 's0001' + '-' + s1_project['tic_leading_zeroes'] + '_tess_v1.0_lc.fits'
) 

common_string = '/storage/astro2/phrdhx/eleanor-lite/'
s1_project['new_path'] = common_string + s1_project['new_path']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.


In [10]:
s2_project['new_path'] = (
    's0002' + '/' +
    s2_project['tic_leading_zeroes'].str[0:4] + '/' +
    s2_project['tic_leading_zeroes'].str[4:8] + '/' +
    s2_project['tic_leading_zeroes'].str[8:12] + '/' +
    s2_project['tic_leading_zeroes'].str[12:16] + '/' + 'hlsp_gsfc-eleanor-lite_tess_ffi_s' + 's0002' + '-' + s2_project['tic_leading_zeroes'] + '_tess_v1.0_lc.fits'
) 

common_string = '/storage/astro2/phrdhx/eleanor-lite/'
s2_project['new_path'] = common_string + s2_project['new_path']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.


In [11]:
s3_project['new_path'] = (
    's0003' + '/' +
    s3_project['tic_leading_zeroes'].str[0:4] + '/' +
    s3_project['tic_leading_zeroes'].str[4:8] + '/' +
    s3_project['tic_leading_zeroes'].str[8:12] + '/' +
    s3_project['tic_leading_zeroes'].str[12:16] + '/' + 'hlsp_gsfc-eleanor-lite_tess_ffi_s' + 's0003' + '-' + s3_project['tic_leading_zeroes'] + '_tess_v1.0_lc.fits'
) 

common_string = '/storage/astro2/phrdhx/eleanor-lite/'
s3_project['new_path'] = common_string + s3_project['new_path']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.


In [12]:
s4_project['new_path'] = (
    's0004' + '/' +
    s4_project['tic_leading_zeroes'].str[0:4] + '/' +
    s4_project['tic_leading_zeroes'].str[4:8] + '/' +
    s4_project['tic_leading_zeroes'].str[8:12] + '/' +
    s4_project['tic_leading_zeroes'].str[12:16] + '/' + 'hlsp_gsfc-eleanor-lite_tess_ffi_s' + 's0004' + '-' + s4_project['tic_leading_zeroes'] + '_tess_v1.0_lc.fits'
) 

common_string = '/storage/astro2/phrdhx/eleanor-lite/'
s4_project['new_path'] = common_string + s4_project['new_path']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.


In [13]:
s5_project['new_path'] = (
    's0005' + '/' +
    s5_project['tic_leading_zeroes'].str[0:4] + '/' +
    s5_project['tic_leading_zeroes'].str[4:8] + '/' +
    s5_project['tic_leading_zeroes'].str[8:12] + '/' +
    s5_project['tic_leading_zeroes'].str[12:16] + '/' + 'hlsp_gsfc-eleanor-lite_tess_ffi_s' + 's0005' + '-' + s5_project['tic_leading_zeroes'] + '_tess_v1.0_lc.fits'
) 

common_string = '/storage/astro2/phrdhx/eleanor-lite/'
s5_project['new_path'] = common_string + s5_project['new_path']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.


In [14]:
s6_project['new_path'] = (
    's0006' + '/' +
    s6_project['tic_leading_zeroes'].str[0:4] + '/' +
    s6_project['tic_leading_zeroes'].str[4:8] + '/' +
    s6_project['tic_leading_zeroes'].str[8:12] + '/' +
    s6_project['tic_leading_zeroes'].str[12:16] + '/' + 'hlsp_gsfc-eleanor-lite_tess_ffi_s' + 's0006' + '-' + s6_project['tic_leading_zeroes'] + '_tess_v1.0_lc.fits'
) 

common_string = '/storage/astro2/phrdhx/eleanor-lite/'
s6_project['new_path'] = common_string + s6_project['new_path']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.


In [15]:
s7_project['new_path'] = (
    's0007' + '/' +
    s7_project['tic_leading_zeroes'].str[0:4] + '/' +
    s7_project['tic_leading_zeroes'].str[4:8] + '/' +
    s7_project['tic_leading_zeroes'].str[8:12] + '/' +
    s7_project['tic_leading_zeroes'].str[12:16] + '/' + 'hlsp_gsfc-eleanor-lite_tess_ffi_s' + 's0007' + '-' + s7_project['tic_leading_zeroes'] + '_tess_v1.0_lc.fits'
) 

common_string = '/storage/astro2/phrdhx/eleanor-lite/'
s7_project['new_path'] = common_string + s7_project['new_path']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.


In [15]:
s8_project['new_path'] = (
    's0008' + '/' +
    s8_project['tic_leading_zeroes'].str[0:4] + '/' +
    s8_project['tic_leading_zeroes'].str[4:8] + '/' +
    s8_project['tic_leading_zeroes'].str[8:12] + '/' +
    s8_project['tic_leading_zeroes'].str[12:16] + '/' + 'hlsp_gsfc-eleanor-lite_tess_ffi_s' + 's0008' + '-' + s8_project['tic_leading_zeroes'] + '_tess_v1.0_lc.fits'
) 

common_string = '/storage/astro2/phrdhx/eleanor-lite/'
s8_project['new_path'] = common_string + s8_project['new_path']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.


Step 3: Export

In [19]:
export_s1 = s1_project.new_path.to_list()
output_file = 's0001.txt'
with open(output_file, 'w') as file:
    for i in export_s1:
        file.write(i + '\n')

In [20]:
export_s2 = s2_project.new_path.to_list()
output_file = 's0002.txt'
with open(output_file, 'w') as file:
    for i in export_s2:
        file.write(i + '\n')

In [21]:
export_s3 = s3_project.new_path.to_list()
output_file = 's0003.txt'
with open(output_file, 'w') as file:
    for i in export_s3:
        file.write(i + '\n')

In [22]:
export_s4 = s4_project.new_path.to_list()
output_file = 's0004.txt'
with open(output_file, 'w') as file:
    for i in export_s4:
        file.write(i + '\n')

In [23]:
export_s5 = s5_project.new_path.to_list()
output_file = 's0005.txt'
with open(output_file, 'w') as file:
    for i in export_s5:
        file.write(i + '\n')

In [24]:
export_s6 = s6_project.new_path.to_list()
output_file = 's0006.txt'
with open(output_file, 'w') as file:
    for i in export_s6:
        file.write(i + '\n')

In [25]:
export_s7 = s7_project.new_path.to_list()
output_file = 's0007.txt'
with open(output_file, 'w') as file:
    for i in export_s7:
        file.write(i + '\n')

## Part 4: What TICs from old download are not in `eleanor-lite`, and vice-versa?

In [164]:
old_s1 = old[old.Sector == 1]
old_s2 = old[old.Sector == 2]
old_s3 = old[old.Sector == 3]
old_s4 = old[old.Sector == 4]
old_s5 = old[old.Sector == 5]
old_s6 = old[old.Sector == 6]
old_s7 = old[old.Sector == 7]

In [182]:
not_in_s1_project = old_s1[~old_s1.TIC_ID.isin(s1_project.tic_id)] # tics from old not in new
not_in_old_s1 = s1_project[~s1_project.tic_id.isin(old_s1.TIC_ID)] # tics in new not in old

In [184]:
not_in_old_s1

Unnamed: 0,tic_id,Tmag
155,355789476,2.1034
399,237939392,2.8660
417,766018849,2.8941
493,167602316,3.0430
496,139267123,3.0507
...,...,...
13270406,150299381,12.9996
13270797,326453307,12.9996
13273844,237921766,12.9999
13274293,425951898,12.9999


In [None]:
not_in_s1_project

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,Unnamed: 0.1.1.1,Filename,RA,Dec,TIC_ID,Sector,Camera,CCD,Magnitude,Sector_leading_zeros,new_path
286262,286262,286262,1192467,60,tesslcs_sector_2_104/2_min_cadence_targets/tes...,6.209362,-42.511040,7204855,2,2,3,8.595000,2,/storage/astro2/phrdhx/eleanor-lite/s0002/0000...
286264,286264,286264,1192469,62,tesslcs_sector_2_104/2_min_cadence_targets/tes...,6.257594,-42.153439,7204925,2,2,3,9.414700,2,/storage/astro2/phrdhx/eleanor-lite/s0002/0000...
286491,286491,286491,1192713,306,tesslcs_sector_2_104/2_min_cadence_targets/tes...,64.822631,-59.966022,38633112,2,4,1,10.352700,2,/storage/astro2/phrdhx/eleanor-lite/s0002/0000...
286734,286734,286734,1192968,561,tesslcs_sector_2_104/2_min_cadence_targets/tes...,356.380300,-16.172200,2760232,2,1,3,11.126000,2,/storage/astro2/phrdhx/eleanor-lite/s0002/0000...
286736,286736,286736,1192970,563,tesslcs_sector_2_104/2_min_cadence_targets/tes...,336.642825,-16.741955,12723528,2,1,4,4.906000,2,/storage/astro2/phrdhx/eleanor-lite/s0002/0000...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
521874,521874,521874,2158446,966039,tesslcs_sector_2_104/tesslcs_tmag_9_10/tesslc_...,67.501414,-62.828004,38764156,2,4,1,9.771401,2,/storage/astro2/phrdhx/eleanor-lite/s0002/0000...
521876,521876,521876,2158448,966041,tesslcs_sector_2_104/tesslcs_tmag_9_10/tesslc_...,65.981660,-62.216553,38696769,2,4,1,9.632100,2,/storage/astro2/phrdhx/eleanor-lite/s0002/0000...
521879,521879,521879,2158451,966044,tesslcs_sector_2_104/tesslcs_tmag_9_10/tesslc_...,69.873176,-62.430198,38941059,2,4,1,9.902000,2,/storage/astro2/phrdhx/eleanor-lite/s0002/0000...
521930,521930,521930,2158502,966095,tesslcs_sector_2_104/tesslcs_tmag_9_10/tesslc_...,64.473820,-63.233232,38607156,2,4,1,9.657300,2,/storage/astro2/phrdhx/eleanor-lite/s0002/0000...


In [None]:
not_in_s2_project[not_in_s2_project.TIC_ID.isin(not_in_old_s2.tic_id)]

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,Unnamed: 0.1.1.1,Filename,RA,Dec,TIC_ID,Sector,Camera,CCD,Magnitude,Sector_leading_zeros,new_path


In [179]:
len(old_s2)

235840

In [178]:
abs(len(s2_project) - len(old_s2))

8201

## Part 5: Download Scripts

In [1]:
import pandas as pd

In [2]:
with open('/storage/astro2/phrdhx/eleanor-lite-project-v2/download_scripts/hlsp_gsfc-eleanor-lite_tess_ffi_s0008_tess_v1.0_lc.sh', 'r') as file:
    script_contents = file.readlines()

In [3]:
df = pd.DataFrame(script_contents, columns=["Script Lines"])
df['Extracted ID'] = df['Script Lines'].str.extract(r'(\d{16})')
df['ID'] = df['Extracted ID'].str.lstrip('0')

In [4]:
df.head()

Unnamed: 0,Script Lines,Extracted ID,ID
0,#!/bin/sh\n,,
1,curl -f --create-dirs --output 's0008/0000/000...,766561.0,766561.0
2,curl -f --create-dirs --output 's0008/0000/000...,766563.0,766563.0
3,curl -f --create-dirs --output 's0008/0000/000...,766565.0,766565.0
4,curl -f --create-dirs --output 's0008/0000/000...,766568.0,766568.0


In [5]:
df.drop(0,inplace=True)

In [6]:
df['ID'] = df['ID'].astype(int)

In [16]:
to_export = df[df['ID'].isin(s8_project['tic_id'])]

In [17]:
to_export

Unnamed: 0,Script Lines,Extracted ID,ID
262145,curl -f --create-dirs --output 's0008/0000/000...,0000000007845793,7845793
262148,curl -f --create-dirs --output 's0008/0000/000...,0000000007845804,7845804
262150,curl -f --create-dirs --output 's0008/0000/000...,0000000007845809,7845809
262154,curl -f --create-dirs --output 's0008/0000/000...,0000000007845818,7845818
262155,curl -f --create-dirs --output 's0008/0000/000...,0000000007845819,7845819
...,...,...,...
6859729,curl -f --create-dirs --output 's0008/0000/010...,0000010005000363,10005000363
6859730,curl -f --create-dirs --output 's0008/0000/010...,0000010005000369,10005000369
6859731,curl -f --create-dirs --output 's0008/0000/010...,0000010005000380,10005000380
6859732,curl -f --create-dirs --output 's0008/0000/010...,0000010005000418,10005000418


In [18]:
len(s8_project)

698245

In [19]:
output_file = '../../s8.sh'

# Extract the 'Curl Script' column and write it to the .sh file
with open(output_file, 'w') as sh_file:
    for curl_script in df['Script Lines']:
        sh_file.write(curl_script + '\n')

---

### Part 6: From the TICs I created (while waiting for the official release), how many fall in TIC Catalog

In [1]:
import os
os.chdir('../../')
import pandas as pd

In [2]:
tic = pd.read_csv('tic.txt',sep='\t')
tic['tic_leading_zeroes'] = tic.tic_id.astype(str).str.zfill(16)

In [4]:
print(len(tic))
tic = tic.drop_duplicates(subset=['tic_id'])
len(tic)

13275772


13275772

In [5]:
s21 = pd.read_csv('s21-eleanor-lite.txt',header=None,names=['tic_id'])
print(len(s21))
s21 = s21.drop_duplicates(subset=['tic_id'])
print(len(s21))
s21.info()

1683175
1683175
<class 'pandas.core.frame.DataFrame'>
Int64Index: 1683175 entries, 0 to 1683174
Data columns (total 1 columns):
 #   Column  Non-Null Count    Dtype
---  ------  --------------    -----
 0   tic_id  1683175 non-null  int64
dtypes: int64(1)
memory usage: 25.7 MB


In [18]:
s21_project = s21[s21.tic_id.isin(tic.tic_id)]
s21_project['tic_leading_zeroes'] = s21_project.tic_id.astype(str).str.zfill(16)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [23]:
s21_project['new_path'] = (
    's0021' + '/' +
    s21_project['tic_leading_zeroes'].str[0:4] + '/' +
    s21_project['tic_leading_zeroes'].str[4:8] + '/' +
    s21_project['tic_leading_zeroes'].str[8:12] + '/' +
    s21_project['tic_leading_zeroes'].str[12:16] + '/' + 'hlsp_gsfc-eleanor-lite_tess_ffi_' + 's0021' + '-' + s21_project['tic_leading_zeroes'] + '_tess_v1.0_lc.fits'
) 

common_string = '/storage/astro2/phrdhx/eleanor-lite/'
s21_project['new_path'] = common_string + s21_project['new_path']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.


In [25]:
export_s21 = s21_project.new_path.to_list()
output_file = 's0021.txt'
with open(output_file, 'w') as file:
    for i in export_s21:
        file.write(i + '\n')

----
## Part 6.5a: making Part 6 automated

In [1]:
import os
os.chdir('../../')
import pandas as pd
from tqdm import tqdm

In [2]:
tic = pd.read_csv('tic.txt',sep='\t')
tic['tic_leading_zeroes'] = tic.tic_id.astype(str).str.zfill(16)

In [3]:
tic

Unnamed: 0,tic_id,Tmag,tic_leading_zeroes
0,269273552,-1.745,0000000269273552
1,175934060,-1.679,0000000175934060
2,459832522,-1.254,0000000459832522
3,245873777,-1.200,0000000245873777
4,272314138,-1.043,0000000272314138
...,...,...,...
13275767,10000693211,13.000,0000010000693211
13275768,10001083513,13.000,0000010001083513
13275769,10001920480,13.000,0000010001920480
13275770,10002253686,13.000,0000010002253686


In [4]:
s_values = list(range(18,20))
s_values

[18, 19]

In [5]:
project_data = {}
for s in tqdm(s_values):
    # Read the CSV file for the current 's' value
    file_name = f's{s:02d}-eleanor-lite.txt'
    df = pd.read_csv(file_name, header=None, names=['tic_id'])
    df_len = len(df)
    df = df.drop_duplicates(subset=['tic_id'])
    print(f'{df_len - len(df)} duplicates found')
    
    # Filter the DataFrame based on the 'tic' DataFrame
    project = df[df.tic_id.isin(tic.tic_id)]
    
    project = project.merge(tic[['tic_id', 'Tmag']], on='tic_id', how='left')
    
    # Add leading zeroes and construct the 'new_path' column
    project['tic_leading_zeroes'] = project.tic_id.astype(str).str.zfill(16)
    project['new_path'] = (
        f's{s:04d}/' +
        project['tic_leading_zeroes'].str[0:4] + '/' +
        project['tic_leading_zeroes'].str[4:8] + '/' +
        project['tic_leading_zeroes'].str[8:12] + '/' +
        project['tic_leading_zeroes'].str[12:16] + '/' +
        f'hlsp_gsfc-eleanor-lite_tess_ffi_s{s:04d}-' + project['tic_leading_zeroes'] + '_tess_v1.0_lc.fits'
    )
    
    # Add the common_string to the 'new_path' column
    common_string = '/storage/astro2/phrdhx/eleanor-lite/'
    project['new_path'] = common_string + project['new_path']
    export_paths = project.new_path.to_list()
    output_file = f's{s:04d}.txt'
    with open(output_file, 'w') as file:
        for i in export_paths:
            file.write(i + '\n')
    
    # Store the project DataFrame in the dictionary to access later
    project_data[f's{s:04d}_project'] = project
    
    


  0%|          | 0/2 [00:00<?, ?it/s]

0 duplicates found


 50%|█████     | 1/2 [00:11<00:11, 11.38s/it]

0 duplicates found


100%|██████████| 2/2 [00:22<00:00, 11.19s/it]


In [7]:
project_data['s0019_project']

Unnamed: 0,tic_id,Tmag,tic_leading_zeroes,new_path
0,122339913,11.9102,0000000122339913,/storage/astro2/phrdhx/eleanor-lite/s0019/0000...
1,122339771,10.9417,0000000122339771,/storage/astro2/phrdhx/eleanor-lite/s0019/0000...
2,122339612,12.6007,0000000122339612,/storage/astro2/phrdhx/eleanor-lite/s0019/0000...
3,122339873,11.0181,0000000122339873,/storage/astro2/phrdhx/eleanor-lite/s0019/0000...
4,122339765,12.9144,0000000122339765,/storage/astro2/phrdhx/eleanor-lite/s0019/0000...
...,...,...,...,...
486041,326260358,12.4369,0000000326260358,/storage/astro2/phrdhx/eleanor-lite/s0019/0000...
486042,326260424,12.9241,0000000326260424,/storage/astro2/phrdhx/eleanor-lite/s0019/0000...
486043,326260657,12.1702,0000000326260657,/storage/astro2/phrdhx/eleanor-lite/s0019/0000...
486044,326260537,11.2682,0000000326260537,/storage/astro2/phrdhx/eleanor-lite/s0019/0000...
