# Creating filepaths for project lightcurves

## Part 1: The XRP lookup

In [None]:
import os
os.chdir('../../')

import pandas as pd
import json

In [None]:
#data = pd.read_csv('/storage/astro2/phrdhx/tesslcs/fulllookup.csv')

In [None]:
with open('colnames.json', 'r', encoding='utf-8') as f:
    check = f.read()
    columns = json.loads(check)
    columns = columns['column_names']

In [None]:
data['TIC_ID'] = data['TIC_ID'].astype(str).str.zfill(16)

# Convert sector to a 4-digit string with leading zeros
data['Sector_leading_zeros'] = data['Sector'].astype(str).str.zfill(4)

# Create the new_path column
data['new_path'] = (
    's' + data['Sector_leading_zeros'] + '/' +
    data['TIC_ID'].str[0:4] + '/' +
    data['TIC_ID'].str[4:8] + '/' +
    data['TIC_ID'].str[8:12] + '/' +
    data['TIC_ID'].str[12:16] + '/' +
    'hlsp_gsfc-eleanor-lite_tess_ffi_s' + data['Sector_leading_zeros'] + '-' + data['TIC_ID'] + '_tess_v1.0_lc.fits'
)

common_string = '/storage/astro2/phrdhx/eleanor-lite/'

data['new_path'] = common_string + data['new_path']
#data.to_csv('xrplookup.csv')

---

## Part 2: Using `project_lookup` to produce file paths of for Sector-by-Sector copies

In [None]:
import pandas as pd
import os
os.chdir('../../')

In [None]:
#s1 = pd.read_csv('s0001.csv')
lookup = pd.read_csv('project_lookup.csv')
lookup = lookup.loc[:, ~lookup.columns.str.contains('^Unnamed')]

In [None]:
export_s22 = lookup[lookup.Sector == 21].new_path.to_list()
len(export_s22)

### Reading up the `eleanor-lite` lookup that I generated

In [None]:
tic = pd.read_csv('tic.txt',sep='\t')
tic.head()

In [None]:
eleanor_lite_s21 = pd.read_csv('s21-eleanor-lite.txt',header=None,names=['tic_id'])
print(len(eleanor_lite_s21))
eleanor_lite_s21 = eleanor_lite_s21.drop_duplicates(subset=['tic_id'])
print(str(len(eleanor_lite_s21)) + " after dropping duplicates")

The `eleanor-lite` lightcurves met for our projects:

In [None]:
project_eleanor_lite_s21 = eleanor_lite_s21[eleanor_lite_s21.tic_id.isin(tic.tic_id)]

This does not include the lightcurves that exist in the old lookup file. To do this, we can just query the lightcurves that were not present in our `project_eleanor_lite_s21`

In [None]:
with open('s0024.txt', 'w') as file:
        for item in export_s22:
            file.write(str(item) + '\n')

---

---

### Part 2.5: For TICs that did not exist in `old`, make the paths for those and export

In [None]:
import os
os.chdir('../../')
import pandas as pd

In [None]:
tic = pd.read_csv('tic.txt',sep='\t')
tic.head()

In [None]:
lookup = pd.read_csv('project_lookup.csv')
lookup = lookup.loc[:, ~lookup.columns.str.contains('^Unnamed')]

In [None]:
s21 = lookup[lookup.Sector == 1]
s21.head()

In [None]:
len(s21)

I need to know how many TICs were not in the old sector target lists

---

## Part 3: Getting Project TIC IDs from Catalog
- Using the TIC catalogue, I want to create file paths for the TICs that did not exist in the old `project_lookup.csv` file.

In [1]:
import pandas as pd

## Import TIC Catalog for stars up to 13th mag

In [2]:
tic = pd.read_csv('../../tic.txt',sep='\t')
tic.head()

Unnamed: 0,tic_id,Tmag
0,269273552,-1.745
1,175934060,-1.679
2,459832522,-1.254
3,245873777,-1.2
4,272314138,-1.043


In [3]:
s1 = pd.read_csv('../../target_lists/s0001.csv',header=None,names=['TIC_ID','RA','DEC'])
s2 = pd.read_csv('../../target_lists/s0002.csv',header=None,names=['TIC_ID','RA','DEC'])
s3 = pd.read_csv('../../target_lists/s0003.csv',header=None,names=['TIC_ID','RA','DEC'])
s4 = pd.read_csv('../../target_lists/s0004.csv',header=None,names=['TIC_ID','RA','DEC'])
s5 = pd.read_csv('../../target_lists/s0005.csv',header=None,names=['TIC_ID','RA','DEC'])
s6 = pd.read_csv('../../target_lists/s0006.csv',header=None,names=['TIC_ID','RA','DEC'])
s7 = pd.read_csv('../../target_lists/s0007.csv',header=None,names=['TIC_ID','RA','DEC'])
s8 = pd.read_csv('../../target_lists/s0008.csv',header=None,names=['TIC_ID','RA','DEC'])
s12 = pd.read_csv('../../target_lists/s0012.csv',header=None,names=['TIC_ID','RA','DEC'])

  exec(code_obj, self.user_global_ns, self.user_ns)


#### This gives all the TIC IDs from each sector of the `eleanor-lite` data that are in the TIC catalog. They do _not_ include the data from the old lookup that still exist.

In [4]:
s1_project = tic[tic.tic_id.isin(s1.TIC_ID)] 
s2_project = tic[tic.tic_id.isin(s2.TIC_ID)]
s3_project = tic[tic.tic_id.isin(s3.TIC_ID)]
s4_project = tic[tic.tic_id.isin(s4.TIC_ID)]
s5_project = tic[tic.tic_id.isin(s5.TIC_ID)]
s6_project = tic[tic.tic_id.isin(s6.TIC_ID)]
s7_project = tic[tic.tic_id.isin(s7.TIC_ID)]
s8_project = tic[tic.tic_id.isin(s8.TIC_ID)]
s12_project = tic[tic.tic_id.isin(s12.TIC_ID)]


---

In [5]:
len(s8_project)

698245

### Part 3.5: add paths to new project lightcurves
- Note: I did not need to do this for S1-7, because of the download scripts.
- S8 needs a bit of work because the download script is out, but the target list isn't.
Step 1: Add leading zeroes

In [6]:
s1_project['tic_leading_zeroes'] = s1_project.tic_id.astype(str).str.zfill(16)
s2_project['tic_leading_zeroes'] = s2_project.tic_id.astype(str).str.zfill(16)
s3_project['tic_leading_zeroes'] = s3_project.tic_id.astype(str).str.zfill(16)
s4_project['tic_leading_zeroes'] = s4_project.tic_id.astype(str).str.zfill(16)
s5_project['tic_leading_zeroes'] = s5_project.tic_id.astype(str).str.zfill(16)
s6_project['tic_leading_zeroes'] = s6_project.tic_id.astype(str).str.zfill(16)
s7_project['tic_leading_zeroes'] = s7_project.tic_id.astype(str).str.zfill(16)
s8_project['tic_leading_zeroes'] = s8_project.tic_id.astype(str).str.zfill(16)
s12_project['tic_leading_zeroes'] = s12_project.tic_id.astype(str).str.zfill(16)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using

Step 2: Make the new paths now

In [None]:
s1_project['new_path'] = (
    's0001' + '/' +
    s1_project['tic_leading_zeroes'].str[0:4] + '/' +
    s1_project['tic_leading_zeroes'].str[4:8] + '/' +
    s1_project['tic_leading_zeroes'].str[8:12] + '/' +
    s1_project['tic_leading_zeroes'].str[12:16] + '/' + 'hlsp_gsfc-eleanor-lite_tess_ffi_s' + 's0001' + '-' + s1_project['tic_leading_zeroes'] + '_tess_v1.0_lc.fits'
) 

common_string = '/storage/astro2/phrdhx/eleanor-lite/'
s1_project['new_path'] = common_string + s1_project['new_path']

In [None]:
s2_project['new_path'] = (
    's0002' + '/' +
    s2_project['tic_leading_zeroes'].str[0:4] + '/' +
    s2_project['tic_leading_zeroes'].str[4:8] + '/' +
    s2_project['tic_leading_zeroes'].str[8:12] + '/' +
    s2_project['tic_leading_zeroes'].str[12:16] + '/' + 'hlsp_gsfc-eleanor-lite_tess_ffi_s' + 's0002' + '-' + s2_project['tic_leading_zeroes'] + '_tess_v1.0_lc.fits'
) 

common_string = '/storage/astro2/phrdhx/eleanor-lite/'
s2_project['new_path'] = common_string + s2_project['new_path']

In [None]:
s3_project['new_path'] = (
    's0003' + '/' +
    s3_project['tic_leading_zeroes'].str[0:4] + '/' +
    s3_project['tic_leading_zeroes'].str[4:8] + '/' +
    s3_project['tic_leading_zeroes'].str[8:12] + '/' +
    s3_project['tic_leading_zeroes'].str[12:16] + '/' + 'hlsp_gsfc-eleanor-lite_tess_ffi_s' + 's0003' + '-' + s3_project['tic_leading_zeroes'] + '_tess_v1.0_lc.fits'
) 

common_string = '/storage/astro2/phrdhx/eleanor-lite/'
s3_project['new_path'] = common_string + s3_project['new_path']

In [None]:
s4_project['new_path'] = (
    's0004' + '/' +
    s4_project['tic_leading_zeroes'].str[0:4] + '/' +
    s4_project['tic_leading_zeroes'].str[4:8] + '/' +
    s4_project['tic_leading_zeroes'].str[8:12] + '/' +
    s4_project['tic_leading_zeroes'].str[12:16] + '/' + 'hlsp_gsfc-eleanor-lite_tess_ffi_s' + 's0004' + '-' + s4_project['tic_leading_zeroes'] + '_tess_v1.0_lc.fits'
) 

common_string = '/storage/astro2/phrdhx/eleanor-lite/'
s4_project['new_path'] = common_string + s4_project['new_path']

In [None]:
s5_project['new_path'] = (
    's0005' + '/' +
    s5_project['tic_leading_zeroes'].str[0:4] + '/' +
    s5_project['tic_leading_zeroes'].str[4:8] + '/' +
    s5_project['tic_leading_zeroes'].str[8:12] + '/' +
    s5_project['tic_leading_zeroes'].str[12:16] + '/' + 'hlsp_gsfc-eleanor-lite_tess_ffi_s' + 's0005' + '-' + s5_project['tic_leading_zeroes'] + '_tess_v1.0_lc.fits'
) 

common_string = '/storage/astro2/phrdhx/eleanor-lite/'
s5_project['new_path'] = common_string + s5_project['new_path']

In [None]:
s6_project['new_path'] = (
    's0006' + '/' +
    s6_project['tic_leading_zeroes'].str[0:4] + '/' +
    s6_project['tic_leading_zeroes'].str[4:8] + '/' +
    s6_project['tic_leading_zeroes'].str[8:12] + '/' +
    s6_project['tic_leading_zeroes'].str[12:16] + '/' + 'hlsp_gsfc-eleanor-lite_tess_ffi_s' + 's0006' + '-' + s6_project['tic_leading_zeroes'] + '_tess_v1.0_lc.fits'
) 

common_string = '/storage/astro2/phrdhx/eleanor-lite/'
s6_project['new_path'] = common_string + s6_project['new_path']

In [None]:
s7_project['new_path'] = (
    's0007' + '/' +
    s7_project['tic_leading_zeroes'].str[0:4] + '/' +
    s7_project['tic_leading_zeroes'].str[4:8] + '/' +
    s7_project['tic_leading_zeroes'].str[8:12] + '/' +
    s7_project['tic_leading_zeroes'].str[12:16] + '/' + 'hlsp_gsfc-eleanor-lite_tess_ffi_s' + 's0007' + '-' + s7_project['tic_leading_zeroes'] + '_tess_v1.0_lc.fits'
) 

common_string = '/storage/astro2/phrdhx/eleanor-lite/'
s7_project['new_path'] = common_string + s7_project['new_path']

In [None]:
s8_project['new_path'] = (
    's0008' + '/' +
    s8_project['tic_leading_zeroes'].str[0:4] + '/' +
    s8_project['tic_leading_zeroes'].str[4:8] + '/' +
    s8_project['tic_leading_zeroes'].str[8:12] + '/' +
    s8_project['tic_leading_zeroes'].str[12:16] + '/' + 'hlsp_gsfc-eleanor-lite_tess_ffi_s' + 's0008' + '-' + s8_project['tic_leading_zeroes'] + '_tess_v1.0_lc.fits'
) 

common_string = '/storage/astro2/phrdhx/eleanor-lite/'
s8_project['new_path'] = common_string + s8_project['new_path']

In [10]:
s12_project['new_path'] = (
    's0012' + '/' +
    s12_project['tic_leading_zeroes'].str[0:4] + '/' +
    s12_project['tic_leading_zeroes'].str[4:8] + '/' +
    s12_project['tic_leading_zeroes'].str[8:12] + '/' +
    s12_project['tic_leading_zeroes'].str[12:16] + '/' + 'hlsp_gsfc-eleanor-lite_tess_ffi_' + 's0012' + '-' + s12_project['tic_leading_zeroes'] + '_tess_v1.0_lc.fits'
) 

common_string = '/storage/astro2/phrdhx/eleanor-lite/'
s12_project['new_path'] = common_string + s12_project['new_path']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.


Step 3: Export

In [None]:
export_s1 = s1_project.new_path.to_list()
output_file = 's0001.txt'
with open(output_file, 'w') as file:
    for i in export_s1:
        file.write(i + '\n')

In [None]:
export_s2 = s2_project.new_path.to_list()
output_file = 's0002.txt'
with open(output_file, 'w') as file:
    for i in export_s2:
        file.write(i + '\n')

In [None]:
export_s3 = s3_project.new_path.to_list()
output_file = 's0003.txt'
with open(output_file, 'w') as file:
    for i in export_s3:
        file.write(i + '\n')

In [None]:
export_s4 = s4_project.new_path.to_list()
output_file = 's0004.txt'
with open(output_file, 'w') as file:
    for i in export_s4:
        file.write(i + '\n')

In [None]:
export_s5 = s5_project.new_path.to_list()
output_file = 's0005.txt'
with open(output_file, 'w') as file:
    for i in export_s5:
        file.write(i + '\n')

In [None]:
export_s6 = s6_project.new_path.to_list()
output_file = 's0006.txt'
with open(output_file, 'w') as file:
    for i in export_s6:
        file.write(i + '\n')

In [None]:
export_s7 = s7_project.new_path.to_list()
output_file = 's0007.txt'
with open(output_file, 'w') as file:
    for i in export_s7:
        file.write(i + '\n')

In [11]:
export_s12 = s12_project.new_path.to_list()
output_file = 's0012.txt'
with open(output_file, 'w') as file:
    for i in export_s12:
        file.write(i + '\n')

## Part 4: What TICs from old download are not in `eleanor-lite`, and vice-versa?

In [None]:
old_s1 = old[old.Sector == 1]
old_s2 = old[old.Sector == 2]
old_s3 = old[old.Sector == 3]
old_s4 = old[old.Sector == 4]
old_s5 = old[old.Sector == 5]
old_s6 = old[old.Sector == 6]
old_s7 = old[old.Sector == 7]

In [None]:
not_in_s1_project = old_s1[~old_s1.TIC_ID.isin(s1_project.tic_id)] # tics from old not in new
not_in_old_s1 = s1_project[~s1_project.tic_id.isin(old_s1.TIC_ID)] # tics in new not in old

In [None]:
not_in_old_s1

In [None]:
not_in_s1_project

In [None]:
not_in_s2_project[not_in_s2_project.TIC_ID.isin(not_in_old_s2.tic_id)]

In [None]:
len(old_s2)

In [None]:
abs(len(s2_project) - len(old_s2))

## Part 5: Download Scripts

In [7]:
import pandas as pd

In [8]:
with open('/storage/astro2/phrdhx/eleanor-lite-project-v2/download_scripts/hlsp_gsfc-eleanor-lite_tess_ffi_s0006_tess_v1.0_lc.sh', 'r') as file:
    script_contents = file.readlines()

In [9]:
df = pd.DataFrame(script_contents, columns=["Script Lines"])
df['Extracted ID'] = df['Script Lines'].str.extract(r'(\d{16})')
df['ID'] = df['Extracted ID'].str.lstrip('0')

In [10]:
df.head()

Unnamed: 0,Script Lines,Extracted ID,ID
0,#!/bin/sh\n,,
1,curl -f --create-dirs --output 's0006/0000/000...,4160330.0,4160330.0
2,curl -f --create-dirs --output 's0006/0000/000...,4160331.0,4160331.0
3,curl -f --create-dirs --output 's0006/0000/000...,4160343.0,4160343.0
4,curl -f --create-dirs --output 's0006/0000/000...,4160348.0,4160348.0


In [11]:
df.drop(0,inplace=True)

In [12]:
#df[df['Extracted ID'] == '0000000270577175']['Script Lines'].values[0]

In [13]:
df['ID'] = df['ID'].astype(int)

In [14]:
to_export = df[df['ID'].isin(s6_project['tic_id'])]

In [15]:
to_export

Unnamed: 0,Script Lines,Extracted ID,ID
262147,curl -f --create-dirs --output 's0006/0000/000...,0000000031193194,31193194
262150,curl -f --create-dirs --output 's0006/0000/000...,0000000031193204,31193204
262151,curl -f --create-dirs --output 's0006/0000/000...,0000000031193208,31193208
262153,curl -f --create-dirs --output 's0006/0000/000...,0000000031193217,31193217
262167,curl -f --create-dirs --output 's0006/0000/000...,0000000031193253,31193253
...,...,...,...
5692609,curl -f --create-dirs --output 's0006/0000/010...,0000010005000369,10005000369
5692610,curl -f --create-dirs --output 's0006/0000/010...,0000010005000388,10005000388
5692611,curl -f --create-dirs --output 's0006/0000/010...,0000010005000433,10005000433
5692612,curl -f --create-dirs --output 's0006/0000/010...,0000010005000483,10005000483


In [14]:
len(s1_project)

287352

In [198]:
len(df)

6859735

In [199]:
output_file = '../../s8.sh'

# Extract the 'Curl Script' column and write it to the .sh file
with open(output_file, 'w') as sh_file:
    for curl_script in to_export['Script Lines']:
        sh_file.write(curl_script)

---
### Making sure it's the same number of lines

In [6]:
with open('/storage/astro2/phrdhx/eleanor-lite-project-v2/download_scripts/s8.sh', 'r') as file:
    script_contents = file.readlines()

In [7]:
df = pd.DataFrame(script_contents, columns=["Script Lines"])
df['Extracted ID'] = df['Script Lines'].str.extract(r'(\d{16})')
df['ID'] = df['Extracted ID'].str.lstrip('0')
df['ID'] = df['ID'].astype(int)

In [8]:
df

Unnamed: 0,Script Lines,Extracted ID,ID
0,curl -f --create-dirs --output 's0008/0000/000...,0000000007845793,7845793
1,curl -f --create-dirs --output 's0008/0000/000...,0000000007845804,7845804
2,curl -f --create-dirs --output 's0008/0000/000...,0000000007845809,7845809
3,curl -f --create-dirs --output 's0008/0000/000...,0000000007845818,7845818
4,curl -f --create-dirs --output 's0008/0000/000...,0000000007845819,7845819
...,...,...,...
698240,curl -f --create-dirs --output 's0008/0000/010...,0000010005000363,10005000363
698241,curl -f --create-dirs --output 's0008/0000/010...,0000010005000369,10005000369
698242,curl -f --create-dirs --output 's0008/0000/010...,0000010005000380,10005000380
698243,curl -f --create-dirs --output 's0008/0000/010...,0000010005000418,10005000418


In [9]:
df.drop_duplicates(subset='ID')

Unnamed: 0,Script Lines,Extracted ID,ID
0,curl -f --create-dirs --output 's0008/0000/000...,0000000007845793,7845793
1,curl -f --create-dirs --output 's0008/0000/000...,0000000007845804,7845804
2,curl -f --create-dirs --output 's0008/0000/000...,0000000007845809,7845809
3,curl -f --create-dirs --output 's0008/0000/000...,0000000007845818,7845818
4,curl -f --create-dirs --output 's0008/0000/000...,0000000007845819,7845819
...,...,...,...
698240,curl -f --create-dirs --output 's0008/0000/010...,0000010005000363,10005000363
698241,curl -f --create-dirs --output 's0008/0000/010...,0000010005000369,10005000369
698242,curl -f --create-dirs --output 's0008/0000/010...,0000010005000380,10005000380
698243,curl -f --create-dirs --output 's0008/0000/010...,0000010005000418,10005000418


In [10]:
df.ID

0             7845793
1             7845804
2             7845809
3             7845818
4             7845819
             ...     
698240    10005000363
698241    10005000369
698242    10005000380
698243    10005000418
698244    10005000473
Name: ID, Length: 698245, dtype: int64

In [11]:
import json

#### S1 failed process

In [12]:
test = pd.read_csv('outputs/s8.txt',header=None, sep=" ")
with open('colnames.json', 'r', encoding='utf-8') as f:
    check = f.read()
    columns = json.loads(check)
    columns = columns['column_names']
test.columns = columns



FileNotFoundError: [Errno 2] No such file or directory: 'outputs/s8.txt'

In [None]:
missed = df[~df.ID.isin(test.TIC_ID)]

In [None]:
missed

In [39]:
output_file2 = '/storage/astro2/phrdhx/eleanor-lite-project-v2/download_scripts/s8_missed.sh'

# Extract the 'Curl Script' column and write it to the .sh file
with open(output_file2, 'w') as sh_file:
    for curl_script in missed['Script Lines']:
        sh_file.write(curl_script)

----
## Part 6

In [1]:
import os
os.chdir('../../')
import pandas as pd
from tqdm import tqdm

In [4]:
tic = pd.read_csv('tic_catalog_v3.txt',sep='\t')
tic['tic_leading_zeroes'] = tic.tic_id.astype(str).str.zfill(16)

In [5]:
tic

Unnamed: 0,tic_id,Tmag,GAIAmag,gaiabp,gaiarp,d,plx,tic_leading_zeroes
0,269273552,-1.745,,,,,,0000000269273552
1,175934060,-1.679,,,,,,0000000175934060
2,459832522,-1.254,,,,11.2575,88.83,0000000459832522
3,245873777,-1.200,,,,20.4332,48.94,0000000245873777
4,272314138,-1.043,,,,,,0000000272314138
...,...,...,...,...,...,...,...,...
13275767,10000693211,13.000,,,,,,0000010000693211
13275768,10001083513,13.000,,,,,,0000010001083513
13275769,10001920480,13.000,,,,,,0000010001920480
13275770,10002253686,13.000,,,,,,0000010002253686


In [6]:
s_values = list(range(9,26))
s_values

[9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]

In [12]:
project_data = {}
for s in s_values:
    # Read the CSV file for the current 's' value
    try:
        file_name = f'tic_project/s{s:02d}-eleanor-lite.txt'
        df = pd.read_csv(file_name, header=None, names=['tic_id'])
    except FileNotFoundError:
        file_name = f'tic_project/s{s:02d}-eleanor-lite-project.txt'
        df = pd.read_csv(file_name, header=None, names=['tic_id'])
    except: 
        print(f"File 'tic_project/s{s:02d}-eleanor-lite-project.txt' not found. Skipping")
        pass

        
    df_len = len(df)
    df = df.drop_duplicates(subset=['tic_id'])
    print(f'{df_len - len(df)} duplicates found')
    
    # Filter the DataFrame based on the 'tic' DataFrame
    project = df[df.tic_id.isin(tic.tic_id)]
    
    project = project.merge(tic[['tic_id', 'Tmag']], on='tic_id', how='left')
    
    # Add leading zeroes and construct the 'new_path' column
    project['tic_leading_zeroes'] = project.tic_id.astype(str).str.zfill(16)
    project['new_path'] = (
        f's{s:04d}/' +
        project['tic_leading_zeroes'].str[0:4] + '/' +
        project['tic_leading_zeroes'].str[4:8] + '/' +
        project['tic_leading_zeroes'].str[8:12] + '/' +
        project['tic_leading_zeroes'].str[12:16] + '/' +
        f'hlsp_gsfc-eleanor-lite_tess_ffi_s{s:04d}-' + project['tic_leading_zeroes'] + '_tess_v1.0_lc.fits'
    )
    
    # Add the common_string to the 'new_path' column
    common_string = '/storage/astro2/phrdhx/eleanor-lite/'
    project['new_path'] = common_string + project['new_path']
    export_paths = project.new_path.to_list()
    output_file = f'project_targets/s{s:04d}.txt'
    with open(output_file, 'w') as file:
       for i in export_paths:
           file.write(i + '\n')
    print(f"project dataframe size for sector {s}: " + str(project))

    # Store the project DataFrame in the dictionary to access later
    project_data[f's{s:04d}_project'] = project
    
    


0 duplicates found
project dataframe size for sector 9:            tic_id     Tmag tic_leading_zeroes  \
0       462358278  12.5217   0000000462358278   
1       462354014  11.9082   0000000462354014   
2       462356064  12.5600   0000000462356064   
3       462359065  12.4845   0000000462359065   
4       462358670  12.3590   0000000462358670   
...           ...      ...                ...   
729308  679620176   8.0967   0000000679620176   
729309  677042436  12.6723   0000000677042436   
729310  685333198  11.9352   0000000685333198   
729311  675771637  12.1447   0000000675771637   
729312  683946865  10.7540   0000000683946865   

                                                 new_path  
0       /storage/astro2/phrdhx/eleanor-lite/s0009/0000...  
1       /storage/astro2/phrdhx/eleanor-lite/s0009/0000...  
2       /storage/astro2/phrdhx/eleanor-lite/s0009/0000...  
3       /storage/astro2/phrdhx/eleanor-lite/s0009/0000...  
4       /storage/astro2/phrdhx/eleanor-lite/s0009/0000.

FileNotFoundError: [Errno 2] No such file or directory: 'tic_project/s12-eleanor-lite-project.txt'

In [6]:
project_data['s0013_project']

Unnamed: 0,tic_id,Tmag,tic_leading_zeroes,new_path
0,107920237,12.3346,0000000107920237,/storage/astro2/phrdhx/eleanor-lite/s0013/0000...
1,107920149,12.8402,0000000107920149,/storage/astro2/phrdhx/eleanor-lite/s0013/0000...
2,107920076,10.4251,0000000107920076,/storage/astro2/phrdhx/eleanor-lite/s0013/0000...
3,107922638,11.3910,0000000107922638,/storage/astro2/phrdhx/eleanor-lite/s0013/0000...
4,107920431,9.0046,0000000107920431,/storage/astro2/phrdhx/eleanor-lite/s0013/0000...
...,...,...,...,...
853040,954404076,12.6882,0000000954404076,/storage/astro2/phrdhx/eleanor-lite/s0013/0000...
853041,954402201,12.0978,0000000954402201,/storage/astro2/phrdhx/eleanor-lite/s0013/0000...
853042,954400293,12.4560,0000000954400293,/storage/astro2/phrdhx/eleanor-lite/s0013/0000...
853043,954375175,12.6237,0000000954375175,/storage/astro2/phrdhx/eleanor-lite/s0013/0000...


#### Compare with public lightcurves

In [11]:
s9 = pd.read_csv('target_lists/s0009.csv')

---

## Part 7: do `tic_project` TICs match public-release TICs?

In [1]:
import os
os.chdir('../../')
import pandas as pd

In [2]:
tic = pd.read_csv('tic.txt',sep='\t')
tic['tic_leading_zeroes'] = tic.tic_id.astype(str).str.zfill(16)

In [3]:
pr = pd.read_csv('target_lists/s0009.csv')

In [4]:
mytic = pd.read_csv('tic_project/s09-eleanor-lite.txt',header=None,names=['TIC_ID'])

In [5]:
print(len(mytic))
print(len(pr))

7054161
7054161


In [12]:
# Sort the columns in both DataFrames
pr = pr.sort_values(by='#TIC_ID')
mytic = mytic.sort_values(by='TIC_ID')

# Check if the DataFrames are equal
are_equal = pr['#TIC_ID'].equals(mytic['TIC_ID'])

if are_equal:
    print("The DataFrames are equal.")
else:
    print("The DataFrames are not equal.")
    unequal_rows = pr[pr['#TIC_ID'].values != mytic['TIC_ID'].values]
    print("Rows where values are not equal:")
    print(unequal_rows)

The DataFrames are not equal.
Rows where values are not equal:
Empty DataFrame
Columns: [#TIC_ID, RA, DEC]
Index: []


In [16]:
project = mytic[mytic.TIC_ID.isin(tic.tic_id)]

In [18]:
project

Unnamed: 0,TIC_ID
3897762,975780
3897703,975849
3897601,975851
3897676,975868
3897797,975888
...,...
2879466,10005000339
2879456,10005000356
2879458,10005000369
2879461,10005000466


In [20]:
 pr[pr['#TIC_ID'].isin(tic.tic_id)]

Unnamed: 0,#TIC_ID,RA,DEC
0,975780,165.398927,-5.957251
19,975849,165.352853,-5.531833
20,975851,165.329347,-5.508584
27,975868,165.338835,-5.452077
36,975888,165.315235,-5.368423
...,...,...,...
7054156,10005000339,152.281158,-29.064301
7054157,10005000356,159.178421,-27.528339
7054158,10005000369,91.874397,-61.807579
7054159,10005000466,150.778656,-26.159571


In [27]:
test = pd.merge(pr, mytic, left_on='#TIC_ID', right_on='TIC_ID', how='inner')
(test['#TIC_ID'] == test['TIC_ID']).value_counts()

True    7054161
dtype: int64

---

In [18]:
lookup = pd.read_csv('project_lookup.csv')