In [10]:
import pandas as pd
import os, sys
current_dir = os.getcwd()
while os.path.basename(current_dir) != 'nets2':
    current_dir = os.path.dirname(current_dir)
    if current_dir == os.path.dirname(current_dir): 
        raise Exception("'nets2' directory not found in parent directories")
    
sys.path.insert(1, os.path.join(current_dir, 'scripts'))
sys.path.insert(1, os.path.join(current_dir, 'stella'))

from utils import *
from tqdm import tqdm
import random
import glob
import re

In [11]:
files = glob.glob('../models/svc-s18/*')
files[0:2]

['../models/svc-s18/418034045_sector07.npy',
 '../models/svc-s18/85431097_sector07.npy']

In [18]:
def extract_id(filename):
    pattern = r'/(\d+)_'
    match = re.search(pattern, filename)
    if match:
        return match.group(1)
    return None

def generate_random_times(files, num_samples=10, max_retries=10):
    result = []
    for file in tqdm(files):
        data = np.load(file)
        tic = extract_id(file)
        
        times = data[0]
        diff = np.diff(times)
        large_gaps_indices = np.where(diff > 0.5)[0]
        
        for _ in range(num_samples):
            valid_time_found = False
            retry_count = 0
            while not valid_time_found and retry_count < max_retries:
                t0 = np.random.uniform(times[0], times[-1])
                valid_t0 = True
                
                for index in large_gaps_indices:
                    start_time = times[index] - 1
                    end_time = times[index + 1] + 1
                    if start_time <= t0 <= end_time:
                        valid_t0 = False
                        break
                    elif index < len(times) - 1 and diff[index] > 0.5 and abs(t0 - times[index + 1]) < 1.5:
                        valid_t0 = False
                        break
                    elif index > 0 and diff[index - 1] > 0.5 and abs(t0 - times[index]) < 1.5:
                        valid_t0 = False
                        break
                
                if t0 <= times[0] + 1 or t0 >= times[-1] - 1.5:
                    valid_t0 = False
                
                if valid_t0:
                    valid_time_found = True
                    result.append((tic, t0))
                else:
                    retry_count += 1
            
            if not valid_time_found:
                print(f"Warning: Could not find a valid time for {tic} after {max_retries} attempts.")
    df = pd.DataFrame(result,columns=['TIC','tpeak'])
    return df

In [22]:
df = generate_random_times(files,num_samples=7)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1725/1725 [00:01<00:00, 1092.17it/s]


In [23]:
df.to_csv('../catalogs/svc-s18-sample.txt',index=None,sep=' ')

In [26]:
pd.read_csv('../catalogs/svc.csv').columns

Index(['tess_id', 'Solution', 'Sector', 'period_var_1', 'period_var_2',
       'period_var_uncert_1', 'period_var_uncert_2', 'amp_var_1', 'amp_var_2',
       'amp_var_uncert_1',
       ...
       'splists', 'e_RA', 'e_Dec', 'RA_orig', 'Dec_orig', 'e_RA_orig',
       'e_Dec_orig', 'raddflag', 'wdflag', 'objID'],
      dtype='object', length=168)