In [None]:
# Install the third-party packages this notebook imports (lightkurve and tsfresh).
!pip install lightkurve
# tsfresh is installed against a pip constraints file passed with -c.
# NOTE(review): nothing visible in this notebook creates constraints.txt — confirm it
# exists in the working directory before running this cell.
!pip install tsfresh -c constraints.txt



In [None]:
# Upload the star ID file used for light curve extraction.
from google.colab import files

# Prompt the user to upload files.
uploaded = files.upload()

# `filename` is read by the later cells that load and iterate the CSV. Stop here
# with a clear message when nothing was uploaded, instead of leaving `filename`
# unbound (or stale from a previous run) and failing later in a confusing way.
if not uploaded:
    raise RuntimeError("No files uploaded.")

# Use the first uploaded file's name; any additional uploads are ignored.
filename = next(iter(uploaded))
print(f"File '{filename}' uploaded successfully.")

Saving labelled_training_star_data_943_samples_processed.csv to labelled_training_star_data_943_samples_processed (1).csv
File 'labelled_training_star_data_943_samples_processed (1).csv' uploaded successfully.


In [None]:
# Imports used throughout the notebook.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import csv
from lightkurve import search_targetpixelfile
from tsfresh import extract_features
# NOTE: the settings class is spelled `EfficientFCParameters`; that is the name the
# feature-extraction cell instantiates, so it must be imported under exactly this name.
from tsfresh.feature_extraction import extract_features, MinimalFCParameters, EfficientFCParameters
from csv import writer
import warnings

# Silence all Python warnings for the rest of the session.
warnings.filterwarnings("ignore")

In [None]:
import logging

# Raise the threshold of tsfresh's feature-extraction settings logger so that
# only messages at ERROR level or above are emitted.
_tsfresh_settings_log = logging.getLogger("tsfresh.feature_extraction.settings")
_tsfresh_settings_log.setLevel(logging.ERROR)

In [None]:
# Load the uploaded star table, keeping Star_ID as text, then keep only the
# first row for each Star_ID and discard columns that contain no values at all.
data = (
    pd.read_csv(filename, dtype={'Star_ID': str})
    .drop_duplicates(subset=['Star_ID'])
    .dropna(axis=1, how='all')
)
data

Unnamed: 0,Star_ID,Stellar Distance (parsecs),Stellar Effective Temperature (Kelvin),Stellar Effective Radius (solar radii),TESS Object of Interest Registered,Exoplanet_Actual
0,TIC 198108326,142.7510,4652.87,0.700000,2020-05-07 18:04:46,1
1,TIC 327577308,78.9495,4475.00,0.700000,2020-06-19 07:20:04,0
2,TIC 436575927,98.0152,4197.00,0.700000,2024-04-25 22:44:01,1
3,TIC 62483237,42.4061,4356.00,0.700727,2018-09-05 18:51:57,1
4,TIC 251848941,62.6990,4111.00,0.701202,2018-11-03 00:53:39,1
...,...,...,...,...,...,...
938,TIC 229742722,224.3850,6415.00,1.400000,2020-05-08 17:43:13,1
939,TIC 429295277,218.4380,5972.00,1.400000,2020-06-17 20:08:08,0
940,TIC 178217113,348.6460,6135.50,1.400000,2022-01-06 23:39:44,1
941,TIC 366602797,703.6280,6332.00,1.400000,2022-02-28 23:29:45,0


In [None]:
# For every unique star in the uploaded CSV: download a target pixel file,
# convert it to a light curve, and compute one row of tsfresh features.
appended_data = []      # one single-row feature DataFrame per processed star
skipped_stars = []      # (Star_ID, reason) for each star that yielded no features
seen_star_ids = set()   # mirrors the Star_ID de-duplication applied to `data`

# The feature settings are identical for every star, so build them once.
fc_parameters = EfficientFCParameters()

# newline='' is how the csv module documentation says to open files for csv readers.
with open(filename, newline='') as file:
    reader = csv.DictReader(file)

    for row in reader:
        star_id = row['Star_ID']

        # The raw CSV may list a star more than once; process only the first
        # occurrence so duplicates do not end up as repeated training rows.
        if star_id in seen_star_ids:
            continue
        seen_star_ids.add(star_id)

        try:
            tpf_file = search_targetpixelfile(star_id).download(quality_bitmask='default')
            if tpf_file is None:
                skipped_stars.append((star_id, "no target pixel file downloaded"))
                continue

            lc = tpf_file.to_lightcurve()
            df = pd.DataFrame({'flux': lc.flux.value, 'time': lc.time.value})  # Only extracting flux & time
            df.dropna(inplace=True)
            df["ID"] = 1  # Required for tsfresh

            extracted_features = extract_features(df, default_fc_parameters=fc_parameters, column_id="ID")
            extracted_features['Exoplanet_Actual'] = row['Exoplanet_Actual']
            extracted_features.insert(loc=0, column="Star_ID", value=star_id)
            appended_data.append(extracted_features)

        except (TypeError, AttributeError, ValueError) as exc:
            # Best-effort: keep going, but remember which star failed and why
            # instead of dropping it without a trace.
            skipped_stars.append((star_id, f"{type(exc).__name__}: {exc}"))
            continue

print(f"Extracted features for {len(appended_data)} stars; skipped {len(skipped_stars)}.")

In [None]:
# Stack the per-star feature rows into a single DataFrame.
# This cell rebinds `appended_data` from a list to a DataFrame, so skip the
# concat when the cell is re-run; otherwise pd.concat would be handed a
# DataFrame instead of a list of DataFrames and raise.
if not isinstance(appended_data, pd.DataFrame):
    if not appended_data:
        raise ValueError("No light-curve features were extracted; nothing to concatenate.")
    appended_data = pd.concat(appended_data)

In [None]:
# Discard uninformative columns: those whose every value equals 0.0, those
# whose every value equals 1.0, and those that are entirely NaN.
not_all_zero = appended_data.ne(0.0).any(axis=0)
not_all_one = appended_data.ne(1.0).any(axis=0)
appended_data = appended_data.loc[:, not_all_zero & not_all_one].dropna(axis=1, how='all')
appended_data

Unnamed: 0,Star_ID,flux__has_duplicate,flux__sum_values,flux__abs_energy,flux__mean_abs_change,flux__mean_change,flux__mean_second_derivative_central,flux__median,flux__mean,flux__length,...,time__ratio_beyond_r_sigma__r_6,time__ratio_beyond_r_sigma__r_7,time__ratio_beyond_r_sigma__r_10,time__count_above__t_0,time__count_below__t_0,time__permutation_entropy__dimension_3__tau_1,time__permutation_entropy__dimension_4__tau_1,time__permutation_entropy__dimension_5__tau_1,time__permutation_entropy__dimension_6__tau_1,time__permutation_entropy__dimension_7__tau_1
0,TIC 198108326,1.0,4.664377e+07,1.240849e+11,11.530768,-0.003935,-0.000964,2.657060e+03,2.660038e+03,17535.0,...,,,,,,,,,,
1,TIC 327577308,1.0,7.948766e+06,5.733501e+10,5.232091,0.001887,0.000571,7.212939e+03,7.213036e+03,1102.0,...,,,,,,,,,,
2,TIC 436575927,1.0,6.518944e+07,2.972384e+11,32.503155,0.523376,0.001092,3.821674e+03,3.901690e+03,16708.0,...,,,,,,,,,,
3,TIC 62483237,1.0,4.939043e+08,1.334561e+13,23.704840,0.001608,0.000609,2.703756e+04,2.702031e+04,18279.0,...,,,,,,,,,,
4,TIC 251848941,1.0,1.835050e+08,1.840140e+12,16.052486,0.010285,-0.001091,1.003106e+04,1.002759e+04,18300.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
570,TIC 14614418,1.0,1.020607e+07,1.348075e+10,13.442261,-0.001804,-0.000759,6.041951e+02,6.198646e+02,16465.0,...,0.0,0.0,0.0,1.0,0.0,-0.0,-0.0,-0.0,-0.0,-0.0
571,TIC 349095149,1.0,2.563983e+08,3.749651e+12,32.655548,-0.363590,-0.001395,1.441882e+04,1.447025e+04,17719.0,...,0.0,0.0,0.0,1.0,0.0,-0.0,-0.0,-0.0,-0.0,-0.0
572,TIC 49687222,1.0,3.371074e+09,3.385214e+15,267.342255,0.696495,-0.005943,1.003480e+06,1.004192e+06,3357.0,...,0.0,0.0,0.0,1.0,0.0,-0.0,-0.0,-0.0,-0.0,-0.0
573,TIC 63452790,1.0,3.770222e+07,8.742650e+11,12.102783,0.345808,0.004929,2.321402e+04,2.318710e+04,1626.0,...,0.0,0.0,0.0,1.0,0.0,-0.0,-0.0,-0.0,-0.0,-0.0


In [None]:
# Write the filtered feature table to disk as the training CSV.
appended_data.to_csv(path_or_buf="final_train_file_575_samples.csv")