In [None]:
!pip install lightkurve
!pip install tsfresh -c constraints.txt

Collecting lightkurve
  Downloading lightkurve-2.5.0-py3-none-any.whl.metadata (6.1 kB)
Collecting astroquery>=0.3.10 (from lightkurve)
  Downloading astroquery-0.4.10-py3-none-any.whl.metadata (6.3 kB)
Collecting fbpca>=1.0 (from lightkurve)
  Downloading fbpca-1.0.tar.gz (11 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting memoization>=0.3.1 (from lightkurve)
  Downloading memoization-0.4.0.tar.gz (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.2/41.2 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting oktopus>=0.1.2 (from lightkurve)
  Downloading oktopus-0.1.2.tar.gz (10 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting s3fs>=2024.6.1 (from lightkurve)
  Downloading s3fs-2025.3.2-py3-none-any.whl.metadata (1.9 kB)
Collecting uncertainties>=3.1.4 (from lightkurve)
  Downloading uncertainties-3.2.2-py3-none-any.whl.metadata (6.9 kB)
Collecting pyvo>=1.5 (fro

In [None]:
# Upload the star ID file used for light curve extraction
from google.colab import files

# Ask the user to pick the file(s); the result is kept in `uploaded`
uploaded = files.upload()

# Report the outcome. `filename` is bound to the first name yielded by
# iterating `uploaded`, and only when at least one file was uploaded.
if not uploaded:
    print("No files uploaded.")
else:
    filename = next(iter(uploaded))
    print(f"File '{filename}' uploaded successfully.")

Saving exoplanet_archive_test_data_processed.csv to exoplanet_archive_test_data_processed.csv
File 'exoplanet_archive_test_data_processed.csv' uploaded successfully.


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import csv
from lightkurve import search_targetpixelfile
from tsfresh import extract_features
from tsfresh.feature_extraction import extract_features, MinimalFCParameters, EfficientFCParameters
from csv import writer
import warnings
# Suppress every Python warning from this point on. This is a blanket filter,
# so warnings raised by the libraries used in later cells are hidden as well.
warnings.filterwarnings("ignore")
import logging
# Only let messages of level ERROR or higher through from the tsfresh
# feature-extraction settings logger.
logging.getLogger("tsfresh.feature_extraction.settings").setLevel(logging.ERROR)

In [None]:
# Load the uploaded CSV, keep a single row per Star_ID (the first occurrence
# is retained), and discard only the columns whose values are all NaN.
data = (
    pd.read_csv(filename)
    .drop_duplicates(subset=['Star_ID'])
    .dropna(axis=1, how='all')
)
data

Unnamed: 0,Star_ID,Stellar Effective Temperature (Kelvin),Stellar Effective Radius (solar radii)
0,TIC 233529335,6000.00,1.28867
1,TIC 206466531,6000.20,1.33000
2,TIC 68152139,6001.00,1.49000
3,TIC 288246496,6001.00,1.51284
4,TIC 192976435,6001.00,1.56496
...,...,...,...
246,TIC 190990336,6118.10,1.66000
247,TIC 118339710,6118.81,1.56229
248,TIC 186815660,6119.00,1.40000
249,TIC 432280671,6119.00,1.18343


In [None]:
# For every star in `data`, download its target pixel file, turn it into a
# light curve and compute tsfresh features. Successful results are collected
# in `appended_data` (one single-row DataFrame per star); failures are
# recorded as messages in `error_log` so one bad star does not stop the run.
appended_data = []
error_log = []
for star_id in data['Star_ID']:
    try:
        tpf_file = search_targetpixelfile(star_id).download(quality_bitmask='default')
        if tpf_file is None:
            # Nothing was downloaded for this star. Record that explicitly
            # instead of letting the next line fail with an opaque
            # "'NoneType' object has no attribute ..." message.
            error_log.append(f"Error processing Star_ID {star_id}: no target pixel file found")
            continue
        lc = tpf_file.to_lightcurve()
        df = pd.DataFrame({'flux': lc.flux.value, 'time': lc.time.value})
        df.dropna(inplace=True)  # drop cadences where flux or time is NaN
        df['ID'] = 1  # Required for tsfresh

        print(f"\nExtracting features for Star_ID: {star_id}")  # Show which star is processing
        # NOTE(review): 'time' is passed as an ordinary value column (no
        # column_sort is given), so features are computed for both 'flux' and
        # 'time' - confirm that the time__* features are intended.
        extracted_features = extract_features(
            df,
            default_fc_parameters=EfficientFCParameters(),
            column_id='ID',
            disable_progressbar=False  # Keep tsfresh's progress bar
        )
        extracted_features.insert(0, "Star_ID", star_id)
        appended_data.append(extracted_features)

    except Exception as e:
        # Best-effort processing: remember the failure and move on.
        error_log.append(f"Error processing Star_ID {star_id}: {e}")

# Surface the outcome. Warnings are suppressed earlier in this notebook, so
# without this summary the skipped stars would go unnoticed.
print(f"\nExtracted features for {len(appended_data)} of {len(data['Star_ID'])} stars; {len(error_log)} failed.")
for message in error_log:
    print(message)

In [None]:
# Stack the per-star feature rows into one DataFrame.
# `appended_data` starts as a list and is rebound to a DataFrame here, so the
# concat is only performed while it is still a list. This keeps the cell safe
# to re-run: passing the already-built DataFrame to pd.concat raises TypeError.
if isinstance(appended_data, list):
    appended_data = pd.concat(appended_data)
appended_data

Unnamed: 0,Star_ID,flux__variance_larger_than_standard_deviation,flux__has_duplicate_max,flux__has_duplicate_min,flux__has_duplicate,flux__sum_values,flux__abs_energy,flux__mean_abs_change,flux__mean_change,flux__mean_second_derivative_central,...,time__fourier_entropy__bins_5,time__fourier_entropy__bins_10,time__fourier_entropy__bins_100,time__permutation_entropy__dimension_3__tau_1,time__permutation_entropy__dimension_4__tau_1,time__permutation_entropy__dimension_5__tau_1,time__permutation_entropy__dimension_6__tau_1,time__permutation_entropy__dimension_7__tau_1,time__query_similarity_count__query_None__threshold_0.0,time__mean_n_absolute_max__number_of_maxima_7
1,TIC 233529335,1.0,0.0,0.0,1.0,3.865003e+08,8.108050e+12,23.562277,-0.000188,0.000032,...,0.045395,0.090729,0.215617,-0.0,-0.0,-0.0,-0.0,-0.0,,1710.200345
1,TIC 206466531,1.0,0.0,0.0,1.0,2.081981e+08,3.201131e+12,16.568686,0.001486,-0.002093,...,0.045395,0.090729,0.260704,-0.0,-0.0,-0.0,-0.0,-0.0,,1406.211661
1,TIC 68152139,1.0,0.0,0.0,1.0,1.400513e+07,1.816439e+11,30.890060,-0.093211,-0.014456,...,0.045395,0.045395,0.090729,-0.0,-0.0,-0.0,-0.0,-0.0,,1814.953493
1,TIC 288246496,1.0,0.0,0.0,1.0,5.614880e+07,1.752484e+11,8.099537,0.005363,-0.000434,...,0.090729,0.136002,0.350689,-0.0,-0.0,-0.0,-0.0,-0.0,,2882.114778
1,TIC 192976435,1.0,0.0,0.0,1.0,2.711339e+07,4.078654e+10,6.826508,-0.002126,0.000431,...,0.045395,0.045395,0.090729,-0.0,-0.0,-0.0,-0.0,-0.0,,3014.152418
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,TIC 36808091,1.0,0.0,0.0,1.0,1.737098e+07,1.489721e+10,6.946484,-0.000564,0.000226,...,0.045395,0.045395,0.090729,-0.0,-0.0,-0.0,-0.0,-0.0,,3746.943482
1,TIC 190990336,1.0,0.0,0.0,1.0,6.964836e+08,2.833801e+13,44.944324,-3.874144,0.011241,...,0.045395,0.090729,0.226363,-0.0,-0.0,-0.0,-0.0,-0.0,,1568.473607
1,TIC 118339710,1.0,0.0,0.0,1.0,1.057533e+09,1.070885e+13,38.086884,-0.250841,0.000271,...,0.090729,0.090729,0.339942,-0.0,-0.0,-0.0,-0.0,-0.0,,2254.069974
1,TIC 186815660,1.0,0.0,0.0,1.0,1.504200e+07,2.814536e+10,12.300451,1.068955,0.003814,...,0.090729,0.090729,0.260704,-0.0,-0.0,-0.0,-0.0,-0.0,,3259.965846


In [None]:
# Remove uninformative feature columns: those in which every value equals 0
# and those in which every value equals 1. A column is kept when it has at
# least one value different from 0 and at least one value different from 1.
has_non_zero = (appended_data != 0).any(axis=0)
has_non_one = (appended_data != 1).any(axis=0)
appended_data = appended_data.loc[:, has_non_zero & has_non_one]

appended_data

Unnamed: 0,Star_ID,flux__sum_values,flux__abs_energy,flux__mean_abs_change,flux__mean_change,flux__mean_second_derivative_central,flux__median,flux__mean,flux__length,flux__standard_deviation,...,time__lempel_ziv_complexity__bins_5,time__lempel_ziv_complexity__bins_10,time__lempel_ziv_complexity__bins_100,time__fourier_entropy__bins_2,time__fourier_entropy__bins_3,time__fourier_entropy__bins_5,time__fourier_entropy__bins_10,time__fourier_entropy__bins_100,time__query_similarity_count__query_None__threshold_0.0,time__mean_n_absolute_max__number_of_maxima_7
1,TIC 233529335,3.865003e+08,8.108050e+12,23.562277,-0.000188,0.000032,20979.367188,20978.089844,18424.0,24.978390,...,0.023231,0.032892,0.102964,0.045395,0.045395,0.045395,0.090729,0.215617,,1710.200345
1,TIC 206466531,2.081981e+08,3.201131e+12,16.568686,0.001486,-0.002093,15375.145508,15375.388672,13541.0,15.610107,...,0.027029,0.038402,0.117938,0.045395,0.045395,0.045395,0.090729,0.260704,,1406.211661
1,TIC 68152139,1.400513e+07,1.816439e+11,30.890060,-0.093211,-0.014456,12972.496094,12943.741211,1082.0,580.928772,...,0.094270,0.133087,0.405730,0.045395,0.045395,0.045395,0.045395,0.090729,,1814.953493
1,TIC 288246496,5.614880e+07,1.752484e+11,8.099537,0.005363,-0.000434,3131.765137,3120.938477,17991.0,25.275686,...,0.023401,0.032794,0.097938,0.045395,0.045395,0.090729,0.136002,0.350689,,2882.114778
1,TIC 192976435,2.711339e+07,4.078654e+10,6.826508,-0.002126,0.000431,1504.052002,1504.210449,18025.0,11.258662,...,0.023467,0.033287,0.103689,0.045395,0.045395,0.045395,0.045395,0.090729,,3014.152418
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,TIC 36808091,1.737098e+07,1.489721e+10,6.946484,-0.000564,0.000226,857.300903,857.529541,20257.0,7.333892,...,0.022116,0.031248,0.097448,0.045395,0.045395,0.045395,0.045395,0.090729,,3746.943482
1,TIC 190990336,6.964836e+08,2.833801e+13,44.944324,-3.874144,0.011241,40622.648438,40634.980469,17140.0,1457.425171,...,0.024096,0.034014,0.105426,0.045395,0.045395,0.045395,0.090729,0.226363,,1568.473607
1,TIC 118339710,1.057533e+09,1.070885e+13,38.086884,-0.250841,0.000271,10114.679688,10115.091797,104550.0,336.069427,...,0.009756,0.013821,0.043109,0.045395,0.045395,0.090729,0.090729,0.339942,,2254.069974
1,TIC 186815660,1.504200e+07,2.814536e+10,12.300451,1.068955,0.003814,853.076294,899.371887,16725.0,934.859375,...,0.024395,0.034320,0.103498,0.045395,0.045395,0.090729,0.090729,0.260704,,3259.965846


In [None]:
# Write the filtered feature table to disk. The DataFrame index is written too,
# since `index` is left at its default.
# NOTE(review): the sample count in the file name is hard-coded - confirm it
# matches the number of rows actually produced by the extraction run.
appended_data.to_csv(path_or_buf="final_test_file_227_samples.csv")