<a href="https://colab.research.google.com/github/joaopferreirajunior/neuralize_nasaspaceapps2025/blob/main/kepler_lightcurve_pipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Light Curve Analysis with Real Data from the Kepler Mission
This notebook loads a set of 1000 targets from the Kepler mission, equally divided into:

confirmed exoplanets

exoplanet candidates

The targets were selected from NASA’s official archive (Exoplanet Archive) by filtering on the koi_disposition column, and the resulting table was saved in the CSV file filtered_kepler_targets_confirmed_candidate.csv.

The goal is to retrieve light curves for these targets and analyze them for future classification experiments.

In [None]:
# Mount Google Drive inside the Colab VM; the input CSV is read from
# /content/drive/MyDrive further down.
from google.colab import drive

mount_point = '/content/drive'
drive.mount(mount_point)

Mounted at /content/drive


In [None]:
!pip install lightkurve --quiet
import os
import pandas as pd
import lightkurve as lk
import matplotlib.pyplot as plt
from astropy.timeseries import BoxLeastSquares

# Default size (inches) and resolution for the matplotlib figures below.
plt.rcParams.update({
    'figure.figsize': (8, 4),
    'figure.dpi': 120,
})

# Input table on the mounted Drive, and the folder (relative to the working
# directory) that collects the files written by this notebook.
DATA_CSV = "/content/drive/MyDrive/filtered_kepler_targets_confirmed_candidate.csv"
OUTPUT_DIR = "outputs"
os.makedirs(OUTPUT_DIR, exist_ok=True)


In [None]:

# Load the target table; lines starting with "#" are skipped as comments.
# Rows without a kepid are dropped and the remaining IDs are converted to
# integer-formatted strings. The frame is built in a single chain so that no
# column is ever assigned on a filtered slice of another frame (the pattern
# that triggers pandas' SettingWithCopyWarning).
df = (
    pd.read_csv(DATA_CSV, comment="#")
    .dropna(subset=['kepid'])
    .assign(kepid=lambda table: table['kepid'].astype(int).astype(str))
)
print(f"Total de alvos disponíveis: {len(df)}")
df[['kepid', 'koi_disposition']].head()


In [None]:

# Query every ID only once, keeping first-seen order. The table may list the
# same kepid on several rows (presumably one row per KOI -- confirm against
# the CSV), and repeating the search would only re-download and overwrite the
# same file.
targets = df['kepid'].drop_duplicates().tolist()
downloaded = []  # IDs whose light curve was written to OUTPUT_DIR

for target in targets:
    try:
        # The explicit "KIC" prefix tells lightkurve that the number is a
        # Kepler Input Catalog ID, so the archive is searched by exact target
        # name instead of resolving a bare number as an object name.
        search_result = lk.search_lightcurve(f"KIC {target}", mission='Kepler')
        if len(search_result) > 0:
            # download() fetches a single product (the first search result),
            # not every available quarter.
            lc = search_result.download()
            lc.to_fits(os.path.join(OUTPUT_DIR, f"{target}.fits"), overwrite=True)
            downloaded.append(target)
    except Exception as e:
        # Best effort: report the failing target and continue with the rest.
        print(f"Erro com {target}: {e}")

print(f"Total de curvas salvas: {len(downloaded)}")


In [None]:
import os, glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import lightkurve as lk
from astropy.timeseries import BoxLeastSquares
from astropy import units as u

# NOTE(review): absolute path; it matches the relative "outputs" folder used by
# the earlier cells only while the working directory is /content.
FITS_DIR = "/content/outputs"

# The trial grid is identical for every target, so it is built once here
# instead of on every loop iteration. The values are passed to BoxLeastSquares
# without units; the plot title below treats the period as days.
period_grid = np.linspace(0.5, 20, 10000)
trial_duration = 0.1

# Sorted so that the row order of the results table is reproducible.
fits_files = sorted(glob.glob(os.path.join(FITS_DIR, "*.fits")))
print(f"{len(fits_files)} curvas encontradas.")

results = []  # one dict per successfully processed light curve

for path in fits_files:
    try:
        target_id = os.path.basename(path).replace(".fits", "")
        lc0 = lk.read(path).remove_nans()

        # Rebuild the flux with a freshly constructed electron/s unit.
        # NOTE(review): presumably a workaround for the unit object that comes
        # back from the FITS file -- confirm before removing.
        flux = lc0.flux
        if str(flux.unit) == "electron / s":
            flux = flux.value * (u.electron/u.s)

        # Keep only time and flux, then normalize and clip outliers.
        lc = lk.LightCurve(time=lc0.time, flux=flux)
        lc = lc.normalize().remove_outliers()

        # Box Least Squares periodogram over the fixed trial grid.
        model = BoxLeastSquares(lc.time, lc.flux)
        bls_result = model.power(period_grid, trial_duration)
        best_idx = np.argmax(bls_result.power)

        # The periodogram arrays may carry astropy units. Convert to plain
        # floats so the title is not rendered with a duplicated unit
        # ("... d days") and the CSV columns stay numeric.
        best_period = float(u.Quantity(bls_result.period[best_idx], u.day).value)
        best_power = float(u.Quantity(bls_result.power[best_idx]).value)

        # Phase-folded scatter plot at the best period, saved next to the FITS.
        ax = lc.fold(period=best_period).scatter()
        try:
            ax.set_title(f"{target_id} - Period: {best_period:.3f} days")
            ax.figure.savefig(os.path.join(FITS_DIR, f"{target_id}_folded.png"))
        finally:
            # Always release the figure, even when saving fails.
            plt.close(ax.figure)

        results.append({"target": target_id,
                        "period": best_period,
                        "power": best_power})

    except Exception as e:
        # Best effort: report the failing file and continue with the rest.
        print(f"Erro ao processar {path}: {e}")


# Explicit columns keep the CSV header present even when nothing was processed.
df_results = pd.DataFrame(results, columns=["target", "period", "power"])
df_results.to_csv(os.path.join(FITS_DIR, "results_batch.csv"), index=False)
print("Processamento finalizado.")


In [None]:

# Save a normalized, outlier-clipped preview plot for the first five
# downloaded targets. The loop index was never used, so the list is iterated
# directly.
for target in downloaded[:5]:
    try:
        lc = lk.read(os.path.join(OUTPUT_DIR, f"{target}.fits"))
        lc = lc.normalize().remove_outliers()
        ax = lc.plot()
        try:
            ax.set_title(f"Target {target}")
            ax.figure.savefig(os.path.join(OUTPUT_DIR, f"{target}_lightcurve.png"))
        finally:
            # Always release the figure, even when saving fails.
            plt.close(ax.figure)
    except Exception as e:
        # Best effort: report the failing target and continue with the rest.
        print(f"Erro ao processar {target}: {e}")


In [None]:

# Catalog rows of the targets whose light curve was downloaded.
df_downloaded = df[df['kepid'].isin(downloaded)]
# Written under its own file name: the BLS cell already stores its results as
# "results_batch.csv" in /content/outputs, which is this same folder when the
# working directory is /content, so reusing that name would overwrite them.
df_downloaded.to_csv(os.path.join(OUTPUT_DIR, "downloaded_targets.csv"), index=False)
df_downloaded.head()


In [None]:
import shutil

# Bundle the whole outputs folder into /content/outputs_backup.zip.
shutil.make_archive(
    base_name="/content/outputs_backup",
    format='zip',
    root_dir="/content/outputs",
)

In [None]:
from google.colab import files

# Hand the zipped outputs to the browser as a file download.
archive_path = "/content/outputs_backup.zip"
files.download(archive_path)