In [None]:
# ---------------------------
# Data Preparation Notebook
# ---------------------------

import pandas as pd
import matplotlib.pyplot as plt

# Load dataset. exoTrain.csv and exoTest.csv are read from ../data, i.e. the
# path is resolved relative to the current working directory.
train = pd.read_csv("../data/exoTrain.csv")
test = pd.read_csv("../data/exoTest.csv")

print("Train shape:", train.shape)
print("Test shape:", test.shape)

# Convert labels: 2 → Planet (1), 1 → No Planet (0).
# Any value other than 2 (including missing values) is mapped to 0.
# A vectorized comparison is used instead of a per-row ``apply(lambda ...)``:
# it yields the same 0/1 int64 column without calling a Python function once
# per element.
train['LABEL'] = (train['LABEL'] == 2).astype("int64")
test['LABEL'] = (test['LABEL'] == 2).astype("int64")

# Class balance after re-encoding (counts of 0s and 1s per split).
print("\nLabel distribution (Train):\n", train['LABEL'].value_counts())
print("\nLabel distribution (Test):\n", test['LABEL'].value_counts())

# ---------------------------
# Visualize Sample Light Curves
# ---------------------------

# Split the training rows by class, then take the first row of each group.
# The ``1:`` column slice skips column 0 positionally so that only the
# remaining columns are drawn as the curve.
# NOTE(review): this assumes LABEL is the first column of the frame - confirm.
planet_rows = train[train['LABEL'] == 1]
star_rows = train[train['LABEL'] == 0]

# First planet light curve
planet_curve = planet_rows.iloc[0, 1:].values

# First non-planet light curve
star_curve = star_rows.iloc[0, 1:].values

# Overlay both curves on one figure; each entry is (data, legend text, color).
plt.figure(figsize=(12, 6))
for curve, legend_text, line_color in (
    (planet_curve, "Exoplanet Detected", "orange"),
    (star_curve, "No Exoplanet", "blue"),
):
    plt.plot(curve, label=legend_text, color=line_color)

# Axis decorations are independent of each other; the legend is built last so
# it picks up both plotted lines.
plt.title("Sample Light Curves from Kepler Data")
plt.ylabel("Flux (Brightness)")
plt.xlabel("Time Steps")
plt.legend()
plt.show()
