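# Neural Network

Trains a feed-forward binary classifier that predicts whether a row's `koi_disposition` is `CONFIRMED`; rows labelled `CANDIDATE` are excluded before training.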


## Imports

In [2]:
import os
import random
import numpy as np
import pandas as pd
import joblib
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score, confusion_matrix, classification_report, roc_auc_score
from sklearn.utils.class_weight import compute_class_weight

## Reproducibility

In [4]:
# Single source of truth for every random_state / seed used in this notebook.
SEED = 42

# keras.utils.set_random_seed seeds Python's `random`, NumPy and the backend
# framework (TensorFlow here) in one documented call, replacing three separate
# seeding calls that had to be kept in step by hand.
keras.utils.set_random_seed(SEED)

In [5]:
# Load the cleaned dataset; the relative path is resolved against the
# notebook's current working directory.
df = pd.read_csv(filepath_or_buffer="[CLEANED]supernova-dataset.csv")

## Defining X and y

In [7]:
# Discard rows whose disposition is still "CANDIDATE"; only the remaining rows
# are used to build the binary target.
df = df.loc[df["koi_disposition"].ne("CANDIDATE")].copy()

# Binary label: 1 where the disposition is "CONFIRMED", 0 for everything else.
df["target"] = df["koi_disposition"].eq("CONFIRMED").astype(int)

# Columns removed from the feature matrix: the saved index column, the
# identifier column, and the raw label column the target was derived from.
drop_cols = ["Unnamed: 0", "kepid", "koi_disposition"]

y = df["target"]
X = df.drop(columns=[*drop_cols, "target"])

## Train/Test Split

In [8]:
# Hold out 20% of the rows for evaluation. Stratifying on y keeps the class
# proportions the same in both partitions; SEED makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=SEED,
)

## Scaling

In [11]:
# Fit the scaler on the training partition only, then apply the same fitted
# transform to both partitions so no test-set statistics leak into training.
# NOTE: X_train / X_test are overwritten here, so re-running this cell without
# re-running the split would fit the scaler on already-scaled data.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Persist the fitted scaler so the identical transform can be reloaded later.
os.makedirs("models", exist_ok=True)
joblib.dump(scaler, "models/nn_scaler.pkl")

['models/nn_scaler.pkl']

## Class weights

In [12]:
# "balanced" weights are inversely proportional to each class's frequency in
# the training labels, so the rarer class contributes more to the loss.
class_labels = np.unique(y_train)
balanced_weights = compute_class_weight(
    class_weight="balanced",
    classes=class_labels,
    y=y_train,
)

# Key each weight by its actual class label rather than by its position in the
# array, so the mapping stays correct even if the integer labels are not
# exactly 0..k-1. Plain int/float keep the dict free of NumPy scalar types.
class_weights = {
    int(label): float(weight)
    for label, weight in zip(class_labels, balanced_weights)
}

## Model Architecture

In [16]:
def build_model(
    input_dim,
    hidden_units=(256, 128, 64),
    dropout_rates=(0.3, 0.3, 0.2),
    learning_rate=1e-3,
):
    """Build and compile a feed-forward binary classifier.

    The network stacks one ``Dense(relu) -> BatchNormalization -> Dropout``
    group per entry in ``hidden_units`` and ends in a single sigmoid unit.
    The defaults give three hidden groups of 256, 128 and 64 units with
    dropout rates 0.3, 0.3 and 0.2.

    Parameters
    ----------
    input_dim : int
        Number of input features (width of one sample).
    hidden_units : sequence of int, optional
        Width of each hidden ``Dense`` layer, in order.
    dropout_rates : sequence of float, optional
        Dropout rate applied after each hidden group; must have the same
        length as ``hidden_units``.
    learning_rate : float, optional
        Learning rate passed to the Adam optimizer.

    Returns
    -------
    keras.Sequential
        Model compiled with binary cross-entropy loss and an AUC metric
        reported under the name ``"auc"``.

    Raises
    ------
    ValueError
        If ``hidden_units`` and ``dropout_rates`` differ in length.
    """
    hidden_units = tuple(hidden_units)
    dropout_rates = tuple(dropout_rates)
    # Validate before touching Keras so a bad configuration fails fast instead
    # of zip() silently dropping the unmatched layers.
    if len(hidden_units) != len(dropout_rates):
        raise ValueError(
            "hidden_units and dropout_rates must have the same length, got "
            f"{len(hidden_units)} and {len(dropout_rates)}"
        )

    stack = [keras.Input(shape=(input_dim,))]
    for units, rate in zip(hidden_units, dropout_rates):
        stack.extend([
            layers.Dense(units, activation="relu"),
            layers.BatchNormalization(),
            layers.Dropout(rate),
        ])
    # Single sigmoid unit: the output is a value in (0, 1) for the positive class.
    stack.append(layers.Dense(1, activation="sigmoid"))

    model = keras.Sequential(stack)
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss="binary_crossentropy",
        metrics=[keras.metrics.AUC(name="auc")],
    )

    return model

## Early Stopping

In [17]:
# Stop training once the validation loss has gone 10 consecutive epochs
# without improving, and roll the model back to the best weights seen.
early_stop = keras.callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=10,
    monitor="val_loss",
)

## Training

In [18]:
# Size the input layer from the number of feature columns in the scaled data.
model = build_model(input_dim=X_train.shape[1])

# 200 epochs is an upper bound: the early-stopping callback ends training
# sooner. A fifth of the training rows is held back for validation.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=200,
    validation_split=0.2,
    callbacks=[early_stop],
    class_weight=class_weights,
    verbose=1,
)

Epoch 1/200
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 12ms/step - auc: 0.8339 - loss: 0.5231 - val_auc: 0.9421 - val_loss: 0.4329
Epoch 2/200
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - auc: 0.9237 - loss: 0.3574 - val_auc: 0.9495 - val_loss: 0.3645
Epoch 3/200
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - auc: 0.9304 - loss: 0.3419 - val_auc: 0.9532 - val_loss: 0.3257
Epoch 4/200
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - auc: 0.9404 - loss: 0.3135 - val_auc: 0.9563 - val_loss: 0.2940
Epoch 5/200
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - auc: 0.9444 - loss: 0.3023 - val_auc: 0.9575 - val_loss: 0.2901
Epoch 6/200
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - auc: 0.9436 - loss: 0.3024 - val_auc: 0.9581 - val_loss: 0.2814
Epoch 7/200
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - auc: