# Neural Network


## Imports

In [2]:
import os
import random
import numpy as np
import pandas as pd
import joblib
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score, confusion_matrix, classification_report, roc_auc_score
from sklearn.utils.class_weight import compute_class_weight

**Reproducibility**

In [3]:
SEED = 42

# Seed Python's, NumPy's and TensorFlow's global RNGs with the same value.
# The three calls are independent of each other, so their order is irrelevant.
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [4]:
# Load the cleaned dataset; the path is relative to the working directory.
DATA_PATH = "[CLEANED]supernova-dataset.csv"
df = pd.read_csv(DATA_PATH)

## Defining X and y

In [5]:
# Rows labelled "CANDIDATE" are removed before the binary label is built.
df = df.loc[df["koi_disposition"].ne("CANDIDATE")].copy()

# Binary label: 1 where the disposition is "CONFIRMED", 0 for every other
# remaining row.
df["target"] = df["koi_disposition"].eq("CONFIRMED").astype(int)

# Columns kept out of the feature matrix. "Unnamed: 0" and "kepid" look like
# an index artefact and an identifier (TODO confirm); "koi_disposition" is the
# raw label the target was derived from.
drop_cols = ["Unnamed: 0", "kepid", "koi_disposition"]

y = df["target"]
X = df.drop(columns=[*drop_cols, "target"])

## Train/Test Split

In [6]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both parts; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=SEED,
    stratify=y,
)

## Scaling

In [7]:
# Fit the scaler on the training rows only, then apply the same statistics to
# the test rows, so nothing about the test set leaks into the scaling.
#
# The raw rows are re-selected from the unscaled frame `X` through the split's
# index instead of being read from `X_train` / `X_test`, which this cell
# overwrites. Re-running the cell therefore never fits (and saves) a scaler on
# data that has already been standardized.
scaler = StandardScaler()
X_train = scaler.fit_transform(X.loc[y_train.index])
X_test = scaler.transform(X.loc[y_test.index])

# Persist the fitted scaler so the same transformation can be reapplied later.
os.makedirs("models", exist_ok=True)
joblib.dump(scaler, "models/nn_scaler.pkl")

['models/nn_scaler.pkl']

## Class weights

In [8]:
# "balanced" gives each class a weight inversely proportional to its frequency
# in y_train, so the rarer class contributes more to the loss.
class_labels = np.unique(y_train)
balanced_weights = compute_class_weight(
    class_weight="balanced",
    classes=class_labels,
    y=y_train,
)

# Key every weight by the label it belongs to rather than by its position in
# the array: positional keys are only correct while the labels happen to be
# exactly 0..k-1. Plain int/float values keep the mapping free of NumPy scalars.
class_weights = {
    int(label): float(weight)
    for label, weight in zip(class_labels, balanced_weights)
}

## Model Architecture

In [9]:
def build_model(input_dim):
    """Build and compile the binary-classification network.

    The network is three hidden blocks (Dense + ReLU, BatchNormalization,
    Dropout) of 256, 128 and 64 units, followed by a single sigmoid unit.

    Args:
        input_dim: Number of input features per sample.

    Returns:
        A compiled ``keras.Sequential`` model (Adam with learning rate 1e-3,
        binary cross-entropy loss, AUC reported under the name ``"auc"``).
    """
    # (units, dropout rate) for each hidden block, in order.
    hidden_blocks = [(256, 0.3), (128, 0.3), (64, 0.2)]

    stack = [keras.Input(shape=(input_dim,))]
    for units, drop_rate in hidden_blocks:
        stack.append(layers.Dense(units, activation="relu"))
        stack.append(layers.BatchNormalization())
        stack.append(layers.Dropout(drop_rate))
    # One sigmoid unit: the output is a single value in [0, 1] per sample.
    stack.append(layers.Dense(1, activation="sigmoid"))

    model = keras.Sequential(stack)
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=1e-3),
        loss="binary_crossentropy",
        metrics=[keras.metrics.AUC(name="auc")],
    )
    return model

## Early Stopping

In [10]:
# Stop training once the validation loss has gone 10 epochs without improving,
# and roll the model back to the best weights seen.
early_stop = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    restore_best_weights=True,
    patience=10,
)

## Training

In [11]:
model = build_model(input_dim=X_train.shape[1])

# 20% of the training data is held back by Keras for validation; early stopping
# monitors its loss, so 200 is an upper bound on the number of epochs.
# Class weights are applied to the training loss.
history = model.fit(
    X_train,
    y_train,
    epochs=200,
    batch_size=64,
    validation_split=0.2,
    class_weight=class_weights,
    callbacks=[early_stop],
    verbose=1,
)

Epoch 1/200
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 11ms/step - auc: 0.8406 - loss: 0.5180 - val_auc: 0.9471 - val_loss: 0.4368
Epoch 2/200
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - auc: 0.9265 - loss: 0.3493 - val_auc: 0.9502 - val_loss: 0.3653
Epoch 3/200
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - auc: 0.9310 - loss: 0.3366 - val_auc: 0.9534 - val_loss: 0.3072
Epoch 4/200
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - auc: 0.9378 - loss: 0.3208 - val_auc: 0.9556 - val_loss: 0.2869
Epoch 5/200
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - auc: 0.9418 - loss: 0.3088 - val_auc: 0.9559 - val_loss: 0.2770
Epoch 6/200
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - auc: 0.9439 - loss: 0.3010 - val_auc: 0.9569 - val_loss: 0.2764
Epoch 7/200
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - auc: 0

## Metrics


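**Find the best threshold for F1** — sketch only (not executed): it needs `y_probs` from the evaluation cell below, and the threshold should be tuned on a validation split rather than on `y_test`, otherwise the reported F1 is optimistically biased.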

```
thresholds = np.linspace(0.1, 0.9, 100)
best_f1 = 0
best_thresh = 0.5

for t in thresholds:
    preds = (y_probs >= t).astype(int)
    score = f1_score(y_test, preds)
    if score > best_f1:
        best_f1 = score
        best_thresh = t
```



In [12]:
# Score the held-out test set: predicted probabilities first, then hard labels
# at the default 0.5 decision threshold.
y_probs = model.predict(X_test)
y_pred = (y_probs >= 0.5).astype(int)

# F1 is computed from the thresholded labels; ROC-AUC is threshold-free and is
# computed from the raw probabilities.
test_f1 = f1_score(y_test, y_pred)
test_auc = roc_auc_score(y_test, y_probs)

print("F1:", test_f1)
print("ROC-AUC:", test_auc)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
F1: 0.8602878916172735
ROC-AUC: 0.9588418253241481
[[789 124]
 [ 41 508]]
              precision    recall  f1-score   support

           0       0.95      0.86      0.91       913
           1       0.80      0.93      0.86       549

    accuracy                           0.89      1462
   macro avg       0.88      0.89      0.88      1462
weighted avg       0.90      0.89      0.89      1462



## Saving

In [13]:
# Write the trained model into the same "models" directory as the scaler.
MODEL_PATH = "models/nn_model.keras"
model.save(MODEL_PATH)