In [1]:
import pandas as pd
from google.colab import drive

# Mount drive and point to your specific path
drive.mount('/content/drive')
path = '/content/drive/MyDrive/duhacks/archive (1)/'

# Loading the datasets (ensure filenames match exactly)
train_df = pd.read_csv(path + 'exoTrain.csv')
test_df = pd.read_csv(path + 'exoTest.csv')

print(f"Dataset Loaded: {train_df.shape[0]} stars found.")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Dataset Loaded: 5087 stars found.


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE
import tensorflow as tf
from tensorflow.keras import layers, models

# 1. Load Data (Assumes files from your Drive are locally accessible or in Colab)
# train = pd.read_csv('/content/drive/MyDrive/exoTrain.csv')
# test = pd.read_csv('/content/drive/MyDrive/exoTest.csv')

def preprocess_data(df):
    # Separate features and labels
    x = df.drop('LABEL', axis=1)
    y = df['LABEL'] - 1  # Convert labels 1,2 to 0,1

    # Normalize features (Standardization)
    scaler = StandardScaler()
    x_scaled = scaler.fit_transform(x)

    return x_scaled, y

x_train, y_train = preprocess_data(train_df)
x_test, y_test = preprocess_data(test_df)

# 2. Address Imbalance using SMOTE
smote = SMOTE(sampling_strategy='minority')
x_train_res, y_train_res = smote.fit_resample(x_train, y_train)

# Reshape for 1D CNN: (samples, time_steps, features)
x_train_res = np.reshape(x_train_res, (x_train_res.shape[0], x_train_res.shape[1], 1))
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

# 3. Build the 1D CNN Architecture
model = models.Sequential([
    layers.Conv1D(filters=32, kernel_size=10, activation='relu', input_shape=(x_train_res.shape[1], 1)),
    layers.MaxPooling1D(pool_size=4),
    layers.BatchNormalization(),

    layers.Conv1D(filters=64, kernel_size=10, activation='relu'),
    layers.MaxPooling1D(pool_size=4),
    layers.Dropout(0.3),

    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 4. Train the Model
history = model.fit(x_train_res, y_train_res, epochs=20, batch_size=64, validation_data=(x_test, y_test))

# 5. Evaluate Performance
y_pred = (model.predict(x_test) > 0.5).astype("int32")
print(classification_report(y_test, y_pred))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 363ms/step - accuracy: 0.6329 - loss: 0.6264 - val_accuracy: 0.3088 - val_loss: 0.7094
Epoch 2/20
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 345ms/step - accuracy: 0.8126 - loss: 0.4242 - val_accuracy: 0.9912 - val_loss: 0.1613
Epoch 3/20
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 395ms/step - accuracy: 0.8945 - loss: 0.3016 - val_accuracy: 0.9912 - val_loss: 0.1425
Epoch 4/20
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 346ms/step - accuracy: 0.9364 - loss: 0.2059 - val_accuracy: 0.9912 - val_loss: 0.2223
Epoch 5/20
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 346ms/step - accuracy: 0.9577 - loss: 0.1458 - val_accuracy: 0.9912 - val_loss: 0.1598
Epoch 6/20
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 346ms/step - accuracy: 0.9712 - loss: 0.1082 - val_accuracy: 0.9912 - val_loss: 0.2301
Epoch 7/20