In [1]:
import torch
import torch.nn as nn 
import torch.optim as optim 


In [2]:
# Select the compute backend: use CUDA when PyTorch reports a usable GPU,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [3]:
import pandas as pd 
from sklearn.preprocessing import StandardScaler 



# Load the sample and give the four columns fixed names.
#
# header=0 tells pandas that the file's first line is not data and should be
# replaced by `names`.  Previously the file was read with header=None and the
# first row was sliced away afterwards with iloc[1:]; that discarded the same
# line, but only after it had been parsed as a data row, which leaves the
# columns as strings/object dtype and shifts the row index to start at 1.
# NOTE(review): this presumes the first line is a header row -- confirm
# against the CSV.
data = pd.read_csv(
    "custom_sample_mass_radius_k2_5.csv",
    header=0,
    names=["Mass", "Radius", "k2", "Type"],
    low_memory=False,
)

# The last column ("Type") is the label; everything before it is a feature.
# star_type now carries a 0-based index, so row i of star_type corresponds to
# row i of the scaled array below.
star_type = data.iloc[:, -1]

# Standardise each feature column to zero mean / unit variance.
# NOTE: the scaler is fit on every row of this file; if these features are
# later split into train/test parts, fit it on the training part only.
scaler = StandardScaler()
m_r_p = scaler.fit_transform(data.iloc[:, :-1])



In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, confusion_matrix

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

# Step 1: read the CSV (no explicit header/names arguments are passed, so
# pandas' default header handling applies).
df = pd.read_csv("custom_sample_mass_radius_k2_15.csv")

# Step 2: "Type" is the target; every remaining column becomes a float32 feature.
y = df["Type"].values
X = df.drop(columns=["Type"]).values.astype(np.float32)

# Step 3: map the raw "Type" values to integer class ids.
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)

# Step 4: stratified split, 40% held out for testing, fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.4,
    stratify=y_encoded,
    random_state=42,
)

# Step 5: learn the scaling statistics from the training rows only, then
# apply the same transform to both parts.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Step 6: wrap the arrays as tensors (float32 features, long labels).
X_train_tensor, X_test_tensor = (
    torch.tensor(features, dtype=torch.float32) for features in (X_train, X_test)
)
y_train_tensor, y_test_tensor = (
    torch.tensor(labels, dtype=torch.long) for labels in (y_train, y_test)
)

# Step 7: batch the tensors in groups of 32; only the training loader shuffles.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=32)




In [None]:
import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from skorch import NeuralNetClassifier

class FlexibleModel(nn.Module):
    """Fully connected network with a configurable stack of hidden layers.

    Each hidden layer is a ``Linear`` followed by a ``ReLU``; a final
    ``Linear`` maps the last hidden width to ``out_features`` and its raw
    output is returned (no activation is applied after it).
    """

    def __init__(self, in_features, hidden_layers, out_features, dropout_p=0.3):
        """
        in_features: int — number of input features
        hidden_layers: list of ints — width of each hidden layer, in order
        out_features: int — number of outputs (classes)
        dropout_p: float — dropout probability; accepted for interface
            compatibility but currently unused, since no Dropout layer is
            added to the stack
        """
        super().__init__()
        # Consecutive pairs of this list are the (input, output) sizes of the
        # hidden Linear layers.
        widths = [in_features, *hidden_layers]
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.extend((nn.Linear(fan_in, fan_out), nn.ReLU()))
        self.hidden = nn.Sequential(*stack)
        # With no hidden layers, widths[-1] is in_features and `hidden` is an
        # empty Sequential that passes its input through unchanged.
        self.out = nn.Linear(widths[-1], out_features)

    def forward(self, x):
        """Run ``x`` through the hidden stack and the output layer."""
        return self.out(self.hidden(x))


# Cast the split arrays to the dtypes used for training: float32 features and
# long-integer class labels.
X_train_, X_test_ = (part.astype(np.float32) for part in (X_train, X_test))
y_train_, y_test_ = (part.astype(np.longlong) for part in (y_train, y_test))

# Both ends of the network are sized from the training data.
input_dim = X_train_.shape[1]
output_dim = np.unique(y_train_).size

# Base estimator. hidden_layers, lr, batch_size and max_epochs given here are
# only placeholders: the grid below supplies the values actually evaluated.
net = NeuralNetClassifier(
    module=FlexibleModel,
    module__in_features=input_dim,
    module__out_features=output_dim,
    module__hidden_layers=[32, 64, 32],
    criterion=nn.CrossEntropyLoss,
    optimizer=torch.optim.Adam,
    lr=0.001,
    max_epochs=20,
    batch_size=64,
    verbose=0,
)

# 7 architectures x 3 learning rates x 3 batch sizes x 3 epoch budgets
# = 189 candidates.
param_grid = {
    'module__hidden_layers': [
        [32, 64],
        [64, 64],
        [32, 64, 32],
        [128, 64],
        [128, 64, 32],
        [256, 128, 64],
        [128, 128, 64, 32],
    ],
    'lr': [0.01, 0.001, 0.0005],
    'batch_size': [16, 32, 64],
    'max_epochs': [30, 50, 100],
}

# 5-fold stratified CV with a fixed shuffle seed; every candidate is scored on
# accuracy, using all available cores.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
gs = GridSearchCV(
    estimator=net,
    param_grid=param_grid,
    scoring='accuracy',
    cv=cv,
    n_jobs=-1,
    verbose=2,
)
gs.fit(X_train_, y_train_)

print("\n Best parameters found:\n", gs.best_params_)
print(" Best cross-val accuracy: ", gs.best_score_)

# Evaluate the best estimator found by the search on the held-out test split.
best_net = gs.best_estimator_
y_pred = best_net.predict(X_test_)

print("\n Test accuracy: ", accuracy_score(y_test_, y_pred))
print("\nClassification report:\n", classification_report(y_test_, y_pred))
print("\nConfusion matrix:\n", confusion_matrix(y_test_, y_pred))


Fitting 5 folds for each of 189 candidates, totalling 945 fits

✅ Best parameters found:
 {'batch_size': 16, 'lr': 0.01, 'max_epochs': 30, 'module__hidden_layers': [32, 64]}
✅ Best cross-val accuracy:  1.0

✅ Test accuracy:  1.0

Classification report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     12622
           1       1.00      1.00      1.00     12986

    accuracy                           1.00     25608
   macro avg       1.00      1.00      1.00     25608
weighted avg       1.00      1.00      1.00     25608


Confusion matrix:
 [[12622     0]
 [    0 12986]]
