In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset


In [3]:
# Load data
df = pd.read_csv('./finalalldata.csv')

# Prepare features and target
X = df.drop(columns=['label','uid'])
y = df['label']

In [12]:

# Identify numeric and categorical columns
numeric_cols = X.select_dtypes(include=[np.number]).columns.tolist()
categorical_cols = X.select_dtypes(include=['object']).columns.tolist()

# Handle missing values in numeric columns
if numeric_cols:
    X[numeric_cols] = X[numeric_cols].fillna(X[numeric_cols].median())

# Handle missing values and encode categorical columns
if categorical_cols:
    for col in categorical_cols:
        X[col] = X[col].fillna(X[col].mode()[0] if not X[col].mode().empty else 'missing')
    
    # One-hot encode categorical variables
    X = pd.get_dummies(X, columns=categorical_cols, drop_first=True)

# Ensure all columns are numeric
X = X.astype(float)

# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert to PyTorch tensors
X_train_tensor = torch.FloatTensor(X_train_scaled)
y_train_tensor = torch.FloatTensor(y_train.values).unsqueeze(1)  # Binary classification
X_test_tensor = torch.FloatTensor(X_test_scaled)
y_test_tensor = torch.FloatTensor(y_test.values).unsqueeze(1)

In [14]:

# Create DataLoader
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)


In [21]:

# Define Neural Network with 2 hidden layers (halving dimensions)
input_dim = X_train.shape[1]
hidden1_dim = 15
hidden2_dim = 7

class NeuralNetwork(nn.Module):
    def __init__(self, input_dim, hidden1_dim, hidden2_dim):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden1_dim)
        self.fc2 = nn.Linear(hidden1_dim, hidden2_dim)
        self.fc3 = nn.Linear(hidden2_dim, 1)  # Binary classification output
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.2)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.sigmoid(self.fc3(x))  # Sigmoid for binary classification
        return x


In [22]:

# Initialize model
model = NeuralNetwork(input_dim, hidden1_dim, hidden2_dim)
criterion = nn.BCELoss()  # Binary Cross Entropy Loss
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [31]:

# Training
num_epochs = 100
print("Training started...")
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    
    if (epoch + 1) % 20 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader):.4f}')


Training started...
Epoch [20/100], Loss: 0.1458
Epoch [40/100], Loss: 0.1139
Epoch [60/100], Loss: 0.1318
Epoch [80/100], Loss: 0.1008
Epoch [100/100], Loss: 0.1268


In [32]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
# Evaluation
print("\nEvaluating model...")
model.eval()
with torch.no_grad():
    y_pred_prob = model(X_test_tensor)
    y_pred = (y_pred_prob >= 0.5).float()
    acc = accuracy_score(y_test_tensor.numpy(), y_pred.numpy())
    
    print(f'\nTest Accuracy: {acc:.4f}')
    print(f'\nClassification Report:')
    print(classification_report(y_test_tensor.numpy(), y_pred.numpy(), 
                                target_names=['Class 0', 'Class 1']))
    print(f'Confusion Matrix:')
    print(confusion_matrix(y_test_tensor.numpy(), y_pred.numpy()))

print(f'\nModel Architecture:')
print(f'Input Layer: {input_dim} neurons')
print(f'Hidden Layer 1: {hidden1_dim} neurons')
print(f'Hidden Layer 2: {hidden2_dim} neurons')
print(f'Output Layer: 1 neuron (binary classification)')



Evaluating model...

Test Accuracy: 0.8346

Classification Report:
              precision    recall  f1-score   support

     Class 0       0.75      0.69      0.72        39
     Class 1       0.87      0.90      0.88        88

    accuracy                           0.83       127
   macro avg       0.81      0.80      0.80       127
weighted avg       0.83      0.83      0.83       127

Confusion Matrix:
[[27 12]
 [ 9 79]]

Model Architecture:
Input Layer: 15 neurons
Hidden Layer 1: 15 neurons
Hidden Layer 2: 7 neurons
Output Layer: 1 neuron (binary classification)


In [33]:

# Save the model
torch.save({
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'scaler': scaler,
    'input_dim': input_dim,
    'hidden1_dim': hidden1_dim,
    'hidden2_dim': hidden2_dim
}, 'binary_model.pth')

print('\nModel saved as binary_model.pth')



Model saved as binary_model.pth
