In [237]:
import os
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torch

# Load dataset
df = pd.read_csv('dataset.csv')
df = df.apply(lambda x: pd.to_numeric(x, errors='coerce')).dropna()
X = df.drop('target', axis=1)
y = df['target']
# print(X.dtypes)
# print(y.dtypes)

# Scale numerical features using StandardScaler
scaler = StandardScaler()
X = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# print(type(X_train),type(X_test),type(y_train),type(y_test))

In [238]:
import torch.nn as nn

class SimpleBinaryClassifier(nn.Module):
    def __init__(self):
        super(SimpleBinaryClassifier, self).__init__()
        self.input_layer = nn.Linear(7, 128)   # input layer to hidden layer 1
        self.hidden_layer1 = nn.Linear(128, 128)  # hidden layer 1 to hidden layer 2
        self.hidden_layer2 = nn.Linear(128, 64)    # hidden layer 2 to hidden layer 3
        self.hidden_layer3 = nn.Linear(64,1) # hidden layer 2 to output layer
        self.activation1 = nn.ReLU()          # activation function for hidden layers
        self.activation2 = nn.ReLU()          # activation function for hidden layers
        self.activation3 = nn.Sigmoid()    # activation function for output layer
        
    def forward(self, x):
        x = self.activation1(self.input_layer(x))
        x = self.activation2(self.hidden_layer1(x))
        x = self.activation2(self.hidden_layer2(x))
        x = self.activation3(self.hidden_layer3(x))
        return x


In [245]:
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset


train_dataset = TensorDataset(torch.from_numpy(X_train).float(), torch.from_numpy(y_train.values.reshape(-1, 1)).float())
test_dataset = TensorDataset(torch.from_numpy(X_test).float(), torch.from_numpy(y_test.values.reshape(-1, 1)).float())
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)


# Define loss function and optimizer
# https://pytorch.org/docs/stable/nn.html#loss-functions
model = SimpleBinaryClassifier()
criterion = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Train the model
epochs = 50
for epoch in range(epochs):
    running_loss = 0.0
    for inputs, labels in train_loader:
        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    
    # Print training loss at each epoch
    # print(f'Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader):.4f}')

# Evaluate the model on test data
with torch.no_grad():
    correct = 0
    total = 0
    for inputs, labels in test_loader:
        # Forward pass
        outputs = model(inputs)
        predicted_labels = (outputs >= 0.5).int()
        total += labels.size(0)
        correct += (predicted_labels == labels).sum().item()
    accuracy = correct / total
    print(f'Test Accuracy: {accuracy:.4f}')

# Save the model weights
torch.save(model.state_dict(), 'sungjun_abhinav_assignment2_part1.h5')

Test Accuracy: 0.7697
