In [212]:
import torch
import torch.nn as nn
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd

In [213]:
dataframe = pd.read_csv('diabetes.csv')
X = dataframe.drop(columns='Outcome')
y = dataframe['Outcome']

X_train, X_test, y_train, y_test = train_test_split(X,y,train_size = 0.8, random_state = 42)


In [214]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [215]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [216]:
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1).to(device) #need to convert to numpy arrays use .values
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1).to(device)

In [217]:
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_dataloader = DataLoader(train_dataset, batch_size= 32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [218]:
class BinaryClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(BinaryClassifier, self).__init__()
        self.layer1 = nn.Linear(input_size, 128)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)  # Add dropout
        self.layer2 = nn.Linear(128, hidden_size)
        self.layer3 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.layer1(x)
        x = self.relu(x)
        x = self.dropout(x)  # Apply dropout
        x = self.layer2(x)
        x = self.layer3(x)
        return x  # No sigmoid here, for BCEWithLogitsLoss

input_size = X.shape[1]
hidden_size = 64
output_size = 1 #binary classification

print(f'input Size: {input_size}, Output Size: {output_size}')

input Size: 8, Output Size: 1


In [219]:
model = BinaryClassifier(input_size, hidden_size, output_size).to(device)
criterion = nn.BCEWithLogitsLoss()
optimiser = torch.optim.Adam(model.parameters(), lr = 0.01)

c = X_train is torch.tensor
c

False

In [220]:
num_epochs = 10000

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0
    for X_batch, y_batch in train_dataloader:
        optimiser.zero_grad()
        y_predicted = model(X_batch)
        loss = criterion(y_predicted, y_batch)
        loss.backward()
        optimiser.step()
        epoch_loss += loss.item()

    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss/len(train_dataloader):.4f}')
    

Epoch [100/10000], Loss: 0.3987
Epoch [200/10000], Loss: 0.3333
Epoch [300/10000], Loss: 0.3137
Epoch [400/10000], Loss: 0.3086
Epoch [500/10000], Loss: 0.2609
Epoch [600/10000], Loss: 0.2902
Epoch [700/10000], Loss: 0.2808
Epoch [800/10000], Loss: 0.2401
Epoch [900/10000], Loss: 0.2605
Epoch [1000/10000], Loss: 0.2975
Epoch [1100/10000], Loss: 0.2688
Epoch [1200/10000], Loss: 0.2414
Epoch [1300/10000], Loss: 0.2327
Epoch [1400/10000], Loss: 0.2706
Epoch [1500/10000], Loss: 0.2873
Epoch [1600/10000], Loss: 0.2345
Epoch [1700/10000], Loss: 0.2803
Epoch [1800/10000], Loss: 0.2289
Epoch [1900/10000], Loss: 0.2267
Epoch [2000/10000], Loss: 0.2218
Epoch [2100/10000], Loss: 0.2438
Epoch [2200/10000], Loss: 0.2180
Epoch [2300/10000], Loss: 0.2786
Epoch [2400/10000], Loss: 0.2559
Epoch [2500/10000], Loss: 0.2345
Epoch [2600/10000], Loss: 0.2589
Epoch [2700/10000], Loss: 0.2224
Epoch [2800/10000], Loss: 0.2227
Epoch [2900/10000], Loss: 0.2551
Epoch [3000/10000], Loss: 0.2281
Epoch [3100/10000],

In [221]:
# Evaluate the model
model.eval()
with torch.no_grad():
    all_preds = []
    all_labels = []
    for X_batch, y_batch in test_dataloader:
        y_predicted = model(X_batch)
        y_predicted = torch.sigmoid(y_predicted)  # Apply sigmoid to get probabilities
        all_preds.append(y_predicted)
        all_labels.append(y_batch)

    y_predicted = torch.cat(all_preds)
    y_labels = torch.cat(all_labels)
    y_predicted = (y_predicted > 0.5).float()  # Convert probabilities to binary predictions

    accuracy = accuracy_score(y_labels.cpu(), y_predicted.cpu())
    print(f'Test Accuracy: {accuracy:.4f}')

Test Accuracy: 0.7208
