In [1]:
import torch
from torch import nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
import pandas as pd
from torch.utils.tensorboard import SummaryWriter

In [2]:
df = pd.read_csv('dataset_LUMINAL_A_B.csv')

In [3]:
class TorchStandardScaler:
  def fit(self, x):
    self.mean = x.mean(0, keepdim=True)
    self.std = x.std(0, unbiased=False, keepdim=True)
  def transform(self, x):
    x -= self.mean
    x /= (self.std + 1e-7)
    return x

In [4]:
# Data preprocessing
output_mapping = {'Luminal A': 0, 'Luminal B': 1}
df['l'] = df['l'].str.strip().replace(output_mapping)

# Train-test split
n = df.shape[0]
perm = torch.randperm(n)
df = df.iloc[perm]

train_size = int(0.8 * len(df))
test_size = len(df) - train_size

df_train, df_test = df.iloc[:train_size], df.iloc[train_size:]

X_tr = torch.tensor(df_train.drop(columns=['l']).values, dtype=torch.float32)
y_tr = torch.tensor(df_train['l'].values, dtype=torch.float32)
X_te = torch.tensor(df_test.drop(columns=['l']).values, dtype=torch.float32)
y_te = torch.tensor(df_test['l'].values, dtype=torch.float32)

# # Scaling
scaler = TorchStandardScaler()
scaler.fit(X_tr)
X_tr = scaler.transform(X_tr)
X_te = scaler.transform(X_te)

In [5]:
class MLP(nn.Module):
    def __init__(self, inplanes):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(inplanes, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(64, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.model(x)

In [6]:
# Training
NUM_EPOCHS = 50

train_dataset = TensorDataset(X_tr, y_tr)
test_dataset = TensorDataset(X_te, y_te)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Assuming that we are on a CUDA machine, this should print a CUDA device:
print(device)

net = MLP(inplanes=1022)
net.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

writer = SummaryWriter()

for epoch in range(NUM_EPOCHS):
    for i, data in enumerate(train_loader):
        inputs, labels = data                   # 0) get the inputs
        inputs = inputs.to(device)
        labels = labels.to(device).unsqueeze(1)

        optimizer.zero_grad()                   # 1) zero the gradients
        outputs = net(inputs)
        loss = criterion(outputs, labels)       # 2) forward
        writer.add_scalar("Loss/train", loss, epoch)
        loss.backward()                         # 3) backward
        optimizer.step()                        # 4) optimization step
        
        print(f'Epoch [{epoch + 1}/{NUM_EPOCHS}], Loss: {loss.item()}')

writer.flush()
writer.close()

cpu
Epoch [1/50], Loss: -0.0
Epoch [1/50], Loss: -0.0
Epoch [1/50], Loss: -0.0
Epoch [1/50], Loss: -0.0
Epoch [1/50], Loss: -0.0
Epoch [2/50], Loss: -0.0
Epoch [2/50], Loss: -0.0
Epoch [2/50], Loss: -0.0
Epoch [2/50], Loss: -0.0
Epoch [2/50], Loss: -0.0
Epoch [3/50], Loss: -0.0
Epoch [3/50], Loss: -0.0
Epoch [3/50], Loss: -0.0
Epoch [3/50], Loss: -0.0
Epoch [3/50], Loss: -0.0
Epoch [4/50], Loss: -0.0
Epoch [4/50], Loss: -0.0
Epoch [4/50], Loss: -0.0
Epoch [4/50], Loss: -0.0
Epoch [4/50], Loss: -0.0
Epoch [5/50], Loss: -0.0
Epoch [5/50], Loss: -0.0
Epoch [5/50], Loss: -0.0
Epoch [5/50], Loss: -0.0
Epoch [5/50], Loss: -0.0
Epoch [6/50], Loss: -0.0
Epoch [6/50], Loss: -0.0
Epoch [6/50], Loss: -0.0
Epoch [6/50], Loss: -0.0
Epoch [6/50], Loss: -0.0
Epoch [7/50], Loss: -0.0
Epoch [7/50], Loss: -0.0
Epoch [7/50], Loss: -0.0
Epoch [7/50], Loss: -0.0
Epoch [7/50], Loss: -0.0
Epoch [8/50], Loss: -0.0
Epoch [8/50], Loss: -0.0
Epoch [8/50], Loss: -0.0
Epoch [8/50], Loss: -0.0
Epoch [8/50], Loss: -

In [7]:
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        images = images.to(device)
        labels = labels.to(device)

        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the {X_te.shape[0]} test samples: %d %%' % (100 * correct / total))

Accuracy of the network on the 20 test samples: 60 %
