In [0]:
import csv

labels = []
markers = []
early_late = []

train_labels = []
train_markers = []
train_early_late = []

val_labels = []
val_markers = []
val_early_late = []

test_labels = []
test_markers = []
test_early_late = []

def split_train_val_test(x):
  train = x[:560]
  val = x[560:720]
  test = x[720:]
  return train, val, test

with open('ovarian_cancer.csv') as csvfile:
  reader = csv.reader(csvfile, delimiter=';')
  for row in reader:
    labels.append(row[1])
    markers.append(row[4:])
    early_late.append(row[2])
    # train_test.append(row[3])
    
for i in range(len(markers)):
  for j in range(len(markers[i])):
    if (markers[i][j] == 'NA' or markers[i][j] == ''):
      markers[i][j] = 0

train_labels, val_labels, test_labels = split_train_val_test(labels)
train_markers, val_markers, test_markers = split_train_val_test(markers)
train_early_late, val_early_late, test_early_late = split_train_val_test(early_late)


In [0]:
import numpy as np
from sklearn import svm

def format_labels(lbls):
  # return [1 if label == 'Case' else 0 for label in lbls]
  return [2 if label == '1' else 1 if label == '0' else 0 for label in lbls]

# train_labels_formatted = format_labels(train_labels)
# val_labels_formatted = format_labels(val_labels)
# test_labels_formatted = format_labels(test_labels)

train_labels_formatted = format_labels(train_early_late)
val_labels_formatted = format_labels(val_early_late)
test_labels_formatted = format_labels(test_early_late)

# markers_of_interest = []

# for i in range(len(np.array(train_markers).T.tolist())):
#   marker = np.array(train_markers)[:, i]
#   clf = svm.SVC(gamma='scale')
#   clf.fit(marker.reshape(-1, 1), train_labels_formatted)
#   score = sum([clf.predict(np.array(val_markers[j][i]).reshape(-1, 1))[0] == val_labels_formatted[j] for j in range(len(val_markers))]) / len(val_markers)
#   if score > 0.85:
#     score_test = sum([clf.predict(np.array(test_markers[j][i]).reshape(-1, 1))[0] == test_labels_formatted[j] for j in range(len(test_markers))]) / len(test_markers)
#     print('success', i, score, score_test)
#     markers_of_interest.append(i)

In [0]:
clf = svm.SVC(gamma='scale')
clf.fit(train_markers, train_labels_formatted)
score = sum([clf.predict(np.array(val_markers[j]).reshape(1, -1)) == val_labels_formatted[j] for j in range(len(val_markers))]) / len(val_markers)
print(score)
score_test = sum([clf.predict(np.array(test_markers[j]).reshape(1, -1)) == test_labels_formatted[j] for j in range(len(test_markers))]) / len(test_markers)
print(score_test)

[0.875]
[0.8875]


In [0]:
early_total = sum([val_early_late[j] == "0" for j in range(len(val_early_late))])
late_total = sum([val_early_late[j] == "1" for j in range(len(val_early_late))])
healthy_total = len(val_early_late) - early_total - late_total

early_acc = sum([clf.predict(np.array(val_markers[j]).reshape(1, -1)) == val_labels_formatted[j] and val_early_late[j] == "0" for j in range(len(val_markers))]) / early_total
late_acc = sum([clf.predict(np.array(val_markers[j]).reshape(1, -1)) == val_labels_formatted[j] and val_early_late[j] == "1" for j in range(len(val_markers))]) / late_total
healthy_acc = sum([clf.predict(np.array(val_markers[j]).reshape(1, -1)) == val_labels_formatted[j] and val_early_late[j] == "NA" for j in range(len(val_markers))]) / healthy_total

print(early_total, late_total, healthy_total)
print(early_acc, late_acc, healthy_acc)

16 16 128
[0.] [0.75] [1.]


In [0]:
import torch
import torch.nn as nn
import torch.utils.data as utils

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyper-parameters 
input_size = 61
hidden_size = 30
hidden_size2 = 10
num_classes = 3
num_epochs = 50
batch_size = 80
learning_rate = 0.001

def to_tensor(data):
  return torch.stack([torch.Tensor(np.array(i).astype(np.float)) for i in data])

def to_tensor_y(data):
  return torch.stack([torch.Tensor(np.array([i])).long() for i in data])

tensor_x_train = to_tensor(train_markers)
tensor_y_train = to_tensor_y(train_labels_formatted)

tensor_x_val = to_tensor(val_markers)
tensor_y_val = to_tensor_y(val_labels_formatted)

tensor_x_test = to_tensor(test_markers)
tensor_y_test = to_tensor_y(test_labels_formatted)

train_dataset = utils.TensorDataset(tensor_x_train, tensor_y_train)
val_dataset = utils.TensorDataset(tensor_x_val, tensor_y_val)
test_dataset = utils.TensorDataset(tensor_x_test, tensor_y_test)

train_loader = utils.DataLoader(train_dataset, batch_size=batch_size)
val_loader = utils.DataLoader(val_dataset, batch_size=batch_size)
test_loader = utils.DataLoader(test_dataset, batch_size=batch_size)


class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size) 
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, num_classes)
        self.softmax = nn.Softmax()
    
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.softmax(out)
        return out

model = NeuralNet(input_size, hidden_size, num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  

# Train the model
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (xs, ys) in enumerate(train_loader):  
        # Move tensors to the configured device
        xs = xs.reshape(-1, 61).to(device)
        ys = ys.to(device)
        
        # Forward pass
        outputs = model(xs)
        loss = criterion(outputs, ys.view(-1))
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 5 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))
            # Test the model
            # In test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
    correct = 0
    total = 0
    for xs, ys in val_loader:
        xs = xs.reshape(-1, 61).to(device)
        ys = ys.to(device)
        outputs = model(xs)
        _, predicted = torch.max(outputs.data, 1)
        print(predicted)
        total += ys.size(0)
        correct += (predicted == ys).sum().item()

    print('Accuracy of the network on the val patients: {} %'.format(correct / total))

# Save the model checkpoint
torch.save(model.state_dict(), 'model.ckpt')



Epoch [1/50], Step [5/7], Loss: 2.3010
Epoch [2/50], Step [5/7], Loss: 2.2825
Epoch [3/50], Step [5/7], Loss: 2.2596
Epoch [4/50], Step [5/7], Loss: 2.2336
Epoch [5/50], Step [5/7], Loss: 2.2076
Epoch [6/50], Step [5/7], Loss: 2.1816
Epoch [7/50], Step [5/7], Loss: 2.1520
Epoch [8/50], Step [5/7], Loss: 2.1153
Epoch [9/50], Step [5/7], Loss: 2.0699
Epoch [10/50], Step [5/7], Loss: 2.0161
Epoch [11/50], Step [5/7], Loss: 1.9570
Epoch [12/50], Step [5/7], Loss: 1.8978
Epoch [13/50], Step [5/7], Loss: 1.8430
Epoch [14/50], Step [5/7], Loss: 1.7950
Epoch [15/50], Step [5/7], Loss: 1.7545
Epoch [16/50], Step [5/7], Loss: 1.7210
Epoch [17/50], Step [5/7], Loss: 1.6938
Epoch [18/50], Step [5/7], Loss: 1.6720
Epoch [19/50], Step [5/7], Loss: 1.6546
Epoch [20/50], Step [5/7], Loss: 1.6408
Epoch [21/50], Step [5/7], Loss: 1.6296
Epoch [22/50], Step [5/7], Loss: 1.6203
Epoch [23/50], Step [5/7], Loss: 1.6123
Epoch [24/50], Step [5/7], Loss: 1.6049
Epoch [25/50], Step [5/7], Loss: 1.5977
Epoch [26