In [1]:
import os
from time import time


import numpy as np
import torch


from datasets import spectral_dataloader

In [2]:
# Capture a start timestamp (seconds, from time.time()).
# Presumably used later to report total runtime; that use is not visible in this section.
t00 = time()

In [3]:
# The dataset directory is resolved relative to the current working directory.
dataset_folder = os.path.join(os.getcwd(), 'dataset')

# Paths of the fine-tuning spectra (X) and their labels (y).
X_fn, y_fn = (
    os.path.join(dataset_folder, file_name)
    for file_name in ('X_finetune.npy', 'y_finetune.npy')
)

# Read both arrays into memory and report their shapes.
X, y = np.load(X_fn), np.load(y_fn)

print(X.shape, y.shape)

(3000, 1000) (3000,)


In [4]:
# Show the distinct label values present in the loaded label array.
print(set(y))

{0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0}


In [5]:
# Labels of the classes to keep; every other sample is discarded.
valid_labels = [
    *range(14, 21 + 1),  # From MRSA 1 to S. lugdunensis (this range already covers 19, S. enterica)
    *range(25, 29 + 1),  # From Group A Strep. to Group G Strep.
    6,  # E. faecalis 1
    7,  # E. faecalis 2
]

# Vectorised membership test: one boolean per sample, True when its label is
# in `valid_labels`. This replaces a per-row Python loop and keeps X two-
# dimensional even if no sample matches.
keep_mask = np.isin(y, valid_labels)

# `X_df` / `y_df` are kept as names for the filtered data in case later cells
# refer to them.
X_df = X[keep_mask]
y_df = y[keep_mask]

X = X_df
y = y_df

In [6]:
print(X.shape, y.shape)

# Map every remaining original label to a contiguous class index 0..n-1.
# The labels are sorted first so the mapping is deterministic; iteration order
# of a plain set is an implementation detail and must not be relied upon.
unique_labels = sorted(set(y))
y_set = {label: class_idx for class_idx, label in enumerate(unique_labels)}

print(y_set)

print(set(y), len(set(y)))

(1500, 1000) (1500,)
{6.0: 0, 7.0: 1, 14.0: 2, 15.0: 3, 16.0: 4, 17.0: 5, 18.0: 6, 19.0: 7, 20.0: 8, 21.0: 9, 25.0: 10, 26.0: 11, 27.0: 12, 28.0: 13, 29.0: 14}
{6.0, 7.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 25.0, 26.0, 27.0, 28.0, 29.0} 15


In [7]:
# Mini-batch size passed to the dataloaders created further down.
batch_size = 10

In [8]:
# Hold out a random fraction `p_val` of the samples for validation.
p_val = 0.1
n_samples = len(X)  # derived from the data rather than hard-coded
n_val = int(n_samples * p_val)

# Shuffle all sample indices, then cut: the first `n_val` go to validation,
# the remainder to training, so the two index lists are disjoint.
idx_tr = list(range(n_samples))
np.random.shuffle(idx_tr)  # NOTE: no seed is set here, so the split differs between runs
idx_val = idx_tr[:n_val]
idx_tr = idx_tr[n_val:]

In [9]:
# Create one loader per split. Both read from the same (X, y) arrays and are
# restricted to their own index list; shuffling is requested for training only.
dl_tr = spectral_dataloader(
    X, y,
    idxs=idx_tr,
    batch_size=batch_size,
    shuffle=True,
)
dl_val = spectral_dataloader(
    X, y,
    idxs=idx_val,
    batch_size=batch_size,
    shuffle=False,
)

In [10]:
# Fetch a single batch from the training loader and print the length of its
# first three nesting levels (batch, then the next two axes).
xb, yb = next(iter(dl_tr))
batch_dims = (len(xb), len(xb[0]), len(xb[0][0]))
print(*batch_dims)

# print(xb, yb)

10 1 1000


In [11]:
# Number of batches in the training loader, then in the validation loader.
print(len(dl_tr), len(dl_val), sep='\n')

135
15


In [12]:
import torch.nn as nn
import torch.nn.functional as F 
from torch.autograd import Variable

In [13]:
# Training hyperparameters.
num_epochs, num_classes = 5, 15
# NOTE: the dataloaders above were already created with the earlier
# batch_size value; rebinding the name here does not alter them.
batch_size = 100
learning_rate = 1e-3

# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

In [14]:
class ResidualBlock(nn.Module):
    """
    Two-convolution 1-D residual block.

    Main path: Conv1d(k=5) -> BatchNorm -> ReLU -> Conv1d(k=5) -> BatchNorm.
    The result is added to a shortcut of the input and passed through a final
    ReLU. The shortcut is the identity unless the stride or the channel count
    changes, in which case it is a 1x1 convolution followed by BatchNorm.

    Args:
        in_channels: number of channels of the input.
        out_channels: number of channels produced by the block.
        stride: stride of the first convolution (and of the projection shortcut).
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()

        # Settings shared by both main-path convolutions.
        conv_kwargs = dict(kernel_size=5, padding=2, dilation=1, bias=False)

        # Main path. Layers are created in the same order as before so that
        # seeded weight initialisation is unaffected.
        self.conv1 = nn.Conv1d(in_channels, out_channels, stride=stride,
            **conv_kwargs)
        self.bn1 = nn.BatchNorm1d(num_features=out_channels)
        self.conv2 = nn.Conv1d(out_channels, out_channels, stride=1,
            **conv_kwargs)
        self.bn2 = nn.BatchNorm1d(num_features=out_channels)

        # Shortcut path: project only when the shapes would not match.
        needs_projection = stride != 1 or in_channels != out_channels
        if needs_projection:
            self.shortcut = nn.Sequential(
                nn.Conv1d(in_channels, out_channels, kernel_size=1,
                    stride=stride, bias=False),
                nn.BatchNorm1d(out_channels))
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Run the main path, add the shortcut and apply the final ReLU."""
        main = self.conv1(x)
        main = F.relu(self.bn1(main))
        main = self.bn2(self.conv2(main))
        return F.relu(main + self.shortcut(x))


class ResNet(nn.Module):
    """
    1-D convolutional classifier for single-channel input of length `input_dim`.

    The input goes through a stem convolution, batch norm and ReLU, is
    flattened per sample, and is fed to a linear layer that produces
    `n_classes` outputs.

    NOTE: the residual encoder is currently disabled (commented out below), so
    `hidden_sizes` and `num_blocks` are only length-checked and `_make_layer`
    is not used by the model.

    Args:
        hidden_sizes: per-stage channel widths for the (disabled) encoder.
        num_blocks: per-stage block counts; must have the same length as
            `hidden_sizes`.
        input_dim: length of each input sequence.
        in_channels: number of channels produced by the stem convolution.
        n_classes: number of outputs of the final linear layer.
    """
    def __init__(self, hidden_sizes, num_blocks, input_dim=1000,
        in_channels=64, n_classes=15):
        super(ResNet, self).__init__()
        assert len(num_blocks) == len(hidden_sizes)
        self.input_dim = input_dim
        self.in_channels = in_channels
        self.n_classes = n_classes

        # Stem: the padding keeps the sequence length unchanged.
        self.conv1 = nn.Conv1d(1, self.in_channels, kernel_size=5, stride=1,
            padding=2, bias=False)
        self.bn1 = nn.BatchNorm1d(self.in_channels)

        # Flexible number of residual encoding layers
        # layers = []
        # strides = [1] + [2] * (len(hidden_sizes) - 1)
        # for idx, hidden_size in enumerate(hidden_sizes):
        #     layers.append(self._make_layer(hidden_size, num_blocks[idx],
        #         stride=strides[idx]))
        # self.encoder = nn.Sequential(*layers)

        # Size of the flattened encoding, measured with a probe input.
        self.z_dim = self._get_encoding_size()
        self.linear = nn.Linear(self.z_dim, self.n_classes)


    def encode(self, x):
        """Return the flattened (batch, features) encoding of `x`."""
        x = F.relu(self.bn1(self.conv1(x)))
        # x = self.encoder(x)
        z = x.view(x.size(0), -1)
        return z

    def forward(self, x):
        """Return the linear layer's output for the encoding of `x`."""
        z = self.encode(x)
        return self.linear(z)


    def _make_layer(self, out_channels, num_blocks, stride=1):
        """
        Build a sequence of `num_blocks` residual blocks.

        Only the first block uses `stride`; the others use stride 1. Updates
        `self.in_channels` to `out_channels` as a side effect.
        """
        strides = [stride] + [1] * (num_blocks - 1)
        blocks = []
        for stride in strides:
            blocks.append(ResidualBlock(self.in_channels, out_channels,
                stride=stride))
            self.in_channels = out_channels
        return nn.Sequential(*blocks)

    def _get_encoding_size(self):
        """
        Returns the dimension of the encoded input.

        The probe is run in eval mode and without autograd so that measuring
        the size does not update the BatchNorm running statistics or build a
        graph. The previous training mode is restored afterwards.
        """
        # torch.rand (rather than zeros) is kept so the amount of RNG consumed
        # before the linear layer is initialised stays the same.
        probe = torch.rand(1, 1, self.input_dim)
        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                z = self.encode(probe)
        finally:
            self.train(was_training)
        return z.size(1)


def add_activation(activation='relu'):
    """
    Return a new activation layer for the given name. Choices:
    - 'relu'
    - 'elu' (alpha=1.0)
    - 'selu'
    - 'leaky relu' (negative_slope=0.1)
    - 'sigmoid'
    - 'tanh'
    - 'softplus' (beta=1, threshold=20)

    Args:
        activation: name of the activation, one of the choices above.

    Returns:
        A freshly constructed `nn.Module` for the requested activation.

    Raises:
        ValueError: if `activation` is not one of the supported names
            (previously this case silently returned None).
    """
    # Name -> (layer class, constructor kwargs). A new instance is built on
    # every call so callers never share a module object.
    choices = {
        'relu': (nn.ReLU, {}),
        'elu': (nn.ELU, {'alpha': 1.0}),
        'selu': (nn.SELU, {}),
        'leaky relu': (nn.LeakyReLU, {'negative_slope': 0.1}),
        'sigmoid': (nn.Sigmoid, {}),
        'tanh': (nn.Tanh, {}),
        # Original author's note (unverified): SOFTPLUS DOESN'T WORK with
        # automatic differentiation in pytorch
        'softplus': (nn.Softplus, {'beta': 1, 'threshold': 20}),
    }
    try:
        layer_cls, layer_kwargs = choices[activation]
    except (KeyError, TypeError):
        raise ValueError(
            'Unknown activation {!r}; expected one of {}'.format(
                activation, sorted(choices))) from None
    return layer_cls(**layer_kwargs)

In [15]:
class ConvNet(nn.Module):
    """
    Small 1-D convolutional network: one conv block followed by a linear layer.

    The conv block is Conv1d(1 -> 8, k=3) -> BatchNorm -> ReLU -> MaxPool(3).
    The linear layer acts on the last axis of the block's output, so for an
    input of shape (batch, 1, input_dim) `forward` returns a tensor of shape
    (batch, 8, num_classes).

    NOTE(review): the output is not flattened to (batch, num_classes). That
    looks unintended for a classifier, but it is preserved here to keep the
    existing behaviour. Confirm before using this model with a per-sample loss.

    Args:
        num_classes: number of outputs of the linear layer.
        input_dim: length of each input sequence. The default of 1000 yields
            the previously hard-coded 332 input features for the linear layer.
    """
    def __init__(self, num_classes, input_dim=1000):
        super(ConvNet, self).__init__()
        self.num_classes = num_classes
        self.input_dim = input_dim

        self.block1 = nn.Sequential(
            nn.Conv1d(1, 8, kernel_size=3),
            nn.BatchNorm1d(8),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=3),
        )

        # The linear layer's input size is measured from the conv block
        # instead of being hard-coded.
        self.linear1 = nn.Linear(self._get_encoding_size(), num_classes)

    def encode(self, x):
        """Return the conv block's output for `x` (not flattened)."""
        return self.block1(x)

    def forward(self, x):
        """Apply the conv block, then the linear layer on the last axis."""
        x = self.encode(x)
        x = self.linear1(x)

        return x

    def _get_encoding_size(self):
        """
        Returns the dimension of the encoded input, i.e. the length of the
        last axis of the conv block's output for a sequence of length
        `self.input_dim`.

        The probe runs in eval mode without autograd, and uses zeros rather
        than random values, so it neither touches the BatchNorm running
        statistics nor consumes random numbers.
        """
        probe = torch.zeros(1, 1, self.input_dim)
        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                z = self.encode(probe)
        finally:
            self.train(was_training)
        return z.size(-1)

In [16]:
# Architecture settings handed to ResNet.
layers, hidden_size, block_size = 6, 100, 2
hidden_sizes = [hidden_size for _ in range(layers)]
num_blocks = [block_size for _ in range(layers)]
input_dim, in_channels, n_classes = 1000, 32, 15

# model = ConvNet(n_classes).to(device)
# Note: no `.to(device)` is applied to the ResNet instance here.
model = ResNet(
    hidden_sizes=hidden_sizes,
    num_blocks=num_blocks,
    input_dim=input_dim,
    in_channels=in_channels,
    n_classes=n_classes,
)
print(model)

ResNet(
  (conv1): Conv1d(1, 32, kernel_size=(5,), stride=(1,), padding=(2,), bias=False)
  (bn1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (linear): Linear(in_features=32000, out_features=15, bias=True)
)


In [17]:
# Cross-entropy objective, optimised with Adam over all parameters of `model`.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=learning_rate,
)

In [18]:
# Report the number of training batches, followed by one blank line.
print('Batches:', len(dl_tr), end='\n\n')

Batches: 135



In [20]:
# Train the model
total_step = len(dl_tr)

for epoch in range(num_epochs):
    total_loss = 0.0  # sum of per-sample losses over the epoch
    correct = 0       # number of correctly classified samples
    total = 0         # number of samples seen

    # One optimisation step per mini-batch
    for i, (spectras, labels) in enumerate(dl_tr):
        # Translate the original labels into the contiguous class indices
        # defined by `y_set`, as an integer tensor for the loss.
        labels = torch.tensor([y_set[label.item()] for label in labels],
                              dtype=torch.long)

        # Forward pass
        outputs = model(spectras)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # `criterion` averages over the batch, so weight the value by the
        # batch size; dividing by `total` below then gives the true per-sample
        # mean (summing batch means and dividing by the sample count would
        # understate the loss by roughly the batch size).
        batch_n = labels.size(0)
        total_loss += loss.item() * batch_n
        total += batch_n

        # Predicted class = index of the largest output per sample
        _, predicted = torch.max(outputs.detach(), 1)
        correct += predicted.eq(labels).sum().item()

        if (i+1) % 100 == 0:
            print ('Epoch [{}/{}], Step [{}/{}]'.format(epoch+1, num_epochs, i+1, total_step))

    acc = 100 * correct / total
    avg_loss = total_loss / total

    print ('Epoch [{}/{}], Acc: {:.6f}; Loss: {:.6f}'.format(epoch + 1, num_epochs, acc, avg_loss))

  9,  6,  3, 14,  8,  9])
tensor([ 9,  8,  7,  3,  8,  9,  5,  3, 11,  7])
tensor([11, 10,  8, 13,  2,  6,  3,  6,  5,  4])
tensor([10,  6,  6, 13,  6,  0,  5,  6, 11,  9])
tensor([ 1, 12,  5, 14,  3,  4,  8,  5, 12,  7])
tensor([10,  5,  2,  5,  4, 10, 10, 13, 12,  6])
tensor([14,  0,  9,  3,  9, 11,  0, 12,  3,  4])
tensor([ 3,  4,  6, 11,  9,  0,  3,  4, 12,  9])
tensor([ 1,  7,  7,  1,  1, 10,  2,  6,  8,  8])
tensor([ 6,  5, 13,  0,  8,  1, 10,  3, 14, 11])
tensor([14, 11, 12, 14,  2,  2,  5,  7,  7,  1])
tensor([ 6,  0,  9,  4,  5, 14, 11, 13, 10,  7])
tensor([ 2,  1,  6,  8, 10,  8,  1,  4,  3,  0])
tensor([12, 10,  9,  1, 12, 10, 10,  4,  4,  9])
tensor([6, 0, 0, 6, 7, 6, 3, 2, 7, 5])
tensor([ 3,  0,  5, 13, 10,  7, 14,  9,  7, 12])
tensor([ 9,  7,  8,  3,  6,  9, 10, 14,  1, 13])
tensor([ 1, 13, 13,  1, 13, 13,  3, 14, 13,  7])
tensor([ 6,  2, 12, 14, 12,  0, 11,  8,  4,  7])
tensor([12,  2,  2,  3, 12, 14, 12,  7,  4, 12])
tensor([11,  3,  5,  9,  1,  9,  3, 13,  8,  5])
tens