In [14]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
from sklearn import model_selection
import mil_pytorch.mil as mil

## Load data

In [33]:
data_dir = 'Musk1.xlsx'
test_index_dir = "Musk1.csv_rep1_fold1.txt"


In [37]:
data = pd.read_excel(data_dir)
test_indices  = np.loadtxt(test_index_dir, dtype=int)
train_data = data[~data.bagID.isin(test_indices)]
test_data = data[data.bagID.isin(test_indices)]

(434, 169)

In [38]:
class MilDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return self.data.nunique()
    
    def __getitem__(self, index):
        index_data = self.data[self.data.bagID==index+1]
        index_features =  torch.tensor(index_data[index_data.columns[3:]].values)
        index_label = torch.tensor(index_data.response.unique())

        return index_features, index_label


In [39]:
train_data = MilDataset(train_data)


In [40]:
train_data[0][1]

tensor([1])

In [44]:

train_dl = DataLoader(dataset=train_data, batch_size=1) # Using custom collate_fn mil.collate
test_dl = DataLoader(dataset=test_data, batch_size=1)

## Define model, criterion and optimizer

In [41]:
class MI_Net(nn.Module):
    def __init__(self, input_dim=166):
        super().__init__()

        self.fc1 = nn.Linear(in_features=input_dim, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=128)
        self.fc3 = nn.Linear(in_features=128, out_features=64)
        self.dropout = nn.Dropout(p=0.5, inplace=False)
        self.fc4 = nn.Linear(in_features=64, out_features=1)
    
    def forward(self, input):    
        
        x = input.float()
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.dropout(x) 
        x = torch.mean(x, dim=1, keepdim=True)
        x = self.fc4(x)

        return F.sigmoid(x)

model = MI_Net()

## Hyper-Parameters 

In [42]:

batch_size = 1
# Optimizer parameters from https://github.com/yanyongluan/MINNs/blob/master/MI_Net.py
learning_rate = 5e-4
weight_decay = 1e-4
momentum = 0.9
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay, momentum=momentum)
criterion = nn.BCELoss()
#criterion = nn.CrossEntropyLoss()


## Train model

In [46]:
import time

# Training parameters
epochs = 50

start = time.time()
print('TRAINING:')

# Tensor for collecting losses over batches
train_losses = []

for epoch in range(epochs): 
    for features, labels in train_dl:
        labels[labels==-1] = 0  # replace -1 classes with Zero
        pred = model(features)
        loss = criterion(pred, labels)
        
        # Update weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # Save loss on this batch
        train_losses.append(loss.float())
    
    # Compute avarega loss on this epoch
    train_loss = torch.mean(torch.tensor(train_losses), dim = 0, keepdim = True)
    
    # Clear tensor for saving losses over batches
    train_losses = []

    # Print info about learning every 100 epochs
    if (epoch+1)%100 == 0:
        print('[{}/{}] | train_loss: {}'.format(epoch+1, epochs, train_loss.item()))

print('Finished training - elapsed time: {}'.format(time.time() - start))

TRAINING:


TypeError: 'Series' object cannot be interpreted as an integer

In [52]:
labels


tensor([ 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
         1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
         1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
         1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
         1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
         1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
         1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
         1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
         1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
         1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
         1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
         1,  1,  1,  1,  1,  1,  1,  1,  1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, 

## Evaluation

In [48]:
labels

tensor([10, 10, 10, 10,  1,  1, 10, 10, 10, 10, 10, 10,  1,  1,  1, 10,  1, 10,
         1,  1, 10, 10,  1, 10,  1, 10, 10,  1,  1, 10, 10, 10, 10, 10,  1, 10,
         1,  1,  1, 10,  1,  1,  1,  1,  1,  1, 10, 10,  1, 10, 10,  1, 10, 10,
        10, 10, 10, 10,  1, 10, 10, 10, 10,  1,  1,  1,  1,  1, 10,  1,  1,  1,
        10])

In [31]:
from sklearn import metrics

def eer(pred, labels):
    fpr, tpr, threshold = metrics.roc_curve(labels.detach(), pred.detach(), pos_label=1)
    fnr = 1 - tpr
    EER_fpr = fpr[np.nanargmin(np.absolute((fnr - fpr)))]
    EER_fnr = fnr[np.nanargmin(np.absolute((fnr - fpr)))]
    return EER_fpr, EER_fnr

def accuracy(pred, target, threshold = 0):
    pred = pred.detach().np()
    target = target.detach().np()

    pred[pred >= threshold] = 1
    pred[pred < threshold] = -1

    return np.sum(target == pred)/target.shape[0]

print('EVALUATION:')

# Train dataloader for evaluation
train_dl = DataLoader(dataset, sampler = train_sampler, batch_size = len(train_indices), collate_fn=mil.collate)

for data, ids, labels in train_dl:
    pred = model((data, ids))
    loss = criterion(pred[:,0], labels)
    acc = accuracy(pred[:,0], labels)
    eer_fpr, eer_fnr = eer(pred[:,0], labels)

print('TRAIN DATA')
print('Loss: {:6}'.format(loss.item()))
print('Accuracy: {:.2%}'.format(acc))
print('Equal error rate approximation using false positive rate: {:.3}'.format(eer_fpr))
print('Equal error rate approximation using false negative rate: {:.3}'.format(eer_fnr))


for data, ids, labels in test_dl:
    pred = model((data, ids))
    loss = criterion(pred[:,0], labels)
    acc = accuracy(pred[:,0], labels)
    eer_fpr, eer_fnr = eer(pred[:,0], labels)

print('TEST DATA')
print('Loss: {:6}'.format(loss.item()))
print('Accuracy: {:.2%}'.format(acc))
print('Equal error rate approximation using false positive rate: {:.3}'.format(eer_fpr))
print('Equal error rate approximation using false negative rate: {:.3}'.format(eer_fnr))

EVALUATION:




IndexError: too many indices for tensor of dimension 1

In [33]:
pred
labels

tensor([-1, -1,  1,  1, -1, -1,  1, -1,  1,  1, -1, -1, -1,  1, -1,  1, -1, -1,
         1,  1, -1, -1,  1, -1,  1, -1,  1,  1, -1,  1, -1,  1,  1,  1, -1, -1,
        -1,  1, -1, -1, -1,  1, -1, -1, -1,  1, -1,  1, -1, -1, -1, -1, -1, -1,
         1,  1, -1,  1,  1, -1,  1, -1, -1,  1, -1,  1, -1,  1, -1,  1,  1,  1,
         1])