<a href="https://colab.research.google.com/github/foxtrotmike/MIL/blob/master/milgen.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [31]:
# -*- coding: utf-8 -*-
"""
Created on Tue Dec 17 01:47:34 2019
How to do MIL when the bag just won't fit into memory
Idea:
In each iteration
  Load random patches (examples) from a bag
  Append to it, the last max scoring example of the bag
  Save max scoring example of the bag to file
  Do the above for a randomly chosen positive and a random negative bag
  Compute loss
  Backpropagate
Based on: https://stanford.edu/~shervine/blog/pytorch-how-to-generate-data-parallel
@author: Fayyaz Minhas
"""
import numpy as np
import torch
from torch.utils import data

class MILDataset(data.Dataset):
  'Characterizes a Multiple Instance dataset for PyTorch'
  def __init__(self, list_IDs, labels, maxsize = None):
        'Initialization'
        self.labels = labels #dictionary of id to label
        self.list_IDs = list_IDs #list of ids
        self.maxsize = maxsize #max size to load at once
  def __len__(self):
        'Denotes the total number of samples'
        return len(self.list_IDs)

  def __getitem__(self, index):
        'Generates one sample of data'
        # Select sample
        ID = self.list_IDs[index]
        if self.maxsize is None:
          maxsize = 10
        else:
          maxsize = self.maxsize
        #Load random examples (patches) from bag file (WSI) based on ID   
        # Preferably a fast mechanism of loading random patches
        X = np.random.rand(maxsize,2) #temporary random data
        # load the top scoring patch (example) for this id if it exists
        #xtop = load('data/' + ID + '.top')
        xtop = np.random.rand(1,2) #temporary random data
        # can do data augmentation as well if required here
        X = np.vstack((X,xtop))
        y = self.labels[ID]
        return X,ID,y
    
# CUDA for PyTorch
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
#cudnn.benchmark = True
max_iters = 10 #ensure that each bag is selected multiple times
# Datasets
partition = {'pos': ['id-1', 'id-2', 'id-3'], 'neg': ['id-4','id-5']}# IDs (bag ids or WSI names)
labels = dict([(k,1.0) for k in partition['pos']]+[(k,-1.0) for k in partition['neg']]) 
# Generators (for positive and negative bags)
posgen = data.DataLoader(MILDataset(partition['pos'], labels), pin_memory = True, shuffle = True)
neggen = data.DataLoader(MILDataset(partition['neg'], labels), pin_memory = True, shuffle = True)
positer, negiter = iter(posgen), iter(neggen)
# Loop over epochs
for i in range(max_iters):
    try: #not pretty but works!
      PX,Pid,_ = next(positer)
    except StopIteration:
      positer = iter(posgen)
      PX,Pid,_ = next(positer)
    PX,Pid = PX[0].to(device),Pid[0]
    try:
      NX,Nid,_ = next(negiter)
    except StopIteration:
      negiter = iter(neggen)
      NX,Ni,_ = next(negiter)
    NX,Nid = NX[0].to(device),Nid[0]
    print(i,Pid,Nid)
    # compute score over PX and NX. Save top scoring patch of each to a file
    # so it can be loaded
    # Comput loss 

0 id-1 id-5
1 id-3 id-4
2 id-2 i
3 id-3 id-5
4 id-2 i
5 id-1 id-5
6 id-2 i
7 id-1 id-5
8 id-3 i
9 id-3 id-4


In [29]:
PX

tensor([[0.4601, 0.4991],
        [0.1672, 0.3796],
        [0.9596, 0.7124],
        [0.7853, 0.6906],
        [0.8232, 0.8574],
        [0.1193, 0.2484],
        [0.2003, 0.5079],
        [0.6945, 0.4801],
        [0.6093, 0.5968],
        [0.3975, 0.3490]], dtype=torch.float64)

In [31]:
local_labels

tensor([0])