<a href="https://colab.research.google.com/github/Vigneshthanga/SpecialTopics/blob/master/Assignment_1_Part_1/MIL_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Importing torch and torchvision library

In [1]:
import torch
from torchvision.models.resnet import ResNet, BasicBlock
from torchvision.datasets import MNIST

In [2]:
mnist_train = MNIST(root='.', download=True, train=True)
mnist_train_data = mnist_train.train_data.float()



## Creating list of transforms functions to apply on the dataset

In [3]:
from torchvision.transforms.transforms import *
transformList = Compose([ Resize((224, 224)),ToTensor(), Normalize((mnist_train_data.mean()/255), (mnist_train_data.std()/255))])

## MNIST Train and Validation DataLoader

In [4]:
from torch.utils.data import DataLoader

def get_data_loaders(train_batch_size, val_batch_size):
  train_loader = DataLoader(MNIST(download=True, root=".", transform=transformList, train=True),
                              batch_size=train_batch_size, shuffle=True)
  val_loader = DataLoader(MNIST(download=True, root=".", transform=transformList, train=False),
                              batch_size=val_batch_size, shuffle=False)
  
  return train_loader, val_loader

## Import Keras Mnist data to create Multi Instance Label dataset

In [5]:
import tensorflow as tf
(xtrain, ytrain), (xtest, ytest) = tf.keras.datasets.mnist.load_data()

##Train-test split

In [6]:
xtrain = xtrain[:30001]
ytrain = ytrain[:30001]
xtest = xtest[:9000]
ytest = ytest[:9000]

In [7]:
xtrain = xtrain/255
xtest = xtest/255
print('x_train shape:', xtrain.shape)
print('Number of images in x_train', xtrain.shape[0])
print('Number of images in x_test', xtest.shape[0])

x_train shape: (30001, 28, 28)
Number of images in x_train 30001
Number of images in x_test 9000


In [8]:
instance_index_label = [(i, ytrain[i]) for i in range(xtrain.shape[0])]
instance_index_label_test = [(i, ytest[i]) for i in range(xtest.shape[0])]

In [9]:
instance_index_label[0]

(0, 5)

## Find indices whose label is 1

In [10]:

find_index = [instance_index_label[i][0] for i in range(len(instance_index_label)) if instance_index_label[i][1]==1]
find_index_test = [instance_index_label_test[i][0] for i in range(len(instance_index_label_test))
                   if instance_index_label_test[i][1]==1]


In [11]:
print('index:', instance_index_label[0][0])
print('label:', instance_index_label[0][1])

index: 0
label: 5


## Defining Custom Resnet 

In [12]:
class CustomResNet(ResNet):
    def __init__(self):
        super(CustomResNet, self).__init__(BasicBlock, [2, 2, 2, 2], num_classes=10)
        self.conv1 = torch.nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        
    def forward(self, x):
        return torch.softmax(super(CustomResNet, self).forward(x), dim=-1)

## Mounting Google Drive

In [13]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [14]:
ls drive/My\ Drive/mnist_state.pt

'drive/My Drive/mnist_state.pt'


## Load the Pretrained model on MNIST data and remove the last Argmax layer

In [15]:
model = CustomResNet()
model.load_state_dict(torch.load('/content/drive/My Drive/mnist_state.pt'))
body = torch.nn.Sequential(*list(model.children()))
model = body[:9]
model.eval()

Sequential(
  (0): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Con

## Data, Loss function and Optimizer

In [16]:
train_batch_size = 1
val_batch_size = 1

train_loader, val_loader = get_data_loaders(train_batch_size, val_batch_size)
loss_function = torch.nn.CrossEntropyLoss()

optimizer = torch.optim.Adadelta(model.parameters())

In [17]:
losses = []
train_batch_len = len(train_loader)
val_batch_len = len(val_loader)

## Make use of GPU if available

In [18]:
if (torch.cuda.is_available()):
  device = "cuda:0"
else:
  device = "cpu"

## Extract the image features from pretrained model

In [19]:
import numpy as np
from tqdm.autonotebook import tqdm
meta_table = dict()
feature_result = []

# progress bar
progress = tqdm(enumerate(train_loader), desc="Loss: ", total=train_batch_len)

model.cuda()
model.eval()

for i, data in progress:
    if i==30001:
        break
    X = data[0].to(device)
    y = data[1].to(device)

    # training step for single batch
    model.zero_grad()
    outputs = model(X)

    feature_result.append(outputs.reshape(-1).tolist())
    meta_table[i] = outputs.reshape(-1).tolist()
    
feature_array = np.array(feature_result)
np.save('feature_array_full',feature_array)

HBox(children=(FloatProgress(value=0.0, description='Loss: ', max=60000.0, style=ProgressStyle(description_wid…

In [20]:
len(feature_array)

30001

In [21]:
feature_array = np.load('feature_array_full.npy', allow_pickle=True)

In [22]:
meta_t_table = dict()
feature_t_result = []

# progress bar
progress = tqdm(enumerate(val_loader), desc="Loss: ", total=val_batch_len)

model.eval()

for i, data in progress:
    if i==9000:
        break
    X = data[0].to(device) 
    y = data[1].to(device)

    # training step for single batch
    model.zero_grad()
    outputs_t = model(X)

    feature_t_result.append(outputs_t.reshape(-1).tolist())
    meta_t_table[i] = outputs_t.reshape(-1).tolist()

feature_test_array = np.array(feature_t_result)
# save 
np.save('feature_test_array_full',feature_test_array )

HBox(children=(FloatProgress(value=0.0, description='Loss: ', max=10000.0, style=ProgressStyle(description_wid…

In [23]:
#feature_test_array = np.load('feature_test_array_full.npy', allow_pickle=True)

## Create Bag of images. A bag is labeled 1 if it contains atleast one 1 else 0

In [24]:
from typing import List, Dict, Tuple
import copy
def data_generation(instance_index_label: List[Tuple]) -> List[Dict]:
    """
    bags: {key1: [ind1, ind2, ind3],
           key2: [ind1, ind2, ind3, ind4, ind5],
           ... }
    bag_lbls:
        {key1: 0,
         key2: 1,
         ... }
    """
    bag_size = np.random.randint(3,7,size=len(instance_index_label)//5)
    data_cp = copy.copy(instance_index_label)
    np.random.shuffle(data_cp)
    bags = {}
    bags_per_instance_labels = {}
    bags_labels = {}
    for bag_ind, size in enumerate(bag_size):
        bags[bag_ind] = []
        bags_per_instance_labels[bag_ind] = []
        try:
            for _ in range(size):
                inst_ind, lbl = data_cp.pop()
                bags[bag_ind].append(inst_ind)
                # simplfy, just use a temporary variable instead of bags_per_instance_labels
                bags_per_instance_labels[bag_ind].append(lbl)
            bags_labels[bag_ind] = bag_label_from_instance_labels(bags_per_instance_labels[bag_ind])
        except:
            print('breaking')
            break
    return bags, bags_labels

def bag_label_from_instance_labels(instance_labels):
    return int(any(((x==1) for x in instance_labels)))


In [25]:
feature_array

array([[0.59622633, 0.46065578, 1.58199155, ..., 0.19270146, 0.38971853,
        2.38730597],
       [0.98215216, 0.56878608, 0.15145038, ..., 1.91796064, 0.42021093,
        0.41520336],
       [1.10731256, 0.29201722, 0.35634115, ..., 0.5070321 , 0.25034332,
        1.90367532],
       ...,
       [1.18139005, 0.43565094, 0.21293369, ..., 0.73760033, 0.12816109,
        1.52040565],
       [0.27648014, 0.71699166, 1.95807469, ..., 0.17226705, 3.60900259,
        0.53389448],
       [0.56366694, 0.49320748, 1.77415121, ..., 0.18600234, 0.37762031,
        2.23982334]])

In [26]:
bag_indices, bag_labels = data_generation(instance_index_label)
bag_features = {kk: torch.Tensor(feature_array[inds]) for kk, inds in bag_indices.items()}

In [27]:
bag_features.items()

dict_items([(0, tensor([[1.0376, 0.4624, 0.0980,  ..., 2.0733, 0.1366, 0.5650],
        [0.9469, 1.4397, 1.7155,  ..., 0.4923, 0.4925, 0.5514],
        [1.0817, 1.1441, 2.0158,  ..., 1.3244, 1.0166, 1.5910],
        [2.4357, 2.2256, 0.4803,  ..., 2.2342, 1.4978, 0.5509]])), (1, tensor([[1.0008, 0.4212, 0.2226,  ..., 2.1333, 0.3034, 0.6690],
        [1.1100, 1.9994, 1.0119,  ..., 0.9692, 0.7979, 0.1146],
        [1.0684, 0.8439, 0.4088,  ..., 0.7193, 1.0047, 0.1520],
        [1.1724, 0.3740, 0.2632,  ..., 0.2791, 0.1867, 0.5093],
        [1.1914, 0.2326, 0.2338,  ..., 0.1474, 0.0102, 0.4778],
        [2.3109, 2.0827, 0.5801,  ..., 1.7922, 1.4429, 0.2064]])), (2, tensor([[2.3975, 1.9269, 0.6667,  ..., 1.5352, 1.9454, 0.6119],
        [1.4174, 0.8770, 0.5373,  ..., 0.6084, 0.1874, 1.2069],
        [1.2118, 2.2442, 2.1127,  ..., 0.6185, 0.7063, 0.7223],
        [0.8766, 0.9040, 0.2534,  ..., 0.4535, 0.2591, 0.3304]])), (3, tensor([[1.0034, 0.9489, 0.3264,  ..., 0.6201, 1.0839, 0.3443],
   

## Save the intermediate feature arrays in pickle file for future use 

In [28]:
import pickle
pickle.dump(bag_indices, open( "bag_indices", "wb" ) )
pickle.dump(bag_labels, open( "bag_labels", "wb" ) )
pickle.dump(bag_features, open( "bag_features", "wb" ) )

In [29]:
import pickle
bag_indices = pickle.load( open( "bag_indices", "rb" ) )
bag_labels = pickle.load( open( "bag_labels", "rb" ) )
bag_features = pickle.load( open( "bag_features", "rb" ) )

In [30]:
!ls

bag_features  bag_t_features  drive			   MNIST
bag_indices   bag_t_indices   feature_array_full.npy	   sample_data
bag_labels    bag_t_labels    feature_test_array_full.npy


In [31]:
bag_t_indices, bag_t_labels = data_generation(instance_index_label_test)
bag_t_features = {kk: torch.Tensor(feature_test_array[inds]) for kk, inds in bag_t_indices.items()}

In [32]:
pickle.dump(bag_t_indices, open( "bag_t_indices", "wb" ) )
pickle.dump(bag_t_labels, open( "bag_t_labels", "wb" ) )
pickle.dump(bag_t_features, open( "bag_t_features", "wb" ) )

In [33]:
bag_t_indices = pickle.load( open( "bag_t_indices", "rb" ) )
bag_t_labels = pickle.load( open( "bag_t_labels", "rb" ) )
bag_t_features = pickle.load( open( "bag_t_features", "rb" ) )

## Utility class to apply transformations on input tensors to do padding

In [34]:
from torch.utils.data import Dataset
class Transform_data(Dataset):
    def __init__(self, data, transform=None):
        self.transform = transform
        self.data = data
        
    def __getitem__(self, index):
        tensor = self.data[index][0]
        if self.transform is not None:
            tensor = self.transform(tensor)
        return (tensor, self.data[index][1])

    def __len__(self):
        return len(self.data)

In [35]:
train_data = [(bag_features[i],bag_labels[i]) for i in range(len(bag_features))]

## Utility function to apply padding

In [36]:
def pad_tensor(data:list, max_number_instance) -> list:
    new_data = []
    for bag_index in range(len(data)):
        tensor_size = len(data[bag_index][0])
        pad_size = max_number_instance - tensor_size
        p2d = (0,0, 0, pad_size)
        padded = torch.nn.functional.pad(data[bag_index][0], p2d, 'constant', 0)
        new_data.append((padded, data[bag_index][1]))
    return new_data

## Maximum number of instances that can be in a bag is 7. It can accomodate less than 7.

In [37]:
max_number_instance = 7
padded_train = pad_tensor(train_data, max_number_instance)

In [38]:
test_data = [(bag_t_features[i],bag_t_labels[i]) for i in range(len(bag_t_features))]
padded_test = pad_tensor(test_data, max_number_instance)

In [39]:
def get_data_loaders_2(train_data, test_data, train_batch_size, val_batch_size):
    train_loader = DataLoader(train_data, batch_size=train_batch_size, shuffle=True)
    val_loader = DataLoader(test_data, batch_size=val_batch_size, shuffle=False)
    return train_loader, val_loader
train_loader,valid_loader = get_data_loaders_2(padded_train, padded_test, 1, 1)
train_batch_size = 1
val_batch_size = 1


## Utility classes for SoftMaxMeanSimple and AttentionSoftmax

In [40]:
class SoftMaxMeanSimple(torch.nn.Module):
    def __init__(self, n, n_inst, dim=0):
        super(SoftMaxMeanSimple, self).__init__()
        self.dim = dim
        self.gate = torch.nn.Softmax(dim=self.dim)      
        self.mdl_instance_transform = nn.Sequential(
                            nn.Linear(n, n_inst),
                            nn.LeakyReLU(),
                            nn.Linear(n_inst, n),
                            nn.LeakyReLU(),
                            )
    def forward(self, x):
        z = self.mdl_instance_transform(x)
        if self.dim==0:
            z = z.view((z.shape[0],1)).sum(1)
        elif self.dim==1:
            z = z.view((1, z.shape[1])).sum(0)
        gate_ = self.gate(z)
        res = torch.sum(x* gate_, self.dim)
        return res, gate_

    
class AttentionSoftMax(torch.nn.Module):
    def __init__(self, in_features = 3, out_features = None):
        super(AttentionSoftMax, self).__init__()
        self.otherdim = ''
        if out_features is None:
            out_features = in_features
        self.layer_linear_tr = nn.Linear(in_features, out_features)
        self.activation = nn.LeakyReLU()
        self.layer_linear_query = nn.Linear(out_features, 1)
        
    def forward(self, x):
        keys = self.layer_linear_tr(x)
        keys = self.activation(keys)
        attention_map_raw = self.layer_linear_query(keys)[...,0]
        attention_map = nn.Softmax(dim=-1)(attention_map_raw)
        result = torch.einsum(f'{self.otherdim}i,{self.otherdim}ij->{self.otherdim}j', attention_map, x)
        return result, attention_map


## Defining MoisyAnd Class, Custom MIL neural Network class

In [41]:
class NoisyAnd(torch.nn.Module):
    def __init__(self, a=10, dims=[1,2]):
        super(NoisyAnd, self).__init__()
        self.a = a
        self.b = torch.nn.Parameter(torch.tensor(0.01))
        self.dims =dims
        self.sigmoid = nn.Sigmoid()
    def forward(self, x):
        mean = torch.mean(x, self.dims, True)
        res = (self.sigmoid(self.a * (mean - self.b)) - self.sigmoid(-self.a * self.b)) / (
              self.sigmoid(self.a * (1 - self.b)) - self.sigmoid(-self.a * self.b))
        return res
    


class NN(torch.nn.Module):

    def __init__(self, n=3584, n_mid = 1024,
                 n_out=1, dropout=0.2,
                 scoring = None,
                ):
        super(NN, self).__init__()
        self.linear1 = torch.nn.Linear(n, n_mid)
        self.non_linearity = torch.nn.LeakyReLU()
        self.linear2 = torch.nn.Linear(n_mid, n_out)
        self.dropout = torch.nn.Dropout(dropout)
        if scoring:
            self.scoring = scoring
        else:
            self.scoring = torch.nn.Softmax() if n_out>1 else torch.nn.Sigmoid()
        
    def forward(self, x):
        z = self.linear1(x)
        z = self.non_linearity(z)
        z = self.dropout(z)
        z = self.linear2(z)
        y_pred = self.scoring(z)
        return y_pred
    

class LogisticRegression(torch.nn.Module):
    def __init__(self, n=3584, n_out=1):
        super(LogisticRegression, self).__init__()
        self.linear = torch.nn.Linear(n, n_out)
        self.scoring = torch.nn.Softmax() if n_out>1 else torch.nn.Sigmoid()

    def forward(self, x):
        z = self.linear(x)
        y_pred = self.scoring(z)
        return y_pred

    
def regularization_loss(params,
                        reg_factor = 0.005,
                        reg_alpha = 0.5):
    params = [pp for pp in params if len(pp.shape)>1]
    l1_reg = nn.L1Loss()
    l2_reg = nn.MSELoss()
    loss_reg =0
    for pp in params:
        loss_reg+=reg_factor*((1-reg_alpha)*l1_reg(pp, target=torch.zeros_like(pp)) +\
                           reg_alpha*l2_reg(pp, target=torch.zeros_like(pp)))
    return loss_reg

class MIL_NN(torch.nn.Module):

    def __init__(self, n=3584,  
                 n_mid=1024, 
                 n_classes=1, 
                 dropout=0.1,
                 agg = None,
                 scoring=None,
                ):
        super(MIL_NN, self).__init__()
        self.agg = agg if agg is not None else AttentionSoftMax(n)
        
        if n_mid == 0:
            self.bag_model = LogisticRegression(n, n_classes)
        else:
            self.bag_model = NN(n, n_mid, n_classes, dropout=dropout, scoring=scoring)
        
    def forward(self, bag_feature_stacked, bag_lbls=None):
        return self.bag_model(bag_feature_stacked)

## To claculate metric and display scores

In [42]:
import inspect
def calculate_metric(metric_fn, true_y, pred_y):
    # multi class problems need to have averaging method
    if "average" in inspect.getfullargspec(metric_fn).args:
        return metric_fn(true_y, pred_y, average="macro")
    else:
        return metric_fn(true_y, pred_y)
    
def print_scores(p, r, f1, a, batch_size):
    # just an utility printing function
    for name, scores in zip(("precision", "recall", "F1", "accuracy"), (p, r, f1, a)):
        print(f"\t{name.rjust(14, ' ')}: {sum(scores)/batch_size:.4f}")

## Dumping the train tensor

In [43]:
i=0
for x, e in enumerate(train_loader):
  print(x)
  print(e)
  if (i==2):
    break
  i+=1

0
[tensor([[[1.3653, 0.7464, 0.7242,  ..., 1.6607, 0.2067, 1.5258],
         [1.0364, 0.3388, 0.1581,  ..., 2.1122, 0.1954, 0.6616],
         [0.7840, 0.3218, 0.5067,  ..., 1.2163, 0.1786, 1.5284],
         ...,
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]]]), tensor([0])]
1
[tensor([[[1.0292, 2.0650, 2.1351,  ..., 0.7148, 0.7587, 0.5094],
         [0.4056, 0.7396, 1.4632,  ..., 0.1477, 2.7407, 0.4806],
         [0.9756, 1.6984, 1.5282,  ..., 0.1304, 0.6081, 0.2161],
         ...,
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]]]), tensor([1])]
2
[tensor([[[0.5631, 1.0603, 1.2048,  ..., 0.8638, 3.1409, 0.3297],
         [1.4041, 0.5276, 0.2163,  ..., 2.2008, 0.3422, 0.5076],
         [0.6867, 0.2

## Multi-Instance Learning Training

In [44]:
import numpy as np
import time
import torch.nn as nn
from torch import optim
from sklearn.metrics import *

start_ts = time.time()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

lr0 = 1e-4

# model:
model = MIL_NN()

# params you need to specify:
epochs = 10
train_loader,valid_loader = get_data_loaders_2(padded_train, padded_test, 1, 1)
train_batch_size = 1
val_batch_size = 1

loss_function = torch.nn.BCELoss(reduction='mean') # your loss function, cross entropy works well for multi-class problems

torch.nn.BCEWithLogitsLoss()
optimizer = optim.SGD(model.parameters(), lr=lr0, momentum=0.9)

losses = []
batches = len(train_loader)
val_batches = len(valid_loader)

# loop for every epoch (training + evaluation)
for epoch in range(epochs):
    total_loss = 0

    # progress bar (works in Jupyter notebook too!)
    progress = tqdm(enumerate(train_loader), desc="Loss: ", total=batches)

    # ----------------- TRAINING  -------------------- 
    # set model to training
    model.train()
    for i, data in progress:
        X = data[0]
        y = data[1]
        print()
        X = X.reshape([1, 7*512])
        y = y.type(torch.FloatTensor)
        # training step for single batch
        model.zero_grad() # to make sure that all the grads are 0 
        outputs = model(X)

        loss = loss_function(outputs, y)
        loss.backward()
        optimizer.step()

        # getting training quality data
        current_loss = loss.item()
        total_loss += current_loss

        print('Total Loss: '+str(total_loss))
        # updating progress bar
        progress.set_description("Loss: {:.4f}".format(total_loss/(i+1)))
        
    # releasing unceseccary memory in GPU
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    
    # ----------------- VALIDATION  ----------------- 
    val_losses = 0
    precision, recall, f1, accuracy = [], [], [], []
    
    # set model to evaluating (testing)
    model.eval()
    with torch.no_grad():
        for i, data in enumerate(valid_loader):
            X = data[0] 
            y = data[1]
            X = X.reshape([1,7*512])
            y = y.type(torch.FloatTensor)
            outputs = model(X) # this get's the prediction from the network
            prediced_classes =outputs.detach().round()
            #y_pred.extend(prediced_classes.tolist())
            val_losses += loss_function(outputs, y)
            
            # calculate P/R/F1/A metrics for batch
            for acc, metric in zip((precision, recall, f1, accuracy), 
                                   (precision_score, recall_score, f1_score, accuracy_score)):
                acc.append(
                    calculate_metric(metric, y.cpu(), prediced_classes.cpu())
                )
          
    print(f"Epoch {epoch+1}/{epochs}, training loss: {total_loss/batches}, validation loss: {val_losses/val_batches}")
    print_scores(precision, recall, f1, accuracy, val_batches)
    losses.append(total_loss/batches) # for plotting learning curve
print(f"Training time: {time.time()-start_ts}s")

HBox(children=(FloatProgress(value=0.0, description='Loss: ', max=6000.0, style=ProgressStyle(description_widt…


Total Loss: 0.7120054960250854

Total Loss: 1.5163761377334595

Total Loss: 2.1092549562454224

Total Loss: 2.8191948533058167

Total Loss: 3.536969482898712

Total Loss: 4.237954795360565

Total Loss: 4.872810959815979

Total Loss: 5.600542306900024

Total Loss: 6.326898694038391

Total Loss: 6.9985504150390625



  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m

Total Loss: 2383.553642362356

Total Loss: 2383.9659530222416

Total Loss: 2385.2406499683857

Total Loss: 2386.675984174013

Total Loss: 2387.383952409029

Total Loss: 2388.1093170940876

Total Loss: 2388.6119906008244

Total Loss: 2389.531103283167

Total Loss: 2390.2677632272243

Total Loss: 2390.9877822697163

Total Loss: 2391.5644615590572

Total Loss: 2392.8287868201733

Total Loss: 2393.4420253932476

Total Loss: 2393.8899130523205

Total Loss: 2394.189560800791

Total Loss: 2394.530295699835

Total Loss: 2395.5950332581997

Total Loss: 2396.5900328457355

Total Loss: 2397.890234261751

Total Loss: 2398.2930022776127

Total Loss: 2398.716864466667

Total Loss: 2399.2422301769257

Total Loss: 2399.968088924885

Total Loss: 2401.2024390101433

Total Loss: 2401.6776356101036

Total Loss: 2402.1231556236744

Total Loss: 2402.788748651743

Total Loss: 2403.200384646654

Total Loss: 2404.4614267647266

Total Loss: 2404.

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/3, training loss: 0.6761125057985385, validation loss: 0.6666103005409241
	     precision: 0.5917
	        recall: 0.5917
	            F1: 0.5917
	      accuracy: 0.5917


HBox(children=(FloatProgress(value=0.0, description='Loss: ', max=6000.0, style=ProgressStyle(description_widt…

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Total Loss: 2339.601607248187

Total Loss: 2339.9151241332293

Total Loss: 2340.334422633052

Total Loss: 2340.9935701340437

Total Loss: 2341.70484302938

Total Loss: 2342.2325075119734

Total Loss: 2342.8599458783865

Total Loss: 2343.5203071683645

Total Loss: 2344.3567517846823

Total Loss: 2345.029147133231

Total Loss: 2345.3421664386988

Total Loss: 2345.880409553647

Total Loss: 2346.6877210885286

Total Loss: 2347.305936470628

Total Loss: 2347.7028510421515

Total Loss: 2348.124353453517

Total Loss: 2348.651666626334

Total Loss: 2349.396447941661

Total Loss: 2349.765050575137

Total Loss: 2350.5353549569845

Total Loss: 2351.5556083768606

Total Loss: 2352.2944531291723

Total Loss: 2352.9965591281652

Total Loss: 2353.3524332493544

Total Loss: 2354.208003267646

Total Loss: 2354.8965945094824

Total Loss: 2355.495201036334

Total Loss: 2356.6815752238035

Total Loss: 2357.431084319949

Total Loss: 2358.0681

HBox(children=(FloatProgress(value=0.0, description='Loss: ', max=6000.0, style=ProgressStyle(description_widt…

[1;30;43mStreaming output truncated to the last 5000 lines.[0m

Total Loss: 2297.1621810495853

Total Loss: 2297.7765385210514

Total Loss: 2299.3385724127293

Total Loss: 2300.0494116842747

Total Loss: 2300.366455167532

Total Loss: 2301.3292362987995

Total Loss: 2301.6206807792187

Total Loss: 2302.481839865446

Total Loss: 2304.1298447549343

Total Loss: 2304.41493293643

Total Loss: 2305.079580336809

Total Loss: 2305.399008959532

Total Loss: 2305.8416415452957

Total Loss: 2306.6735460162163

Total Loss: 2307.873938500881

Total Loss: 2309.455164372921

Total Loss: 2310.6833944916725

Total Loss: 2312.1000490784645

Total Loss: 2312.455648422241

Total Loss: 2313.0895461440086

Total Loss: 2313.5309049487114

Total Loss: 2314.37900608778

Total Loss: 2314.7346384227276

Total Loss: 2315.1661506593227

Total Loss: 2315.6521064937115

Total Loss: 2316.6193487346172

Total Loss: 2317.329469591379

Total Loss: 2317.621814787388

Total Loss: 2318.0599962174892

Total Loss: 2318.55

## Save the model for future use

In [46]:
torch.save(model.state_dict(), 'mnist_mil_state.pt')