### Simple CNN model with and without instrument inclusion 

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time, gc

from sklearn.utils import class_weight
from sklearn.metrics import accuracy_score

import torch
import torchvision
import torch.nn as nn
from torch.utils.data.sampler import WeightedRandomSampler

from tqdm import tqdm

from src.dl_model_utils import get_transform_train,get_transform_valid, Cholec80Dataset, EarlyStopping, start_timer, end_timer_and_print

from src.models import ResNet18Model,ResNet18LSTM, ResNetBlock, ResNet50,ResNet50_ST,ResNet50_ST_Phase, ResNet50LSTM, Xception


In [2]:
INSTRUMENT_MAP = {
    '':0,
    'tool_Grasper':1,
    'tool_Grasper, tool_Hook':2,
    'tool_Hook':3,
    'tool_Grasper, tool_Irrigator':4,
    'tool_Irrigator':5,
    'tool_Bipolar':6,
    'tool_Grasper, tool_Bipolar':7,
    'tool_Grasper, tool_Clipper':8,
    'tool_Clipper':9,
    'tool_Grasper, tool_Scissors':10,
    'tool_SpecimenBag':11,
    'tool_Grasper, tool_SpecimenBag':12,
    'tool_Scissors':13,
    'tool_Grasper, tool_Bipolar, tool_Irrigator':14,
    'tool_Bipolar, tool_Irrigator':15,
    'tool_Bipolar, tool_SpecimenBag':16,
    'tool_Grasper, tool_Bipolar, tool_SpecimenBag':17,
    'tool_Irrigator, tool_SpecimenBag':18,
    'tool_Bipolar, tool_Irrigator, tool_SpecimenBag':19,
    'tool_Grasper, tool_Irrigator, tool_SpecimenBag':20,
    'tool_Grasper, tool_Clipper, tool_Irrigator':21,
    'tool_Clipper, tool_Irrigator':22,
    'tool_Grasper, tool_Clipper, tool_SpecimenBag':23,
    'tool_Grasper, tool_Scissors, tool_SpecimenBag':24,
    'tool_Hook, tool_Irrigator':25,
    'tool_Grasper, tool_Hook, tool_Irrigator':26,
    'tool_Bipolar, tool_Scissors':27
    }

In [3]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Device Type : {}".format(device))


Device Type : cuda


In [4]:
#Import train and validation sets
train_df = pd.read_parquet('data/ordered_train_df.parquet')
val_df = pd.read_parquet('data/ordered_val_df.parquet')

#Sample train and val datasets at 1fps
train_df = train_df[train_df['frame'] % 25 == 0].reset_index(drop=True)
val_df = val_df[val_df['frame'] % 25 == 0].reset_index(drop=True)

train_df['tool_target'] = train_df[['tool_Grasper', 'tool_Bipolar',
        'tool_Hook', 'tool_Scissors', 'tool_Clipper', 'tool_Irrigator',
        'tool_SpecimenBag']].dot(train_df[['tool_Grasper', 'tool_Bipolar',
        'tool_Hook', 'tool_Scissors', 'tool_Clipper', 'tool_Irrigator',
        'tool_SpecimenBag']].columns + ', ').str.rstrip(', ').map(INSTRUMENT_MAP).values
val_df['tool_target'] = val_df[['tool_Grasper', 'tool_Bipolar',
        'tool_Hook', 'tool_Scissors', 'tool_Clipper', 'tool_Irrigator',
        'tool_SpecimenBag']].dot(val_df[['tool_Grasper', 'tool_Bipolar',
        'tool_Hook', 'tool_Scissors', 'tool_Clipper', 'tool_Irrigator',
        'tool_SpecimenBag']].columns + ', ').str.rstrip(', ').map(INSTRUMENT_MAP).values

### Training Cycle

In [6]:
def model_train_validate_cnn(train_df, val_df,epochs=100, model_num=0, optimizer_num = 0,batch_size = 128, learning_rate = 1e-3, weight_decay=5e-4,momentum=0.9,patience=20, patience_delta = 0.0005):
    '''
    Model nums:

    Model 0: ResNet18
    Model 1: Resnet50 - Pre-Trained
    Model 2: ResNet18 w/LSTM
    Model 3: ResNet50 - Pre-Trained w/LSTM
    Model 4: Xception - Pre-Trained
    Model 5: Resnet50 - OD Phase

    '''

    #Get train label weights and convert to torch tensor
    phase_weights=class_weight.compute_class_weight('balanced',classes=np.unique(train_df['phase']),y=train_df['phase'].to_numpy())
    phase_weights=torch.tensor(phase_weights,dtype=torch.float)

    #Create datasets for phase segmentation
    trainset = Cholec80Dataset(train_df,  get_transform_train())
    validationset = Cholec80Dataset(val_df,  get_transform_valid())

    #Shuffle for non-lstm model. For lstm we preserve the sequence
    if model_num == 2 or model_num == 3:
        trainloader = torch.utils.data.DataLoader(
            trainset, batch_size=batch_size, shuffle=False
        )

    else:
        trainloader = torch.utils.data.DataLoader(
            trainset, batch_size=batch_size, shuffle=True
        )


    validationloader = torch.utils.data.DataLoader(
        validationset, batch_size=batch_size, shuffle=False
    )

    #Initialise two required loss functions
    criterion = nn.CrossEntropyLoss(weight=phase_weights)
    criterion_tool = nn.BCEWithLogitsLoss(reduction='mean')
    if device == 'cuda':
        criterion.to(device)
  

    #Initialize early stopping
    early_stopping = EarlyStopping(patience, patience_delta)


    #Model selection
    if model_num == 0:
        model = ResNet18Model(ResNetBlock)
        model_name = 'resnet_18_ord'
        
    elif model_num == 1:
        model = ResNet50()
        model.freeze()
        model_name = 'resnet_50_ord'
    elif model_num == 2:
        model = ResNet18LSTM()
        model_name = 'resnet_18_lstm_ord'
    elif model_num == 3:
        model = ResNet50LSTM()
        model_name = 'resnet_50_lstm_ord'
    elif model_num == 4:
        model = Xception()
        model.freeze()
        model_name = 'xception_ord'
    elif model_num == 5:
        model = ResNet50_ST_Phase()
        model.freeze()
        model_name = 'resnet_50_od_phase_ord'


    #Select optimizer for job
    optimizer = None
    if optimizer_num == 0:
        optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=learning_rate, momentum=momentum,weight_decay=weight_decay)
    elif optimizer_num == 1:
        optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=learning_rate)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, verbose=True)

    if device == 'cuda':
        model.to(device)

    start_timer()
    for epoch in range(0,epochs):
        print(f'Training Epoch: {epoch+1} of {epochs}..')

        train_loss = 0.0
        train_correct = 0
        total = 0.0

        # Outputs for HMM
        img_path = []
        true_labels = []
        soft_max_out = []
        predicted_labels = []

        model.train()
        for i, (train_inputs,train_img_id, train_labels,train_tools,train_tool_targets) in tqdm(enumerate(trainloader, 0)):
            img_path = img_path + list(train_img_id)
            true_labels = true_labels + train_labels.tolist()
            if device == 'cuda':
                train_inputs, train_labels, train_tools = train_inputs.to(device), train_labels.to(device), train_tools.to(device)

            optimizer.zero_grad()
            # if model_num < 2:
            if model_num != 5:
                train_outputs  = model.forward(train_inputs)
            else:
                train_outputs, _ = model.forward(train_inputs)
            
            _, train_preds = torch.max(train_outputs.data, 1)
            loss = criterion(train_outputs, train_labels)

            loss.backward()
            optimizer.step()

                
            # else:

                  
                
                
            #     tool_outputs, train_outputs  = model.forward(train_inputs)
            #     tool_outputs = tool_outputs.data

                
                

            #     train_tool_outputs = (tool_outputs > 0.5).float()

                
            #     train_tool_outputs = train_tool_outputs.float()

            #     tool_l = criterion_tool(train_tool_outputs, train_tools.float())

            #     _, train_preds = torch.max(train_outputs.data, 1)

            #     phase_l = criterion(train_outputs, train_labels)

            #     loss = tool_l + phase_l
            #     loss.backward()
            #     optimizer.step()

            soft_max_out = soft_max_out + train_outputs.cpu().detach().tolist()
            predicted_labels = predicted_labels + train_preds.cpu().tolist()

            train_loss += loss.item()
            total += train_labels.size(0)
            train_correct += train_preds.eq(train_labels).sum().item()

            
        hmm_df = pd.DataFrame(
            {'img_path': img_path,
                'true_labels': true_labels,
                'cnn_output': soft_max_out,
                'predicted_labels': predicted_labels
                })

        
        print(len(trainloader), 'Training Loss Phase: %.5f | Training Acc Phase: %.5f%% (%d/%d)'
        % (train_loss / len(trainloader), 100. * train_correct / total, train_correct, total))

        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0.0
            
        with torch.no_grad():
            for j, (val_inputs,val_img_id, val_labels,val_tools,val_tool_targets) in enumerate(validationloader):
                if device == 'cuda':
                    val_inputs, val_labels, val_tools = val_inputs.to(device), val_labels.to(device), val_tools.to(device)

                if model_num != 5:
                    val_outputs = model.forward(val_inputs)
                else:
                    val_outputs, _ = model.forward(val_inputs)
    
                _, val_preds = torch.max(val_outputs.data, 1)
                loss = criterion(val_outputs, val_labels)

                # else:
                #     tool_outputs, val_outputs = model.forward(val_inputs)
                #     tool_outputs = tool_outputs.data


                #     val_tool_outputs = (tool_outputs > 0.5).float()

                #     tool_l = criterion_tool(val_tool_outputs, val_tools.float())
        
                #     _, val_preds = torch.max(val_outputs.data, 1)
                #     phase_l = criterion(val_outputs, val_labels)
                #     loss = tool_l + phase_l


                val_loss += loss.item()
                val_total += val_labels.size(0)
                val_correct += val_preds.eq(val_labels).sum().item()

        
            print(len(validationloader), 'Validation Loss Phase: %.5f | Validation Acc Phase: %.5f%% (%d/%d)'
                % (val_loss / len(validationloader), 100. * val_correct / val_total, val_correct, val_total))
    
            scheduler.step(val_loss)
            #Get average for early stopping check and call the early stopping class to perform calc
            val_loss_avg = val_loss/len(validationloader)
            early_stopping(val_loss_avg, model, model_name, hmm_df)

            #If limit reached stop execution
            if early_stopping.stop:
                break
              
    end_timer_and_print()
    
  
            

In [7]:
start_time = None
model_train_validate_cnn(train_df, val_df,batch_size=256, model_num=3)



Training Epoch: 1 of 100..


338it [06:17,  1.12s/it]


338 Training Loss Phase: 1.75155 | Training Acc Phase: 37.47553% (32356/86339)
84 Validation Loss Phase: 1.71584 | Validation Acc Phase: 35.89648% (7698/21445)
Training Epoch: 2 of 100..


338it [05:53,  1.05s/it]


338 Training Loss Phase: 1.65905 | Training Acc Phase: 39.66805% (34249/86339)
84 Validation Loss Phase: 1.59922 | Validation Acc Phase: 35.89648% (7698/21445)
Validation loss improved from 1.72 to 1.60  Saving Model
Training Epoch: 3 of 100..


338it [05:54,  1.05s/it]


338 Training Loss Phase: 1.49853 | Training Acc Phase: 41.45519% (35792/86339)
84 Validation Loss Phase: 1.42711 | Validation Acc Phase: 39.01142% (8366/21445)
Validation loss improved from 1.60 to 1.43  Saving Model
Training Epoch: 4 of 100..


338it [05:47,  1.03s/it]


338 Training Loss Phase: 1.29728 | Training Acc Phase: 43.41839% (37487/86339)
84 Validation Loss Phase: 1.23114 | Validation Acc Phase: 38.44719% (8245/21445)
Validation loss improved from 1.43 to 1.23  Saving Model
Training Epoch: 5 of 100..


338it [05:44,  1.02s/it]


338 Training Loss Phase: 1.14694 | Training Acc Phase: 48.13584% (41560/86339)
84 Validation Loss Phase: 1.15234 | Validation Acc Phase: 43.65586% (9362/21445)
Validation loss improved from 1.23 to 1.15  Saving Model
Training Epoch: 6 of 100..


338it [05:39,  1.01s/it]


338 Training Loss Phase: 1.05212 | Training Acc Phase: 52.31008% (45164/86339)
84 Validation Loss Phase: 1.10079 | Validation Acc Phase: 46.36512% (9943/21445)
Validation loss improved from 1.15 to 1.10  Saving Model
Training Epoch: 7 of 100..


338it [05:43,  1.02s/it]


338 Training Loss Phase: 0.99457 | Training Acc Phase: 55.07940% (47555/86339)
84 Validation Loss Phase: 1.03924 | Validation Acc Phase: 49.37748% (10589/21445)
Validation loss improved from 1.10 to 1.04  Saving Model
Training Epoch: 8 of 100..


338it [05:42,  1.01s/it]


338 Training Loss Phase: 0.89011 | Training Acc Phase: 58.34212% (50372/86339)
84 Validation Loss Phase: 0.99609 | Validation Acc Phase: 50.23082% (10772/21445)
Validation loss improved from 1.04 to 1.00  Saving Model
Training Epoch: 9 of 100..


338it [05:43,  1.02s/it]


338 Training Loss Phase: 0.77878 | Training Acc Phase: 62.34610% (53829/86339)
84 Validation Loss Phase: 0.96011 | Validation Acc Phase: 52.38051% (11233/21445)
Validation loss improved from 1.00 to 0.96  Saving Model
Training Epoch: 10 of 100..


338it [05:44,  1.02s/it]


338 Training Loss Phase: 0.68376 | Training Acc Phase: 66.84349% (57712/86339)
84 Validation Loss Phase: 0.91696 | Validation Acc Phase: 62.91443% (13492/21445)
Validation loss improved from 0.96 to 0.92  Saving Model
Training Epoch: 11 of 100..


338it [05:38,  1.00s/it]


338 Training Loss Phase: 0.59629 | Training Acc Phase: 72.48752% (62585/86339)
84 Validation Loss Phase: 1.02177 | Validation Acc Phase: 66.08533% (14172/21445)
Early Stopping count is at: 1 maximum is: 20
Training Epoch: 12 of 100..


338it [05:48,  1.03s/it]


338 Training Loss Phase: 0.48627 | Training Acc Phase: 79.01644% (68222/86339)
84 Validation Loss Phase: 1.20148 | Validation Acc Phase: 57.16484% (12259/21445)
Early Stopping count is at: 2 maximum is: 20
Training Epoch: 13 of 100..


338it [05:34,  1.01it/s]


338 Training Loss Phase: 0.38795 | Training Acc Phase: 83.93889% (72472/86339)
84 Validation Loss Phase: 1.26519 | Validation Acc Phase: 59.19795% (12695/21445)
Early Stopping count is at: 3 maximum is: 20
Training Epoch: 14 of 100..


338it [05:24,  1.04it/s]


338 Training Loss Phase: 0.30330 | Training Acc Phase: 87.35102% (75418/86339)
84 Validation Loss Phase: 1.03004 | Validation Acc Phase: 63.84705% (13692/21445)
Early Stopping count is at: 4 maximum is: 20
Training Epoch: 15 of 100..


338it [05:30,  1.02it/s]


338 Training Loss Phase: 0.26715 | Training Acc Phase: 88.94242% (76792/86339)
84 Validation Loss Phase: 1.54067 | Validation Acc Phase: 50.99557% (10936/21445)
Early Stopping count is at: 5 maximum is: 20
Training Epoch: 16 of 100..


338it [05:28,  1.03it/s]


338 Training Loss Phase: 0.22732 | Training Acc Phase: 89.34317% (77138/86339)
84 Validation Loss Phase: 1.08422 | Validation Acc Phase: 65.59571% (14067/21445)
Early Stopping count is at: 6 maximum is: 20
Training Epoch: 17 of 100..


338it [05:17,  1.07it/s]


338 Training Loss Phase: 0.15120 | Training Acc Phase: 93.22786% (80492/86339)
84 Validation Loss Phase: 0.90441 | Validation Acc Phase: 76.69387% (16447/21445)
Validation loss improved from 0.92 to 0.90  Saving Model
Training Epoch: 18 of 100..


338it [05:30,  1.02it/s]


338 Training Loss Phase: 0.31059 | Training Acc Phase: 87.63710% (75665/86339)
84 Validation Loss Phase: 0.83330 | Validation Acc Phase: 74.08254% (15887/21445)
Validation loss improved from 0.90 to 0.83  Saving Model
Training Epoch: 19 of 100..


338it [05:34,  1.01it/s]


338 Training Loss Phase: 0.21761 | Training Acc Phase: 91.09093% (78647/86339)
84 Validation Loss Phase: 1.28317 | Validation Acc Phase: 59.73420% (12810/21445)
Early Stopping count is at: 1 maximum is: 20
Training Epoch: 20 of 100..


338it [05:36,  1.00it/s]


338 Training Loss Phase: 0.14619 | Training Acc Phase: 94.02240% (81178/86339)
84 Validation Loss Phase: 0.68464 | Validation Acc Phase: 77.94824% (16716/21445)
Validation loss improved from 0.83 to 0.68  Saving Model
Training Epoch: 21 of 100..


338it [05:35,  1.01it/s]


338 Training Loss Phase: 0.09674 | Training Acc Phase: 95.70878% (82634/86339)
84 Validation Loss Phase: 1.18775 | Validation Acc Phase: 69.71788% (14951/21445)
Early Stopping count is at: 1 maximum is: 20
Training Epoch: 22 of 100..


338it [05:35,  1.01it/s]


338 Training Loss Phase: 0.17709 | Training Acc Phase: 92.98000% (80278/86339)
84 Validation Loss Phase: 1.18004 | Validation Acc Phase: 68.58941% (14709/21445)
Early Stopping count is at: 2 maximum is: 20
Training Epoch: 23 of 100..


338it [05:29,  1.03it/s]


338 Training Loss Phase: 0.10242 | Training Acc Phase: 95.97864% (82867/86339)
84 Validation Loss Phase: 0.88101 | Validation Acc Phase: 75.17370% (16121/21445)
Early Stopping count is at: 3 maximum is: 20
Training Epoch: 24 of 100..


338it [05:27,  1.03it/s]


338 Training Loss Phase: 0.05562 | Training Acc Phase: 97.74957% (84396/86339)
84 Validation Loss Phase: 0.98679 | Validation Acc Phase: 74.87993% (16058/21445)
Early Stopping count is at: 4 maximum is: 20
Training Epoch: 25 of 100..


338it [05:27,  1.03it/s]


338 Training Loss Phase: 0.04405 | Training Acc Phase: 98.12483% (84720/86339)
84 Validation Loss Phase: 1.14157 | Validation Acc Phase: 71.19142% (15267/21445)
Early Stopping count is at: 5 maximum is: 20
Training Epoch: 26 of 100..


338it [05:28,  1.03it/s]


338 Training Loss Phase: 0.03302 | Training Acc Phase: 98.67036% (85191/86339)
84 Validation Loss Phase: 1.02560 | Validation Acc Phase: 74.59548% (15997/21445)
Early Stopping count is at: 6 maximum is: 20
Training Epoch: 27 of 100..


131it [02:10,  1.01it/s]


KeyboardInterrupt: 

In [6]:

def train_validate_tool_detect(train_df, val_df, method=0, device='cuda',epochs=100, optimizer_num = 0,batch_size = 128, learning_rate = 1e-3, weight_decay=5e-4,momentum=0.9,patience=20, patience_delta = 0.0005):
    '''
    Model nums:
    CNN tool detection
    Model 0: ResNet50

    '''
    
    #Get train label weights and convert to torch tensor
    tool_weights=class_weight.compute_class_weight('balanced',classes=np.unique(train_df['tool_target']),y=train_df['tool_target'].to_numpy())
    tool_weights=torch.tensor(tool_weights,dtype=torch.float)
    print(f'Weighted classes: {tool_weights}')

    #Create datasets for phase segmentation
    trainset = Cholec80Dataset(train_df,  get_transform_train())
    validationset = Cholec80Dataset(val_df,  get_transform_valid())

    trainloader = torch.utils.data.DataLoader(
        trainset,
        batch_size=batch_size,
        shuffle=True
    )
    validationloader = torch.utils.data.DataLoader(
        validationset, 
        batch_size=batch_size, 
        shuffle=False
    )

    #Initialise tool detection loss
    criterion = nn.BCEWithLogitsLoss(reduction='sum')
    criterion_tool = nn.CrossEntropyLoss(weight=tool_weights)
    if device == 'cuda':
        criterion.to(device)
        criterion_tool.to(device)
        
    #Initialize early stopping
    early_stopping = EarlyStopping(patience, patience_delta)


    #Model selection
    model = ResNet50_ST()
    model_name = 'ResNet50_od'

    # elif model_num == 2:
    #     model = DeiT_OD_phase()
    #     model_name = 'DeiT3_ord_od'
    #     model.freeze()

    #Final calculated learning rate
    learning_rate_final = learning_rate * batch_size / 512

    #Select optimizer for job
    optimizer = None
    if optimizer_num == 0:
        optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=learning_rate_final, momentum=momentum, weight_decay=weight_decay)
    elif optimizer_num == 1:
        # optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=learning_rate)
        optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=learning_rate_final, weight_decay=weight_decay)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, verbose=True)


    if device == 'cuda':
        model.to(device)

    start_timer()
    for epoch in range(0,epochs):
        print(f'Training Epoch: {epoch+1} of {epochs}..')

        train_loss = 0.0
        train_correct = 0
        total = 0.0
        batch_gt = []
        batch_pred = []

        # Outputs for HMM
        img_path = []
        true_labels = []
        softmax_out = []
        predicted_labels = []


        model.train()
        for i, (train_inputs,train_img_id, train_labels,train_tools,train_tool_targets) in tqdm(enumerate(trainloader, 0)):
            img_path = img_path + list(train_img_id)
            true_labels = true_labels + train_tool_targets.tolist()

            if device == 'cuda':
                train_inputs, train_labels, train_tools,train_tool_targets = train_inputs.to(device), train_labels.to(device), train_tools.to(device), train_tool_targets.to(device)

            optimizer.zero_grad()

            train_outputs, sigmoid_outputs  = model.forward(train_inputs)
            if method ==0:
                train_preds = (sigmoid_outputs > 0.5).float()
            else:
                _, train_preds = torch.max(train_outputs.data, 1)

            if method == 0:
                loss = criterion(train_outputs, train_tools)
            else:
                loss = criterion_tool(train_outputs, train_tool_targets)
                
            loss.backward()
            optimizer.step()

            softmax_out = softmax_out + train_outputs.cpu().detach().tolist()
            predicted_labels = predicted_labels + train_preds.cpu().tolist()

            train_loss += loss.item()
            if method == 0:
                batch_gt.append(np.concatenate(train_tools.cpu().tolist()))
                batch_pred.append(np.concatenate(train_preds.cpu().tolist()))
            else:
                total += train_labels.size(0)
                train_correct += train_preds.eq(train_tool_targets).sum().item()
                
        if method == 0:
            all_batch_gt = np.concatenate(batch_gt)
            all_batch_pred = np.concatenate(batch_pred)

        hmm_df = pd.DataFrame(
            {'img_path': img_path,
                'true_labels': true_labels,
                'cnn_output': softmax_out,
                'predicted_labels': predicted_labels
                })
        if method == 0:
            print(len(trainloader), 'Training Loss Phase: %.5f | Training Acc Phase: %.5f%%'
            % (train_loss / len(trainloader),100. * accuracy_score(all_batch_gt,all_batch_pred)))
        else:
            print(len(trainloader), 'Training Loss Phase: %.5f | Training Acc Phase: %.5f%% (%d/%d)'
            % (train_loss / len(trainloader), 100. * train_correct / total, train_correct, total))

        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0.0
        val_batch_gt = []
        val_batch_pred = []
            
        with torch.no_grad():
            for j, (val_inputs,val_img_id, val_labels,val_tools,val_tool_targets) in enumerate(validationloader):
                if device == 'cuda':
                    val_inputs, val_labels, val_tools, val_tool_targets = val_inputs.to(device), val_labels.to(device), val_tools.to(device), val_tool_targets.to(device)

                val_outputs, val_sigmoid_outputs = model.forward(val_inputs)
                if method == 0:
                    val_preds = (val_sigmoid_outputs > 0.5).float()
                else:
                    _, val_preds = torch.max(val_outputs.data, 1)

                if method == 0:
                    loss = criterion(val_outputs, val_tools)
                else:
                    loss = criterion_tool(val_outputs, val_tool_targets)

                val_loss += loss.item()
                if method == 0:
                    val_batch_gt.append(np.concatenate(val_tools.cpu().tolist()))
                    val_batch_pred.append(np.concatenate(val_preds.cpu().tolist()))
                else:
                    val_total += val_labels.size(0)
                    val_correct += val_preds.eq(val_tool_targets).sum().item()

            if method == 0:
                all_batch_gt = np.concatenate(val_batch_gt)
                all_batch_pred = np.concatenate(val_batch_pred)

            if method == 0:
                print(len(validationloader), 'Validation Loss Phase: %.5f | Validation Acc Phase: %.5f%%'
                % (val_loss / len(validationloader), 100. * accuracy_score(all_batch_gt,all_batch_pred)))
            else:
                print(len(validationloader), 'Validation Loss Phase: %.5f | Validation Acc Phase: %.5f%% (%d/%d)'
                % (val_loss / len(validationloader), 100. * val_correct / val_total, val_correct, val_total))
    
            scheduler.step(val_loss)
            #Get average for early stopping check and call the early stopping class to perform calc
            val_loss_avg = val_loss/len(validationloader)
            early_stopping(val_loss_avg, model, model_name, hmm_df)

            #If limit reached stop execution
            if early_stopping.stop:
                break

    end_timer_and_print()

In [7]:

start_time = None
train_validate_tool_detect(train_df, val_df, batch_size=256)


Weighted classes: tensor([3.5000e-01, 2.6292e-01, 9.2914e-02, 2.0479e-01, 1.4761e+00, 2.3115e+00,
        2.8420e+00, 1.9868e+00, 1.4197e+00, 2.9995e+00, 3.1497e+00, 4.3800e+00,
        8.7081e-01, 5.6997e+00, 6.1303e+00, 1.3465e+01, 2.4280e+01, 9.0960e+00,
        1.0560e+01, 1.5813e+01, 5.7852e+00, 2.5696e+02, 7.7088e+02, 3.0835e+03,
        1.1013e+02, 5.4097e+01, 2.2673e+01, 4.0573e+01])




Training Epoch: 1 of 100..


338it [04:54,  1.15it/s]


338 Training Loss Phase: 146.00623 | Training Acc Phase: 96.94510%
84 Validation Loss Phase: 264.77233 | Validation Acc Phase: 95.32025%
Training Epoch: 2 of 100..


338it [04:46,  1.18it/s]


338 Training Loss Phase: 68.75395 | Training Acc Phase: 98.63826%
84 Validation Loss Phase: 256.70888 | Validation Acc Phase: 95.66732%
Validation loss improved from 264.77 to 256.71  Saving Model
Training Epoch: 3 of 100..


338it [04:49,  1.17it/s]


338 Training Loss Phase: 49.50459 | Training Acc Phase: 99.03569%
84 Validation Loss Phase: 187.66276 | Validation Acc Phase: 97.03827%
Validation loss improved from 256.71 to 187.66  Saving Model
Training Epoch: 4 of 100..


338it [04:49,  1.17it/s]


338 Training Loss Phase: 40.50297 | Training Acc Phase: 99.19503%
84 Validation Loss Phase: 284.64410 | Validation Acc Phase: 96.28818%
Early Stopping count is at: 1 maximum is: 20
Training Epoch: 5 of 100..


338it [04:50,  1.16it/s]


338 Training Loss Phase: 32.84200 | Training Acc Phase: 99.33865%
84 Validation Loss Phase: 233.43043 | Validation Acc Phase: 96.80245%
Early Stopping count is at: 2 maximum is: 20
Training Epoch: 6 of 100..


338it [04:48,  1.17it/s]


338 Training Loss Phase: 27.09414 | Training Acc Phase: 99.45828%
84 Validation Loss Phase: 225.79697 | Validation Acc Phase: 96.90038%
Early Stopping count is at: 3 maximum is: 20
Training Epoch: 7 of 100..


338it [04:49,  1.17it/s]


338 Training Loss Phase: 22.65830 | Training Acc Phase: 99.54283%
84 Validation Loss Phase: 375.91159 | Validation Acc Phase: 95.99241%
Early Stopping count is at: 4 maximum is: 20
Training Epoch: 8 of 100..


338it [04:49,  1.17it/s]


338 Training Loss Phase: 20.33195 | Training Acc Phase: 99.59462%
84 Validation Loss Phase: 288.94266 | Validation Acc Phase: 96.39543%
Early Stopping count is at: 5 maximum is: 20
Training Epoch: 9 of 100..


338it [04:49,  1.17it/s]


338 Training Loss Phase: 18.12771 | Training Acc Phase: 99.63367%
84 Validation Loss Phase: 274.01960 | Validation Acc Phase: 96.91969%
Early Stopping count is at: 6 maximum is: 20
Training Epoch: 10 of 100..


338it [04:49,  1.17it/s]


338 Training Loss Phase: 15.74508 | Training Acc Phase: 99.68579%
84 Validation Loss Phase: 293.96703 | Validation Acc Phase: 96.73517%
Early Stopping count is at: 7 maximum is: 20
Training Epoch: 11 of 100..


338it [04:47,  1.17it/s]


338 Training Loss Phase: 13.56817 | Training Acc Phase: 99.72500%
84 Validation Loss Phase: 359.32754 | Validation Acc Phase: 96.35280%
Early Stopping count is at: 8 maximum is: 20
Training Epoch: 12 of 100..


338it [04:49,  1.17it/s]


338 Training Loss Phase: 11.41628 | Training Acc Phase: 99.77216%
84 Validation Loss Phase: 311.14849 | Validation Acc Phase: 96.84708%
Early Stopping count is at: 9 maximum is: 20
Training Epoch: 13 of 100..


338it [04:45,  1.18it/s]


338 Training Loss Phase: 10.94884 | Training Acc Phase: 99.78705%
84 Validation Loss Phase: 311.13645 | Validation Acc Phase: 96.76448%
Early Stopping count is at: 10 maximum is: 20
Training Epoch: 14 of 100..


338it [04:44,  1.19it/s]


338 Training Loss Phase: 9.75946 | Training Acc Phase: 99.80790%
84 Validation Loss Phase: 404.63814 | Validation Acc Phase: 96.13963%
Epoch 00014: reducing learning rate of group 0 to 5.0000e-05.
Early Stopping count is at: 11 maximum is: 20
Training Epoch: 15 of 100..


338it [04:44,  1.19it/s]


338 Training Loss Phase: 4.71298 | Training Acc Phase: 99.91611%
84 Validation Loss Phase: 333.35540 | Validation Acc Phase: 97.05559%
Early Stopping count is at: 12 maximum is: 20
Training Epoch: 16 of 100..


338it [04:45,  1.18it/s]


338 Training Loss Phase: 2.72536 | Training Acc Phase: 99.95648%
84 Validation Loss Phase: 325.29377 | Validation Acc Phase: 97.16018%
Early Stopping count is at: 13 maximum is: 20
Training Epoch: 17 of 100..


338it [04:44,  1.19it/s]


338 Training Loss Phase: 1.98381 | Training Acc Phase: 99.97187%
84 Validation Loss Phase: 354.66527 | Validation Acc Phase: 97.09889%
Early Stopping count is at: 14 maximum is: 20
Training Epoch: 18 of 100..


338it [04:43,  1.19it/s]


338 Training Loss Phase: 1.57729 | Training Acc Phase: 99.98048%
84 Validation Loss Phase: 342.04924 | Validation Acc Phase: 97.21280%
Early Stopping count is at: 15 maximum is: 20
Training Epoch: 19 of 100..


338it [04:46,  1.18it/s]


338 Training Loss Phase: 1.32506 | Training Acc Phase: 99.98163%
84 Validation Loss Phase: 369.90087 | Validation Acc Phase: 97.13353%
Early Stopping count is at: 16 maximum is: 20
Training Epoch: 20 of 100..


338it [04:48,  1.17it/s]


338 Training Loss Phase: 1.17439 | Training Acc Phase: 99.98527%
84 Validation Loss Phase: 378.90439 | Validation Acc Phase: 97.13153%
Early Stopping count is at: 17 maximum is: 20
Training Epoch: 21 of 100..


338it [05:00,  1.12it/s]


338 Training Loss Phase: 1.06828 | Training Acc Phase: 99.98776%
84 Validation Loss Phase: 367.36948 | Validation Acc Phase: 97.21613%
Early Stopping count is at: 18 maximum is: 20
Training Epoch: 22 of 100..


338it [04:45,  1.18it/s]


338 Training Loss Phase: 0.94571 | Training Acc Phase: 99.98742%
84 Validation Loss Phase: 388.74334 | Validation Acc Phase: 97.12420%
Early Stopping count is at: 19 maximum is: 20
Training Epoch: 23 of 100..


338it [04:47,  1.18it/s]


338 Training Loss Phase: 0.76478 | Training Acc Phase: 99.99206%
84 Validation Loss Phase: 401.01594 | Validation Acc Phase: 97.08690%
Early Stopping count is at: 20 maximum is: 20
Training Epoch: 24 of 100..


338it [04:48,  1.17it/s]


338 Training Loss Phase: 0.79704 | Training Acc Phase: 99.99123%
84 Validation Loss Phase: 387.36524 | Validation Acc Phase: 97.20947%
Early Stopping count is at: 21 maximum is: 20
Training Epoch: 25 of 100..


338it [04:48,  1.17it/s]


338 Training Loss Phase: 0.65518 | Training Acc Phase: 99.99255%
84 Validation Loss Phase: 401.36209 | Validation Acc Phase: 97.14152%
Epoch 00025: reducing learning rate of group 0 to 5.0000e-06.
Early Stopping count is at: 22 maximum is: 20
Training Epoch: 26 of 100..


338it [04:48,  1.17it/s]


338 Training Loss Phase: 0.57756 | Training Acc Phase: 99.99421%
84 Validation Loss Phase: 394.58029 | Validation Acc Phase: 97.15085%
Early Stopping count is at: 23 maximum is: 20
Training Epoch: 27 of 100..


338it [04:48,  1.17it/s]


338 Training Loss Phase: 0.58262 | Training Acc Phase: 99.99404%
84 Validation Loss Phase: 411.39350 | Validation Acc Phase: 97.10555%
Early Stopping count is at: 24 maximum is: 20
Training Epoch: 28 of 100..


338it [04:49,  1.17it/s]


338 Training Loss Phase: 0.55745 | Training Acc Phase: 99.99437%
84 Validation Loss Phase: 404.60815 | Validation Acc Phase: 97.12953%
Early Stopping count is at: 25 maximum is: 20
Training Epoch: 29 of 100..


338it [04:49,  1.17it/s]


338 Training Loss Phase: 0.55790 | Training Acc Phase: 99.99487%
84 Validation Loss Phase: 401.40203 | Validation Acc Phase: 97.18882%
Early Stopping count is at: 26 maximum is: 20
Training Epoch: 30 of 100..


35it [00:33,  1.03it/s]


KeyboardInterrupt: 

In [6]:
# running_loss_history = []
# running_corrects_history = []
# val_running_loss_history = []
# val_running_corrects_history = []

# start_timer()
# # training epochs
# for epoch in range(0, EPOCHS):

#     correct = 0.0
#     total = 0.0
#     running_loss = 0.0

#     # Outputs for HMM
#     img_path = []
#     true_labels = []
#     soft_max_out = []
#     predicted_labels = []

#     for i, data in tqdm(enumerate(trainloader, 0)):
#         # get the inputs; data is a list of [inputs, labels]
#         inputs,img_id, labels,tools  = data
#         img_path = img_path + list(img_id)


#         true_labels = true_labels + labels.tolist()
        
#         inputs, labels = inputs.to(device), labels.to(device)
#         inputs_tools = tools.to(device)


#         # zero the parameter gradients
#         optimizer.zero_grad()

#         # forward + backward + optimize
#         if USE_TOOL:
#             outputs = model(inputs, inputs_tools)
#         else:
#             outputs = model(inputs)

#         soft_max_out = soft_max_out + outputs.cpu().detach().tolist()


#         loss = criterion(outputs, labels)

#         loss.backward()
#         optimizer.step()

#         # print statistics
#         running_loss += loss.item()
#         _, predicted = outputs.max(1)
#         predicted_labels = predicted_labels + predicted.cpu().tolist()

#         total += labels.size(0)
#         correct += predicted.eq(labels).sum().item()

#     hmm_df = pd.DataFrame(
#         {'img_path': img_path,
#             'true_labels': true_labels,
#             'cnn_output': soft_max_out,
#             'predicted_labels': predicted_labels
#             })

#     print(len(trainloader), 'Training Loss: %.5f | Training Acc: %.5f%% (%d/%d)'
#             % (running_loss / len(trainloader), 100. * correct / total, correct, total))

#     model.eval()
#     val_loss = 0.0
#     val_correct = 0.0
#     val_total = 0.0

#     with torch.no_grad():
#         for j, (val_inputs,val_img_id, val_labels,val_tools) in enumerate(validationloader):
#             val_inputs, val_labels = val_inputs.to(device), val_labels.to(device)
#             val_inputs_tools = val_tools.to(device)


            

#             if USE_TOOL:
#                 val_outputs = model(val_inputs, val_inputs_tools)
#             else:
#                 val_outputs = model(val_inputs)

#             validation_loss = criterion(val_outputs, val_labels)

#             val_loss += validation_loss.item()
#             _, val_predicted = val_outputs.max(1)
#             val_total += val_labels.size(0)
#             val_correct += val_predicted.eq(val_labels).sum().item()

#         print(len(validationloader), 'Validation Loss: %.5f | Validation Acc: %.5f%% (%d/%d)'
#                 % (val_loss / len(validationloader), 100. * val_correct / val_total, val_correct, val_total))
#         scheduler.step(val_loss)
        
#         #Get average for early stopping check and call the early stopping class to perform calc
#         val_loss_avg = val_loss/len(validationloader)
#         early_stopping(val_loss_avg, model, MODEL_NAME)

#         #If limit reached stop execution
#         if early_stopping.stop:
#             break

#     epoch_loss = running_loss / len(trainloader)  # loss per epoch
#     epoch_acc = correct / total  # accuracy per epoch
#     running_loss_history.append(epoch_loss)  # appending for displaying
#     running_corrects_history.append(epoch_acc)

#     val_epoch_loss = val_loss / len(validationloader)
#     val_epoch_acc = val_correct / val_total
#     val_running_loss_history.append(val_epoch_loss)
#     val_running_corrects_history.append(val_epoch_acc)


# plt.style.use('ggplot')
# plt.plot(running_loss_history, label='training loss')
# plt.plot(val_running_loss_history, label='validation loss')
# plt.title("Training validation loss curve")
# plt.legend()
# print("Finished Training")
# end_timer_and_print()