### Train Transformer

Paper: https://pubs.rsna.org/doi/suppl/10.1148/ryai.210174 

Code: https://github.com/tarakapoor/thyroid_deep_learning

Dataset: https://stanfordaimi.azurewebsites.net/datasets/a72f2b02-7b53-4c5d-963c-d7253220bfd5

In [1]:
#req
import pandas as pd
from PIL import Image
import os
import os.path

import numpy as np
import re
from pathlib import Path
import tables
import cv2
import h5py
import math
import random
import csv

#data aug
from albumentations.pytorch import ToTensorV2
#from albumentations.pytorch import ToTensor
import albumentations as A

import torch.optim as optim
import torch.utils.data as data
import torch
from torch.utils.data import DataLoader
from sklearn.utils import resample
from sklearn.metrics import roc_auc_score, accuracy_score, roc_curve, f1_score

from tqdm import tqdm_notebook as tqdm
import h5py
import cv2
from PIL import Image



## Define Transformer (transformer_model.py)

In [2]:
import torch
import torch.nn as nn
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms 

import math
import numpy as np
import os
from os import path
import operator
import pandas as pd
import glob
import random
from torch import Tensor
import time
from numpy import sqrt
from numpy import argmax

#https://github.com/pytorch/pytorch/issues/24826
class PositionalEncoder(torch.nn.Module):
    def __init__(self, d_model, max_seq_len=72):
        super().__init__()
        self.d_model = d_model
        pe = torch.zeros(max_seq_len, d_model) #36, 256
        for pos in range(max_seq_len): #in range 36 (each patient feature vector)
            for i in range(0, d_model, 2): #in range [256] within feature vector
                pe[pos, i] = \
                    math.sin(pos / (10000 ** ((2 * i) / d_model)))
                pe[pos, i + 1] = \
                    math.cos(pos / (10000 ** ((2 * (i + 1)) / d_model)))
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        with torch.no_grad():
            x = x * math.sqrt(self.d_model)
            seq_len = x.size(1)
            pe = self.pe[:, :seq_len]
            x = x + pe
            return x
        

class TransformerModel(nn.Module):
    def __init__(self,in_size,use_position_enc,
                 n_heads=2,
                 n_encoders=2,
                 num_outputs=2,
                ):
        """Custom Transformer Model class with encoder.
        Keyword arguments:
        in_size -- number of features in one frame's feature vector
        use_position_enc -- whether to use positional encoding
        n_heads -- number of (self-attention + feed forward) heads in an encoder
        n_encoders -- number of encoders
        num_outputs -- number of output classes (binary 0 or 1 means 2 output classes)"""
        
        super(TransformerModel, self).__init__()

        self.in_size=in_size
        self.use_position_enc = use_position_enc #positional encoding or not?
        
        self.encoder_layer =nn.TransformerEncoderLayer(in_size, 
                                                       n_heads,)
        
        self.encoder=nn.TransformerEncoder(self.encoder_layer, n_encoders)
#        self.decoder=nn.TransformerDecoder(self.decoder_layer, 2)

        print("transformer in size", self.in_size)

        self.classifier = nn.Linear(in_size, num_outputs)
        self.pos_embd=PositionalEncoder(in_size)
        
    def forward(self, x):
        """Forward function for Transformer Model.
        Reshapes input to encoder input (S, N, E) : S = sequence or # frame feature vectors fed in from patient (36), N = batch size (1), E = # of features (in_size, 256)"""
        x = x.permute(1, 0, 2) #now [S, 1, 256]
        
        if(self.use_position_enc): #positional encoding choice
            x = self.pos_embd(x)
        
        x = self.encoder(x)
        out = self.classifier(x)
        return out
    

**data transforms**

In [3]:
def normalize(col):
    """NORMALIZE 2D FEATURES from CSV file (manually extracted).
    Add minimum value to all feature values to make minimum 0, then normalize so all feature values between 0 and 1.
    Input is column of feature values (1 feature value per frame) for a given feature, output is normalized column of feature values."""

    minfeat = np.nanmin(col)
    if(minfeat < 0):
        for row in range(len(col)):
            col[row] = col[row] + (-1*minfeat)
    #min feature should now be 0 (no negatives)
    
    maxfeat = np.nanmax(col)
    if(maxfeat != 0):
        for row in range(len(col)):
            if(math.isnan(col[row])):
                col[row] = 0.0
            col[row] = (col[row]/maxfeat)
    return col    


In [4]:
def process2dfeatures(vertconcat, cvphase, features2dpath, phase):
    """Extract manual 2d features, normalize features by column.
    Keyword arguments:
    vertconcat -- whether to vertically concatenate (true) or horizontally concatenate (false) manual 2d features.
    cvphase -- cross validation fold (0 to 4)
    features2dpath -- path to manual 2d features.
    Return lists of feature vectors by frame, label, patient ID and frame number within patient."""

    if(vertconcat):
        featurelength = 256
    else:
        featurelength = 256
    cur_all_concats = np.zeros((0,256)) #reset to empty
    concatpats = []
    concatlabs = []
    concatframenums = []
    
    if(phase == "train"):
        if(cvphase == 0):
            curfold = 3
        elif(cvphase == 1):
            curfold = 4
        else:
            curfold = cvphase - 2
    elif(phase == "val"):
        if(cvphase == 4):
            curfold = 0
        else:
            curfold = cvphase + 1
    elif(phase == "test"):
        curfold = cvphase
    elif(phase == "trainval"):
        if(cvphase == 0):
            curfold = 3
        elif(cvphase == 1):
            curfold = 4
        else:
            curfold = cvphase - 2

    rowcount = 0
    #numberind = 0
    with open(features2dpath, newline='') as infh:
        print("opened csv to concatenate manual features")
        reader = csv.reader(infh)
        rowcount = 0

        mypats = []
        curstartsubtract = 0
        curind = 1
        for row in reader:
            if(row[2] != '0' and row[2] != '1'):
                print("error col label:", row[2])
                continue

            #numberind = (row[0].index('_'))+1
            curpatindexfull = row[0]
            concatpats.append(row[1])
            if(row[1] not in mypats):
                mypats.append(row[1])
                curind = 1 #first framenum, renumber
            concatframenums.append(curind)#(curpatindexfull[numberind:])
            concatlabs.append(row[2])

            concats = np.array(row[3:], dtype=float)
            #extrapad = np.zeros((1, featurelength-256))
            if(vertconcat):
                if(len(concats)<featurelength):
                    concats = np.append(concats, extrapad) #pad to 256
            
            cur_all_concats = np.vstack((cur_all_concats, concats)) #should have length 256
            if(rowcount % 5000 == 0):
                print("row:", rowcount, "curind", curind)
            #end loop, now new loop
            rowcount += 1
            curind += 1 #framenum
        print("rowcount", rowcount)

    #loop through columns and normalize
    for colnum in range(len(cur_all_concats[0])):
        cur_all_concats[:, colnum] = normalize(cur_all_concats[:, colnum])
        
    return cur_all_concats, concatpats, concatlabs, concatframenums

In [5]:
def load_csv_padded_transformer_horiz_concat(phase, cvphase, frametype, project_home_dir, features2dpath):
    """Extract features from CNN csv file for given phase and cvphase, stack based on frametype.
    Manual 2d features concatenated horizontally here (102 added to 256 features to have 358 per frame feature vector).
    Keyword arguments:
    phase -- train, val, trainval or test (which data to use)
    cvphase -- cross validation fold (0 to 4)
    frametype -- adjacent, equalspaced or singleframe (whether to stack frames or not, and how if so)
    Return (for given cross validation fold and train/val/trainval/test phase) lists of all feature vectors, labels, patient IDs and frame numbers within patient."""
    foundcount = 0
    
    trainvalfile = features2dpath
    filename = trainvalfile
    
    curfold = 0
    print("CVPHASE:", cvphase, "\ncsv:", filename)
    if(phase == "train"):
        curfile = filename
        if(cvphase == 0):
            curfold = 3
        elif(cvphase == 1):
            curfold = 4
        else:
            curfold = cvphase - 2
    elif(phase == "val"):
        curfile = valfile
        if(cvphase == 4):
            curfold = 0
        else:
            curfold = cvphase + 1
    elif(phase == "test"):
        curfile = testfile
        curfold = cvphase
    elif(phase == "trainval"):
        curfile = trainvalfile
        if(cvphase == 0):
            curfold = 3
        elif(cvphase == 1):
            curfold = 4
        else:
            curfold = cvphase - 2
    
    featurelength = 256+102
        
    cur_all_pat_inds = []
    cur_all_patients = []
    cur_all_labels = []
    cur_all_probs = np.zeros((0,featurelength))
    
    #col 0 = framenum, col 1 = patient id, col 2 = label, rest = probabilities
    rowcount = 0
    with open(curfile, newline='') as infh:
        print("opened csv for cnn features")
        reader = csv.reader(infh)
        rowcount = 0
        for row in reader:
            if(row[3] == ''):
                print("ERROR", row[2])
                continue
            cur_all_pat_inds.append(row[0])
            cur_all_labels.append(row[2])
            cur_all_patients.append(row[1])
            probs = np.array(row[3:], dtype=float)
            probs = np.append(probs, np.zeros((102)))
            
            if(rowcount % 500 == 0):
                print("row:", rowcount)
            
            cur_all_probs = np.vstack((cur_all_probs, probs))
            rowcount += 1
        print("rowcount", rowcount)
            
    print("length of probs, labels, ids", len(cur_all_probs), len(cur_all_labels), len(cur_all_patients))
    
    #get number of images per patient
    distinct_patient_ids = []
    distinct_num_pats = []
    
    #SORT IN ORDER OF FRAME NUM NOW, BEFORE PADDING!
    cur_all_pat_inds = np.array(cur_all_pat_inds).astype(np.float64)
    print("type of cur_all_pat_inds", type(cur_all_pat_inds))
    templist = list(zip(cur_all_patients, cur_all_pat_inds, cur_all_probs, cur_all_labels))
    sortedlist = sorted(templist, key=operator.itemgetter(0, 1))
    cur_all_patients, cur_all_pat_inds, cur_all_probs, cur_all_labels = zip(*sortedlist)
    print("\n\n\n should be sorted (framenums, patient id, label):", cur_all_pat_inds[0:10], cur_all_patients[0:10], cur_all_labels[0:10])
    
    print("\n\nLOADING", phase,"PHASE")
    maxpat = ""
    maxpatcount = 0
    curpatcount = 0
    
    for patt in cur_all_patients:
        if (patt not in distinct_patient_ids):
            curpatcount = 0
            distinct_patient_ids.append(patt)
            for xz in range(len(cur_all_patients)):                
                if(cur_all_patients[xz] == patt):
                    curpatcount += 1
                    if(curpatcount >= maxpatcount):
                        maxpat = patt
                        maxpatcount = curpatcount
                        
            distinct_num_pats.append(curpatcount)
            print("patient", patt, ": CNN extracted # of frames =", curpatcount)
    
    print("cur all probs shape", np.shape(cur_all_probs))
    print("num patients:", len(distinct_patient_ids), " total length:", len(cur_all_patients))
    

    cur_all_concats, _, _, _ = process2dfeatures(False, cvphase, features2dpath, "train")
    _, concatpats, concatlabs, concatframenums = cur_all_probs, cur_all_patients, cur_all_labels, cur_all_pat_inds


    #need to now add to probs the next two rows and then average before the for loop
    #THIS IS FOR ADJACENT FRAMES!
    print("from concat features: pats", concatpats[int(len(concatpats)/3)])

    for looppats in range(len(cur_all_patients)): #every single index of each individual feature vector instance. should be thousands
        if(looppats % 1000 == 0):
            print("looping through all feature vectors, index", looppats)
        thecurpat = cur_all_patients[looppats]
        thecurframenum = cur_all_pat_inds[looppats]

        patientind = concatpats.index(thecurpat) #find first instance of this patient in concat patient list
        lastpatientind = len(concatpats) - 1 - concatpats[::-1].index(thecurpat) #find last instance of this patient in patient list

        avgfeatures = np.zeros((3,256))
        found = False
        for gg in range(patientind, lastpatientind):
            avgfeatures = np.zeros((3,256))

            if(int(cur_all_pat_inds[looppats]) == int(concatframenums[gg])): #corresponding first frame #
                if(gg>=(len(cur_all_concats)-2)):
                    print("over limit; gg, features", gg, "len avg features and concats", len(avgfeatures), len(cur_all_concats[gg]))
                    gg-=2
                foundcount += 1
                found = True
                avgfeatures[0] = cur_all_concats[gg]#1st row of concat probs
                avgfeatures[1] = cur_all_concats[gg+1]#add 2nd row of concat probs
                avgfeatures[2] = cur_all_concats[gg+2]#add 3rd row of concat probs

                avgfeatures = np.average(avgfeatures, axis=0)
                avgfeatures = np.array(avgfeatures)
                print(avgfeatures.shape)
                cur_all_probs[looppats][-256:] = avgfeatures

        patienttot = len(cur_all_patients) - 1 - cur_all_patients[::-1].index(thecurpat) - cur_all_patients.index(thecurpat)
        if(not found):
            print("framenum notmatching", thecurpat, "cnn framenum:", cur_all_pat_inds[looppats], "vs.", concatframenums[patientind], "to", concatframenums[lastpatientind])#:lastpatientind])

        """#below is for appending first frame's features, not average of 3 stacked frames
        for gg in range(patientind, lastpatientind):
            if(cur_all_pat_inds[gg] == curpatindexfull[numberind:]):
                cur_all_probs[gg][256:] = probs #np.vstack((cur_all_probs, probs))#np.stack((cur_all_probs, probs))
                #print("ERROR SHAPE IS WRONG!!! in row", rowcount, len(cur_all_probs[gg][256:]), "vs. len probs", len(probs))
        """
    print("\n\nFOUND total of", foundcount,"corresponding 2d features vs. total of", len(cur_all_probs), "frames")

    tempp = list(zip(distinct_num_pats, distinct_patient_ids)) 
    tempp = sorted(tempp) 
    distinct_num_pats, distinct_patient_ids = zip(*tempp)
    
    print(phase, "PATIENTS:", distinct_patient_ids)
    print("cvphase", cvphase,"numpatients input:", len(distinct_patient_ids))
    
    curpatprobs = np.zeros((0,featurelength))
    curpatlabs = []
    curpatids = []
    curpatframenums = []
    
    allfeatures = np.zeros((0, featurelength))
    alllabels = []
    allids = []
    allframenums = []
    
    #ADD INDIVIDUAL PATIENT VECTORS TO ALLFEATURES VECTOR
    #pad and stack
    counttt = 0
    seq_len = 36

    for patind in range(len(distinct_patient_ids)):
        curpatprobs = np.zeros((0,featurelength))
        curpatlabs = []
        curpatids = []
        curpatframenums = []
        
        pat = distinct_patient_ids[patind]
        for p in range(len(cur_all_patients)):
            if (cur_all_patients[p] == pat):
                curprobs = cur_all_probs[p]
                curprobs = curprobs.reshape((1,featurelength))
                curlabels = cur_all_labels[p]
                curids = cur_all_patients[p]
                curframenums = cur_all_pat_inds[p]
                
                curpatprobs = np.concatenate((curpatprobs, curprobs), axis=0)
                curpatlabs = np.append(curpatlabs, curlabels)
                curpatids = np.append(curpatids, curids)
                curpatframenums = np.append(curpatframenums, curframenums)
        
        
        numimgs = len(curpatprobs)
        goodlenprobs = np.zeros((seq_len,featurelength))
        #PAD NOW
        if (numimgs > seq_len):
            num_seqs = 1
            lencurseq = 36
            while((numimgs / num_seqs) > seq_len):
                num_seqs += 1
            
            lencurseq = numimgs / num_seqs
            
            for g in range(num_seqs):
                #split into groups of less than 36 vectors of length 256
                start = int(g*lencurseq)
                end = int(((g+1)*lencurseq))
                if(g<(num_seqs)-1):
                    currentseq = curpatprobs[start:end]
                    currentlabs = curpatlabs[start:end]
                    currentids = curpatids[start:end]
                    currentframes = curpatframenums[start:end]
                else:
                    currentseq = curpatprobs[start:]
                    currentlabs = curpatlabs[start:]
                    currentids = curpatids[start:]
                    currentframes = curpatframenums[start:]
                    
                #pad each to 36
                currentseq = torch.nn.utils.rnn.pad_sequence([torch.from_numpy(currentseq), torch.from_numpy(goodlenprobs)], batch_first=True, padding_value=0.0)
                currentseq = np.array(currentseq[0])
                while(len(currentlabs)<seq_len):
                    currentlabs = np.append(currentlabs, currentlabs[0]) #or just '0'?
                    currentids = np.append(currentids, currentids[0])
                    currentframes = np.append(currentframes, 0)
                if((g==0) and (patind == 0)):
                    print("shape of padded sequence, labels, ids:", np.shape(currentseq), np.shape(currentlabs), np.shape(currentids), np.shape(currentframes))
                
                #add 36 at a time from curpat probs to allfeatures
                allfeatures = np.concatenate((allfeatures, currentseq), axis=0)
                alllabels = np.append(alllabels, currentlabs)
                allids = np.append(allids, currentids)
                allframenums = np.append(allframenums, currentframes)
            
        elif (numimgs < seq_len):
            #just pad to 36
            diff = seq_len-len(curpatprobs)
            while(len(curpatlabs)<seq_len):
                curpatlabs = np.append(curpatlabs, curpatlabs[0])
                curpatids = np.append(curpatids, curpatids[0])
                curpatframenums = np.append(curpatframenums, curpatframenums[0])#0)

            currentseq = torch.nn.utils.rnn.pad_sequence([torch.from_numpy(curpatprobs), torch.from_numpy(goodlenprobs)], batch_first=True, padding_value=0.0)
            currentseq = np.array(currentseq[0])
            
            #add the 36 from curpatprobs to allfeatures
            allfeatures = np.concatenate((allfeatures, currentseq), axis=0)
            alllabels = np.append(alllabels, curpatlabs)
            allids = np.append(allids, curpatids)
            allframenums = np.append(allframenums, curpatframenums)

        else:
            allfeatures = np.concatenate((allfeatures, curpatprobs), axis=0)
            alllabels = np.append(alllabels, curpatlabs)
            allids = np.append(allids, curpatids)
            allframenums = np.append(allframenums, curpatframenums)
        
        counttt+= 1
        
    print("example features:\n", allfeatures[0][0], allfeatures[0][256], allfeatures[0][300])
    print("\nFINAL features, labels, ids shapes", np.shape(allfeatures), np.shape(alllabels), np.shape(allids), np.shape(allframenums))
    return(allfeatures, alllabels, allids, allframenums) #return feature vectors and labels and patient ids for transformer



In [6]:
!ls backbone_embeddings

mobilenet_embeddings_ALL_IMAGES.csv  ViT_embeddings_FOLD0.csv
mobilenet_embeddings_FOLD0.csv	     ViT_embeddings_FOLD1.csv
mobilenet_embeddings_FOLD1.csv	     ViT_embeddings_FOLD2.csv
mobilenet_embeddings_FOLD2.csv	     ViT_embeddings_FOLD3.csv
mobilenet_embeddings_FOLD3.csv	     ViT_embeddings_FOLD4.csv
mobilenet_embeddings_FOLD4.csv


In [7]:
class DatasetPaddedTransformer(data.Dataset):
    def __init__(self, phase, cvphase, concat_features, vertconcat, frametype, project_home_dir, all):
        """Data loader for Transformer Model."""
        super(DatasetPaddedTransformer, self).__init__()
        
        if all == False:
            features2dpath = "backbone_embeddings/mobilenet_embeddings_FOLD%s.csv" % cvphase
        else:
            features2dpath = "backbone_embeddings/mobilenet_embeddings_ALL_IMAGES.csv"
            

        self.phase = phase
        if(True): #horizontal concat or vertical concatenated 2d features
            if(vertconcat):
                self.all_frames, self.all_labels, self.all_annot_ids, self.all_frame_nums = load_csv_padded_transformer_vertical_concat(phase, cvphase, frametype, project_home_dir, features2dpath)
            else:
                self.all_frames, self.all_labels, self.all_annot_ids, self.all_frame_nums = load_csv_padded_transformer_horiz_concat(phase, cvphase, frametype, project_home_dir, features2dpath)
        else: #no concatenated 2d features
            self.all_frames, self.all_labels, self.all_annot_ids, self.all_frame_nums = load_csv_padded_transformer(phase, cvphase, frametype, project_home_dir)
                
        self.seqlength = 36
        self.start_indices = []
        numiters = len(self.all_frames) / self.seqlength
        print("numiters", numiters)
        for x in range(int(numiters)):
            self.start_indices.append(x*self.seqlength)
        print("done reading in images and labels for", phase, "!!!\n\n")
        print("start indices (should be multiples of sequence length only):", self.start_indices)
        print("all frames:", len(self.all_frames), np.shape(self.all_frames))
    
    
    #getitem is called 'batch_size' number of times in one iteration of the epoch
    def __getitem__(self, i):
        startind = self.start_indices[i]
        try:
            endind = self.start_indices[i+1] #this is the first of the NEXT patient so do NOT INCLUDE!
        except: #last patient
            endind = len(self.all_labels)
        
        img_frames = self.all_frames[startind:endind] #feature vectors. img_frames should be size (numimgsinpatient, 2)
        annot_ids = self.all_annot_ids[startind:endind] #should be all the same, annot_ids should be size (numimgsinpatient)
        intannot_ids = []
        
        #create labels for images
        labels = torch.LongTensor(endind-startind)
        for l in range(0, endind-startind):
            labels[l] = int(self.all_labels[startind+l]) #should be all the same, labels should be size (numimgsinpatient)
            intannot_ids.append(int(annot_ids[l][:-1])) #make into integer
        
        inputs = torch.from_numpy(img_frames).float()
        
        intannot_ids = np.array(intannot_ids)
        intannot_ids = torch.from_numpy(intannot_ids)
        
        framenums = self.all_frame_nums[startind:endind] #frame nums, should be in order

        for i in range(len(framenums)):
            framenums[i] = float(framenums[i])
        framenums = np.array(framenums, dtype=np.float32)
        framenums = torch.from_numpy(framenums).float()
        
        return {'input': inputs, 'label': labels, 'annot_id': intannot_ids, 'framenum': framenums}

    def __len__(self):
        return len(self.all_frames)


In [8]:
test_dataset = DatasetPaddedTransformer("train", 0, True, False, "adjacent", ".", True)

CVPHASE: 0 
csv: backbone_embeddings/mobilenet_embeddings_ALL_IMAGES.csv
opened csv for cnn features
row: 0
row: 500
row: 1000
row: 1500
row: 2000
row: 2500
row: 3000
row: 3500
row: 4000
row: 4500
row: 5000
row: 5500
rowcount 5661
length of probs, labels, ids 5661 5661 5661
type of cur_all_pat_inds <class 'numpy.ndarray'>



 should be sorted (framenums, patient id, label): (2.0, 5.0, 8.0, 11.0, 14.0, 17.0, 20.0, 23.0, 26.0, 29.0) ('100_', '100_', '100_', '100_', '100_', '100_', '100_', '100_', '100_', '100_') ('0', '0', '0', '0', '0', '0', '0', '0', '0', '0')


LOADING train PHASE
patient 100_ : CNN extracted # of frames = 13
patient 101_ : CNN extracted # of frames = 20
patient 102_ : CNN extracted # of frames = 29
patient 103_ : CNN extracted # of frames = 22
patient 104_ : CNN extracted # of frames = 33
patient 105_ : CNN extracted # of frames = 14
patient 106_ : CNN extracted # of frames = 20
patient 108_ : CNN extracted # of frames = 40
patient 109_ : CNN extracted # of frames = 

## Training

In [9]:
 patientwise_auroc = 0
#learning rate
lrs = []

seed_value = 1
torch.manual_seed(seed_value)

losses = []
#f_losses = []
losses_val = []
#f_losses_val = []

running_loss = 0.0
running_loss_val = 0.0

total_all = 0
correct_all = 0

transall_labels = []
all_probs_ones = []
transall_patients = [] #val
train_transall_patients = [] #train

tlabelsnp = np.zeros(16)

epoch_aurocs = []

#default for early stopping
min_val_loss = 10
prev_val_loss = 10
epochs_no_improve = 0
n_epochs_stop = 5
early_stop = False
min_epoch = 0

In [10]:
!nvidia-smi

Wed Dec 13 04:07:08 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.113.01             Driver Version: 535.113.01   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 3080        Off | 00000000:08:00.0  On |                  N/A |
|  0%   40C    P8              46W / 370W |    339MiB / 10240MiB |     22%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [11]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("device", device)

device cpu


In [12]:
model = TransformerModel(256+102, use_position_enc= False)#number of features in each.

for param in model.parameters():
    param.requires_grad = True

transformer in size 358




In [13]:
#adding weights to loss function because of imbalance in dataset
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.SGD(model.parameters(), 0.001, momentum=0.9)
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, 0.001, 100, 50)

In [14]:
transformer_train_set = DatasetPaddedTransformer("train", 1, True, False, "adjacent", ".", False)
transformer_test_set = DatasetPaddedTransformer("train", 2, True, False, "adjacent", ".", False)

CVPHASE: 1 
csv: backbone_embeddings/mobilenet_embeddings_FOLD1.csv
opened csv for cnn features
row: 0
row: 500
row: 1000
rowcount 1136
length of probs, labels, ids 1136 1136 1136
type of cur_all_pat_inds <class 'numpy.ndarray'>



 should be sorted (framenums, patient id, label): (3.0, 6.0, 9.0, 12.0, 15.0, 18.0, 21.0, 24.0, 27.0, 30.0) ('102_', '102_', '102_', '102_', '102_', '102_', '102_', '102_', '102_', '102_') ('0', '0', '0', '0', '0', '0', '0', '0', '0', '0')


LOADING train PHASE
patient 102_ : CNN extracted # of frames = 29
patient 105_ : CNN extracted # of frames = 14
patient 109_ : CNN extracted # of frames = 40
patient 110_ : CNN extracted # of frames = 15
patient 116_ : CNN extracted # of frames = 46
patient 11_ : CNN extracted # of frames = 55
patient 125_ : CNN extracted # of frames = 39
patient 126_ : CNN extracted # of frames = 11
patient 138_ : CNN extracted # of frames = 36
patient 143_ : CNN extracted # of frames = 51
patient 160_ : CNN extracted # of frames = 49
p

**training loop**

In [15]:
train_set_loader = DataLoader(dataset=transformer_train_set, num_workers=0, batch_size=1, shuffle=False)
val_set_loader = DataLoader(dataset=transformer_test_set, num_workers=0, batch_size=1, shuffle=False)

In [16]:
#START TRAINING!
numpatsval = 10
numpatstrain = 10
for epoch in tqdm(range(100)):
    #DOING THIS BY EPOCH FOR AUROC
    transall_labels = []
    all_probs_ones = []
    transall_patients = []
    train_transall_patients = []

    correct = 0
    total = 0
    traincorrect = 0
    traintotal = 0
    valcount = 0.0
    traincount = 0.0

    
#        UPDATE LEARNING RATE ONCE PER EPOCH
    if(epoch > 0):
        scheduler.step()

     #this calls getitem (for each i in train_set_loader)
    print("num iterations of train:", len(train_set_loader))
    model.train() #train mode
    for i, data in enumerate(train_set_loader):
        
        if(i >= (numpatstrain-1)):
            print("DONE at batch number:", i, "and ending now")
            break

        inputs = data['input'].to(device)
        labels = data['label'].to(device)
        annot_ids = data['annot_id']
        framenums = data['framenum']
        
        optimizer.zero_grad()
        #get batch size out of all
        labels = labels.squeeze(0)
        annot_ids = annot_ids.squeeze(0)
        
        train_transall_patients = np.append(train_transall_patients, annot_ids[0])
        if (i % 100 == 0):
            print("in train:", inputs.shape, labels.shape, annot_ids.shape)
            print("batch index {}, 0/1 distribution: {}/{}".format(i, len(np.where(labels.cpu().numpy() == 0)[0]),
        len(np.where(labels.cpu().numpy() == 1)[0])))
            
        # forward + backward + optimize (to find good parameters: weights + bias)
        outputs = model(inputs).to(device)
        #now squeeze outt he 1 from the outputs shape [x, 1, 2]
        outputs = outputs.squeeze(1)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        traincount += 1.0
        

        sf = nn.Softmax(dim=1) #makes items in a row add to 1; dim = 0 makes items in a column add to 1
        outputs = sf(outputs)
        _, trainpredicted = torch.max(outputs, 1)

        traincorrect += (trainpredicted.cpu() == labels.cpu()).sum()


        # Total number of labels
        try:
            traintotal += len(labels.cpu())
        except:
            print(i, "index len 0 trainlabels; input shape", np.shape(inputs.cpu().numpy()))

        trainaccuracy = traincorrect / traintotal

        model.train()
        #calculate val accuracy

        with torch.no_grad():
            print("num iterations of val:", len(val_set_loader))
            for j, dataa in enumerate(val_set_loader):

                if(j >= (numpatsval-1)):
                    print("val DONE at batch number:", j, "and ending now")
                    break

                tinputs = dataa['input'].to(device)
                tlabels = dataa['label'].to(device)
                tannot_ids = dataa['annot_id']

                tlabels = tlabels.squeeze(0)
                tannot_ids = tannot_ids.squeeze(0)
    
                # Forward pass only to get logits/output
                outs = model(tinputs)
                outs = outs.squeeze(1)


                sf = nn.Softmax(dim=1) #makes items in a row add to 1; dim = 0 makes items in a column add to 1
                outs = sf(outs)

                _, predicted = torch.max(outs, 1)

                outs_ones = outs.detach().cpu().numpy()[:, 1]
                tlabelsnp = tlabels.cpu().numpy()
                transall_labels = np.append(transall_labels, tlabelsnp)
                
                if(j%100 == 0 or (tlabelsnp[0] == "1" and j%20 == 0)):
                    if(np.isnan(outs.detach().cpu().numpy()).any()):
                        print("val inputs:", tinputs.detach().cpu().numpy())
                        print("val outputs:", outs.detach().cpu().numpy(), outs_ones, "vs. labels:", tlabelsnp)
                    


                all_probs_ones = np.append(all_probs_ones, outs_ones)

                tannot_ids = np.asarray(tannot_ids)
                transall_patients = np.append(transall_patients, tannot_ids)

                #  USE GPU FOR MODEL
                # Total correct predictions

                correct += (predicted.cpu() == tlabels.cpu()).sum()
                total += len(tlabels.cpu())

                #end of torch no grad
                accuracy = 100 * correct / total

            
            #print statistics
            message = 'epoch: %d,  with loss: %.5f, train_acc: %.4f, val accuracy: %.3f' % (
                epoch, loss, trainaccuracy.data, accuracy.data)
            print(message)
            
        patients = [] #distinct patients
        for p in range(len(transall_patients)):
            if not (transall_patients[p] in patients):
                patients.append(transall_patients[p])

        patient_ave_preds = []
        patientlabels = []
        count = 0
        sum_pat_pred = 0
        cur_pat_labels = []
        patientwise_auroc = 0

        while(count < len(patients)):
            for p in range(len(transall_patients)):
                if (transall_patients[p] == patients[count]): #one patient at a time
                    sum_pat_pred += all_probs_ones[p]
                    cur_pat_label = transall_labels[p]
                    cur_pat_labels.append(transall_labels[p])
            patient_ave_preds.append(sum_pat_pred / float(len(cur_pat_labels)))
            patientlabels.append(cur_pat_labels[0])
            
            count += 1
            cur_pat_labels = []
            sum_pat_pred = 0
        patientwise_auroc = roc_auc_score(patientlabels, patient_ave_preds)
        print("AUC %.4f" % patientwise_auroc)
        model.train() #back to train mode, end of validation mode


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for epoch in tqdm(range(100)):


  0%|          | 0/100 [00:00<?, ?it/s]

num iterations of train: 1908
in train: torch.Size([1, 36, 358]) torch.Size([36]) torch.Size([36])
batch index 0, 0/1 distribution: 36/0
num iterations of val: 1908
val DONE at batch number: 9 and ending now
epoch: 0,  with loss: 0.27013, train_acc: 1.0000, val accuracy: 78.395
AUC 1.0000
num iterations of val: 1908
val DONE at batch number: 9 and ending now
epoch: 0,  with loss: 1.46884, train_acc: 0.5139, val accuracy: 78.858
AUC 1.0000
num iterations of val: 1908
val DONE at batch number: 9 and ending now
epoch: 0,  with loss: 0.39668, train_acc: 0.6667, val accuracy: 79.321
AUC 1.0000
num iterations of val: 1908
val DONE at batch number: 9 and ending now
epoch: 0,  with loss: 0.35783, train_acc: 0.7500, val accuracy: 79.090
AUC 1.0000
num iterations of val: 1908
val DONE at batch number: 9 and ending now
epoch: 0,  with loss: 0.38507, train_acc: 0.7889, val accuracy: 78.951
AUC 1.0000
num iterations of val: 1908
val DONE at batch number: 9 and ending now
epoch: 0,  with loss: 0.341