## PyTorch Work 

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

### Directory Structure Based Dataloader

In [87]:
data_transform = transforms.Compose([
        #transforms.RandomSizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        #transforms.Normalize(mean=[0.485, 0.456, 0.406],
        #                     std=[0.229, 0.224, 0.225])
    ])

mitosis_dataset = datasets.ImageFolder(root='../model_dev/data_sample/train', 
                                           transform=data_transform)
dataset_loader = torch.utils.data.DataLoader(mitosis_dataset,
                                             batch_size=4, shuffle=True,
                                             num_workers=4)

In [5]:
resnet18 = models.resnet18(pretrained=True)
alexnet = models.alexnet(pretrained=True)
squeezenet = models.squeezenet1_0(pretrained=True)
vgg16 = models.vgg16(pretrained=True)
#densenet = models.densenet161(pretrained=True)
#inception = models.inception_v3(pretrained=True)
#googlenet = models.googlenet(pretrained=True)
#shufflenet = models.shufflenet_v2_x1_0(pretrained=True)
#mobilenet = models.mobilenet_v2(pretrained=True)
#resnext50_32x4d = models.resnext50_32x4d(pretrained=True)
#wide_resnet50_2 = models.wide_resnet50_2(pretrained=True)
#mnasnet = models.mnasnet1_0(pretrained=True)

Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /home/jmwolf/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth


  0%|          | 0.00/233M [00:00<?, ?B/s]

Downloading: "https://download.pytorch.org/models/squeezenet1_0-b66bff10.pth" to /home/jmwolf/.cache/torch/hub/checkpoints/squeezenet1_0-b66bff10.pth


  0%|          | 0.00/4.78M [00:00<?, ?B/s]

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /home/jmwolf/.cache/torch/hub/checkpoints/vgg16-397923af.pth


  0%|          | 0.00/528M [00:00<?, ?B/s]

In [45]:
dir(models)

['AlexNet',
 'ConvNeXt',
 'DenseNet',
 'EfficientNet',
 'GoogLeNet',
 'GoogLeNetOutputs',
 'Inception3',
 'InceptionOutputs',
 'MNASNet',
 'MobileNetV2',
 'MobileNetV3',
 'RegNet',
 'ResNet',
 'ShuffleNetV2',
 'SqueezeNet',
 'VGG',
 'VisionTransformer',
 '_GoogLeNetOutputs',
 '_InceptionOutputs',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_utils',
 'alexnet',
 'convnext',
 'convnext_base',
 'convnext_large',
 'convnext_small',
 'convnext_tiny',
 'densenet',
 'densenet121',
 'densenet161',
 'densenet169',
 'densenet201',
 'detection',
 'efficientnet',
 'efficientnet_b0',
 'efficientnet_b1',
 'efficientnet_b2',
 'efficientnet_b3',
 'efficientnet_b4',
 'efficientnet_b5',
 'efficientnet_b6',
 'efficientnet_b7',
 'feature_extraction',
 'googlenet',
 'inception',
 'inception_v3',
 'mnasnet',
 'mnasnet0_5',
 'mnasnet0_75',
 'mnasnet1_0',
 'mnasnet1_3',
 'mobilenet',
 'mobilenet_v2',
 'mobilenet_v3_large',
 '

In [13]:
resnet18.fc = nn.Linear(512, 2)

In [4]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

Using cuda device


In [13]:
# Top level data directory. Here we assume the format of the directory conforms
#   to the ImageFolder structure
data_dir = 'MITOS_Datasets/Data_CMC_COADEL_224_1/'

# Models to choose from [resnet, alexnet, vgg, squeezenet, densenet, inception]
model_name = "resnet"

# Number of classes in the dataset
num_classes = 2

# Batch size for training (change depending on how much memory you have)
batch_size = 8

# Number of epochs to train for
num_epochs = 50

# Flag for feature extracting. When False, we finetune the whole model,
#   when True we only update the reshaped layer params
feature_extract = True

In [14]:
torch.cuda.get_device_name()

'NVIDIA GeForce RTX 3080'

### Define Train Model

In [15]:
def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False

In [16]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False):
    since = time.time()

    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    # Get model outputs and calculate loss
                    # Special case for inception because in training it has an auxiliary output. In train
                    #   mode we calculate the loss by summing the final output and the auxiliary output
                    #   but in testing we only consider the final output.
                    if is_inception and phase == 'train':
                        # From https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4*loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'val':
                val_acc_history.append(epoch_acc)

        print()
        
        #optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
        #scheduler = ReduceLROnPlateau(optimizer, 'min')
        #for epoch in range(10):
        #     train(...)
        #     val_loss = validate(...)
        #     # Note that step should be called after validate()
        #     scheduler.step(val_loss)
        

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history

### Intialize Model

In [24]:
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    # Initialize these variables which will be set in this if statement. Each of these
    #   variables is model specific.
    model_ft = None
    input_size = 0

    if model_name == "resnet":
        """ Resnet18
        """
        model_ft = models.resnet18(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 128#224
        
    elif model_name == "resnet152":
        """ Resnet152
        """
        model_ft = models.resnet152(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs,num_classes)
        input_size = 128#224    

    elif model_name == "alexnet":
        """ Alexnet
        """
        model_ft = models.alexnet(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs,num_classes)
        input_size = 128#224

    elif model_name == "vgg":
        """ VGG11_bn
        """
        model_ft = models.vgg11_bn(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs,num_classes)
        input_size = 224

    elif model_name == "squeezenet":
        """ Squeezenet
        """
        model_ft = models.squeezenet1_0(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.classifier[1] = nn.Conv2d(512, num_classes, kernel_size=(1,1), stride=(1,1))
        model_ft.num_classes = num_classes
        input_size = 128#224

    elif model_name == "densenet":
        """ Densenet
        """
        model_ft = models.densenet121(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier.in_features
        model_ft.classifier = nn.Linear(num_ftrs, num_classes)
        input_size = 128#224

    elif model_name == "inception":
        """ Inception v3
        Be careful, expects (299,299) sized images and has auxiliary output
        """
        model_ft = models.inception_v3(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        # Handle the auxilary net
        num_ftrs = model_ft.AuxLogits.fc.in_features
        model_ft.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)
        # Handle the primary net
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs,num_classes)
        input_size = 299

    else:
        print("Invalid model name, exiting...")
        exit()

    return model_ft, input_size



In [25]:
# Initialize the model for this run
# inception
# densenet
# squeezenet
# vgg
# alexnet
# resnet
model_name = "vgg"
model_ft, input_size = initialize_model(model_name, 2, feature_extract, use_pretrained=True)

# Print the model we just instantiated
print(model_ft)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU(inplace=True)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU(inplace=True)
    (11): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (13): ReLU(inplace=True)
    (14): MaxPool2d(ke

In [22]:
model_ft.fc = nn.Linear(512, num_classes)

In [26]:
# Data augmentation and normalization for training
# Just normalization for validation
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

print("Initializing Datasets and Dataloaders...")

Initializing Datasets and Dataloaders...


In [27]:
#image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in ['train', 'Mitosis']}

# Create training and validation dataloaders
dataloaders_dict = {x: torch.utils.data.DataLoader(mitosis_dataset, batch_size=batch_size, shuffle=True, num_workers=4) for x in ['train', 'val']}

In [28]:
# Send the model to GPU
model_ft = model_ft.to(device)

# Gather the parameters to be optimized/updated in this run. If we are
#  finetuning we will be updating all parameters. However, if we are
#  doing feature extract method, we will only update the parameters
#  that we have just initialized, i.e. the parameters with requires_grad
#  is True.
params_to_update = model_ft.parameters()
print("Params to learn:")
if feature_extract:
    params_to_update = []
    for name,param in model_ft.named_parameters():
        if param.requires_grad == True:
            params_to_update.append(param)
            print("\t",name)
else:
    for name,param in model_ft.named_parameters():
        if param.requires_grad == True:
            print("\t",name)

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(params_to_update, lr=0.001, momentum=0.9)

Params to learn:
	 classifier.6.weight
	 classifier.6.bias


In [29]:
model_ft

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU(inplace=True)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU(inplace=True)
    (11): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (13): ReLU(inplace=True)
    (14): MaxPool2d(ke

In [30]:
# Setup the loss fxn
criterion = nn.CrossEntropyLoss()

# Train and evaluate
model_ft, hist = train_model(model_ft, dataloaders_dict, criterion, optimizer_ft, num_epochs=num_epochs, is_inception=(model_name=="inception"))

Epoch 0/49
----------
train Loss: 0.6713 Acc: 0.6795
val Loss: 0.5940 Acc: 0.6817

Epoch 1/49
----------
train Loss: 0.6935 Acc: 0.6748
val Loss: 0.5880 Acc: 0.6931

Epoch 2/49
----------
train Loss: 0.7013 Acc: 0.6711
val Loss: 0.5853 Acc: 0.7216

Epoch 3/49
----------
train Loss: 0.6956 Acc: 0.6754
val Loss: 0.6418 Acc: 0.7212

Epoch 4/49
----------
train Loss: 0.6943 Acc: 0.6745
val Loss: 0.6600 Acc: 0.7229

Epoch 5/49
----------
train Loss: 0.6960 Acc: 0.6752
val Loss: 0.5751 Acc: 0.7013

Epoch 6/49
----------
train Loss: 0.6935 Acc: 0.6719
val Loss: 0.5649 Acc: 0.7110

Epoch 7/49
----------
train Loss: 0.6998 Acc: 0.6717
val Loss: 0.6546 Acc: 0.7208

Epoch 8/49
----------
train Loss: 0.6988 Acc: 0.6739
val Loss: 0.5756 Acc: 0.7031

Epoch 9/49
----------
train Loss: 0.6934 Acc: 0.6766
val Loss: 0.5613 Acc: 0.7227

Epoch 10/49
----------
train Loss: 0.6944 Acc: 0.6755
val Loss: 0.5666 Acc: 0.7084

Epoch 11/49
----------
train Loss: 0.7040 Acc: 0.6721
val Loss: 0.5806 Acc: 0.6886

Ep

In [31]:
torch.save(model_ft, 'vgg_224_mitosis.pth')

In [32]:
optimizer_ft = optim.SGD(params_to_update, lr=0.0001, momentum=0.9)

In [33]:
# Setup the loss fxn
criterion = nn.CrossEntropyLoss()

# Train and evaluate
model_ft, hist = train_model(model_ft, dataloaders_dict, criterion, optimizer_ft, num_epochs=num_epochs, is_inception=(model_name=="inception"))

Epoch 0/49
----------
train Loss: 0.6214 Acc: 0.6950
val Loss: 0.5387 Acc: 0.7279

Epoch 1/49
----------
train Loss: 0.6014 Acc: 0.7016
val Loss: 0.5448 Acc: 0.7286

Epoch 2/49
----------
train Loss: 0.5846 Acc: 0.7066
val Loss: 0.5330 Acc: 0.7300

Epoch 3/49
----------
train Loss: 0.5767 Acc: 0.7119
val Loss: 0.5477 Acc: 0.7212

Epoch 4/49
----------
train Loss: 0.5727 Acc: 0.7141
val Loss: 0.5405 Acc: 0.7283

Epoch 5/49
----------
train Loss: 0.5663 Acc: 0.7169
val Loss: 0.5345 Acc: 0.7303

Epoch 6/49
----------
train Loss: 0.5641 Acc: 0.7175
val Loss: 0.5351 Acc: 0.7333

Epoch 7/49
----------
train Loss: 0.5625 Acc: 0.7176
val Loss: 0.5330 Acc: 0.7333

Epoch 8/49
----------
train Loss: 0.5624 Acc: 0.7185
val Loss: 0.5336 Acc: 0.7332

Epoch 9/49
----------
train Loss: 0.5590 Acc: 0.7174
val Loss: 0.5342 Acc: 0.7310

Epoch 10/49
----------
train Loss: 0.5635 Acc: 0.7154
val Loss: 0.5332 Acc: 0.7296

Epoch 11/49
----------
train Loss: 0.5600 Acc: 0.7199
val Loss: 0.5326 Acc: 0.7312

Ep

KeyboardInterrupt: 

In [35]:
import os
os.mkdir('data_sample')

In [39]:
%cd '../model_dev/data_sample'
!pwd

/home/jmwolf/repos/XAI_Healthcare/model_dev/data_sample
/home/jmwolf/repos/XAI_Healthcare/model_dev/data_sample


In [41]:
os.mkdir('train')
%cd '../data_sample/train'
!pwd

[Errno 2] No such file or directory: '../data_sample/train'
/home/jmwolf/repos/XAI_Healthcare/model_dev/data_sample/train
/home/jmwolf/repos/XAI_Healthcare/model_dev/data_sample/train


In [42]:
os.mkdir('Mitosis')
os.mkdir('Nonmitosis')

In [43]:
import shutil

In [44]:
path_m = '../model_dev/Data_CMC_COADEL_224_1/train/Mitosis'
path_nm = '../model_dev/Data_CMC_COADEL_224_1/train/Nonmitosis'

In [45]:
dest_path_m = '../model_dev/data_sample/train/Mitosis'
dest_path_nm = '../model_dev/data_sample/train/Nonmitosis'

In [51]:
!pwd
%cd ../../
!pwd

/home/jmwolf/repos/XAI_Healthcare/model_dev/data_sample/train
/home/jmwolf/repos/XAI_Healthcare/model_dev
/home/jmwolf/repos/XAI_Healthcare/model_dev


In [52]:
mitosis_array = os.listdir(path_m)

In [54]:
mitosis_array[0]

'8739.jpg'

In [59]:
length = len(mitosis_array)
length

10695

In [61]:
import random
import glob

In [63]:
%rm -r ../model_dev/Data_CMC_COADEL_224_1/train/Mitosis/.ipynb_checkpoints

In [64]:
filenames = random.sample(os.listdir(path_m), round((length*0.1)))
for fname in filenames:
    srcpath = os.path.join(path_m, fname)
    destpath = os.path.join(dest_path_m, fname)
    shutil.copyfile(srcpath, destpath)

In [65]:
mitosis_array = os.listdir(path_nm)
length = len(mitosis_array)
length

27256

In [66]:
filenames = random.sample(os.listdir(path_nm), round((length*0.1)))
for fname in filenames:
    srcpath = os.path.join(path_nm, fname)
    destpath = os.path.join(dest_path_nm, fname)
    shutil.copyfile(srcpath, destpath)

In [67]:
mitosis_array = os.listdir(dest_path_nm)
length = len(mitosis_array)
length

2726

In [69]:
mitosis_array = os.listdir(dest_path_m)
length = len(mitosis_array)
length

1752

In [83]:
files = glob.glob('../model_dev/data_sample/train/Nonmitosis/' + '.ipynb_checkpoints')
files

[]

In [86]:
%rm -r '../model_dev/data_sample/train/.ipynb_checkpoints'

In [89]:
#image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in ['train', 'Mitosis']}

# Create training and validation dataloaders
dataloaders_dict = {x: torch.utils.data.DataLoader(mitosis_dataset, batch_size=batch_size, shuffle=True, num_workers=4) for x in ['train', 'val']}

In [90]:
optimizer_ft = optim.SGD(params_to_update, lr=0.0001, momentum=0.9)
# Setup the loss fxn
criterion = nn.CrossEntropyLoss()

# Train and evaluate
model_ft, hist = train_model(model_ft, dataloaders_dict, criterion, optimizer_ft, num_epochs=num_epochs, is_inception=(model_name=="inception"))

Epoch 0/49
----------
train Loss: 0.6213 Acc: 0.6590
val Loss: 0.5878 Acc: 0.6847

Epoch 1/49
----------
train Loss: 0.6170 Acc: 0.6579
val Loss: 0.5844 Acc: 0.6894

Epoch 2/49
----------
train Loss: 0.6211 Acc: 0.6541
val Loss: 0.5873 Acc: 0.6883

Epoch 3/49
----------
train Loss: 0.6212 Acc: 0.6525
val Loss: 0.5819 Acc: 0.6907

Epoch 4/49
----------
train Loss: 0.6166 Acc: 0.6588
val Loss: 0.5838 Acc: 0.6965

Epoch 5/49
----------
train Loss: 0.6188 Acc: 0.6570
val Loss: 0.5861 Acc: 0.6912

Epoch 6/49
----------
train Loss: 0.6197 Acc: 0.6501
val Loss: 0.5810 Acc: 0.6878

Epoch 7/49
----------
train Loss: 0.6130 Acc: 0.6563
val Loss: 0.5812 Acc: 0.6903

Epoch 8/49
----------
train Loss: 0.6162 Acc: 0.6599
val Loss: 0.5782 Acc: 0.6891

Epoch 9/49
----------
train Loss: 0.6116 Acc: 0.6637
val Loss: 0.5774 Acc: 0.6889

Epoch 10/49
----------
train Loss: 0.6133 Acc: 0.6563
val Loss: 0.5772 Acc: 0.6954

Epoch 11/49
----------
train Loss: 0.6131 Acc: 0.6628
val Loss: 0.5772 Acc: 0.6903

Ep