In [95]:
import os, sys
from PIL import Image
import cv2
import pickle
import numpy as np
import pandas as pd

import torch, torchvision
from torch import optim, nn
from torch.autograd import Variable
from torch.utils.data import DataLoader, Dataset
from torchvision import models, transforms

## Training Consts
MODEL_DIR = ''  # not referenced by any code visible in this notebook
MODEL_NAMES = ['vgg','densenet']  # architectures to fine-tune, processed in this order
# Use the GPU whenever PyTorch can see one and fall back to the CPU otherwise,
# so the constant no longer has to be edited by hand per machine.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
NUM_EPOCHS = 15  # passes over the training loader per model
BATCH_SIZE = 32  # samples per DataLoader batch

## Data Handling Consts
# Dataset root. Set the HAM_DIR environment variable to point somewhere else;
# the original hard-coded location remains the default.
HAM_DIR = os.environ.get('HAM_DIR', '/Users/cz/Desktop/S3 CV/Project/[C]HAM10000')
# os.listdir returns entries in arbitrary order, so sort them to get the same
# listing on every run and every machine.
ALL_IMG_FPS = [os.path.join(HAM_DIR, 'Train', f)
               for f in sorted(os.listdir(os.path.join(HAM_DIR, 'Train')))]
# File name without directory or extension for every entry of ALL_IMG_FPS.
ALL_IMG_IDS = [os.path.splitext(os.path.basename(f))[0]
               for f in ALL_IMG_FPS]

IMG_SIZE = 224  # side length (pixels) images are resized to before entering a model
# Per-channel statistics handed to transforms.Normalize.
# NOTE(review): presumably computed on the HAM10000 training images -- confirm.
NORM_MEAN = [0.7630423088417134, 0.5456486014607426, 0.5700468609021178]
NORM_STD = [0.0891409288333237, 0.11792632289606514, 0.1324623088597418]
# Human-readable name for every class abbreviation.
CLASSES_TO_FULLNAMES = {
    'NV': 'Melanocytic nevi',
    'MEL': 'Melanoma',  # was mislabelled 'dermatofibroma', which is the DF class
    'BKL': 'Benign keratosis-like lesions',
    'BCC': 'Basal cell carcinoma',
    'AKIEC': 'Actinic keratoses',
    'VASC': 'Vascular lesions',
    'DF': 'Dermatofibroma'
}
# Class abbreviations; len(CLASSES) is the number of model outputs.
# NOTE(review): presumably in the same order as the label columns of Labels.csv -- confirm.
CLASSES = ['MEL','NV','BCC','AKIEC','BKL','DF','VASC']

In [96]:
### Data Methods and Containers
class HAM10k(Dataset):
    """Dataset backed by a DataFrame of image paths and integer class labels.

    Args:
        dataframe: table with a 'path' column (image file locations) and a
            'label' column (values convertible with ``int``).
        transform: optional callable applied to the loaded PIL image.
    """
    def __init__(self, dataframe, transform=None):
        self.df = dataframe
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the backing DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the sample at positional index ``idx``.

        Returns:
            ``(X, y)`` where ``X`` is the RGB image (after ``transform`` when
            one was given) and ``y`` is a scalar integer tensor with the label.
        """
        # Image.open is lazy. convert() forces the pixel data to be read and
        # guarantees three channels whatever mode the file is stored in; the
        # context manager then releases the file handle immediately.
        with Image.open(self.df['path'].iloc[idx]) as img:
            X = img.convert('RGB')
        y = torch.tensor(int(self.df['label'].iloc[idx]))
        if self.transform:
            X = self.transform(X)
        return X, y
        

### Instantiate Data Constants
# Datasets
def _read_split_ids(split_path):
    """Return the image ids listed one per line in ``split_path``."""
    with open(split_path, 'r') as f:
        return [line.rstrip() for line in f]


def _load_label_frame(labels_csv, image_dir):
    """Parse the label CSV into a DataFrame with columns id / label / path.

    The first line is treated as a header. In every other line, field 0 is the
    image id and fields 1-7 are per-class flags; the label is the zero-based
    position of the first flag containing '1'.

    Raises:
        ValueError: if a row has no flag set. Previously such a row left the
            columns with different lengths and failed later with an opaque
            pandas error.
    """
    records = []
    with open(labels_csv, 'r') as f:
        for line_no, line in enumerate(f):
            if line_no == 0:  # header row
                continue
            line = line.rstrip()
            if not line:  # tolerate blank lines such as a trailing newline
                continue
            comps = line.split(',')
            label = next((i - 1 for i in range(1, 8) if '1' in comps[i]), None)
            if label is None:
                raise ValueError(f'{labels_csv}: no class flag set for image '
                                 f'{comps[0]!r} (line {line_no + 1})')
            records.append({'id': comps[0],
                            'label': label,
                            'path': os.path.join(image_dir, comps[0] + '.jpg')})
    return pd.DataFrame(records, columns=['id', 'label', 'path'])


DF = _load_label_frame(os.path.join(HAM_DIR, 'Labels.csv'),
                       os.path.join(HAM_DIR, 'Train'))

# Training images get the same Resize + Normalize as validation images, plus
# light augmentation. With a bare ToTensor() the network was trained on raw,
# un-normalised full-size images but validated on normalised 224x224 ones.
train_transform = transforms.Compose([transforms.Resize((IMG_SIZE,IMG_SIZE)),
                                      transforms.RandomHorizontalFlip(),
                                      transforms.RandomVerticalFlip(),
                                      transforms.RandomRotation(20),
                                      transforms.ColorJitter(brightness=0.1, contrast=0.1, hue=0.1),
                                      transforms.ToTensor(),
                                      transforms.Normalize(NORM_MEAN, NORM_STD)])
TRAIN_DF = DF.loc[DF['id'].isin(
    _read_split_ids(os.path.join(HAM_DIR, 'TrainSplits', 'train.txt')))]
TRAIN_SET = HAM10k(TRAIN_DF, train_transform)

val_transform = transforms.Compose([transforms.Resize((IMG_SIZE,IMG_SIZE)),
                                    transforms.ToTensor(),
                                    transforms.Normalize(NORM_MEAN, NORM_STD)])
VAL_DF = DF.loc[DF['id'].isin(
    _read_split_ids(os.path.join(HAM_DIR, 'TrainSplits', 'val.txt')))]
VAL_SET = HAM10k(VAL_DF, val_transform)

# No explicit cleanup needed: the parsing temporaries are local to the helpers.


In [97]:
### Model Functions
def set_parameter_requires_grad(model, feature_extracting):
    """Freeze every parameter of ``model`` when ``feature_extracting`` is truthy.

    Args:
        model: module whose ``parameters()`` are visited.
        feature_extracting: when truthy, ``requires_grad`` is switched off on
            all parameters; when falsy, the model is left untouched.
    """
    if not feature_extracting:
        return
    for weight in model.parameters():
        weight.requires_grad = False

def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    """Build a classifier and replace its final layer for ``num_classes`` outputs.

    Args:
        model_name: "vgg" (VGG-11 with batch norm) or "densenet" (DenseNet-121).
        num_classes: number of outputs of the new final linear layer.
        feature_extract: when truthy, all loaded parameters are frozen; the
            replacement layer is created afterwards, so it stays trainable.
        use_pretrained: when true (default), weights are loaded from
            ``./Models/<arch>.pth``; when false, the network keeps its random
            initialisation. The flag was previously ignored.

    Returns:
        ``(model, input_size)`` where ``input_size`` is 224 for both models.

    Any other ``model_name`` prints a message and exits via ``sys.exit()``.
    """
    # Reject unknown names before any model is constructed.
    if model_name not in ("vgg", "densenet"):
        print("Invalid model name, exiting...")
        sys.exit()

    if model_name == "vgg":  # VGG w/BN
        model_ft = models.vgg11_bn()
        weights_file = 'vgg11_bn.pth'
    else:  # Dense-121
        model_ft = models.densenet121()
        weights_file = 'densenet121.pth'

    if use_pretrained:
        # map_location='cpu' lets a checkpoint saved from a GPU load on a
        # CPU-only machine; the caller moves the model to its device afterwards.
        state_dict = torch.load(os.path.join('.', 'Models', weights_file),
                                map_location='cpu')
        model_ft.load_state_dict(state_dict)

    set_parameter_requires_grad(model_ft, feature_extract)

    # Swap the final layer for one with the requested number of outputs.
    if model_name == "vgg":
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs, num_classes)
    else:
        num_ftrs = model_ft.classifier.in_features
        model_ft.classifier = nn.Linear(num_ftrs, num_classes)

    input_size = 224
    return model_ft, input_size


In [101]:
### Training Functions: Train, Keep Stats, Evaluate

def train_model(train_loader, model, criterion, optimizer,
                epochs=10, tracker=None):
    """Train ``model`` in place for ``epochs`` full passes over ``train_loader``.

    Args:
        train_loader: sized iterable yielding ``(images, labels)`` batches.
        model: network to optimise; switched to training mode on entry.
        criterion: loss callable invoked as ``criterion(output, labels)``.
        optimizer: object whose ``zero_grad()`` / ``step()`` are called once
            per batch.
        epochs: number of passes over ``train_loader``.
        tracker: optional object with ``iter_update(loss, acc)``; it receives
            the loss and accuracy of every 100th batch. ``None`` disables
            tracking (it previously crashed).
    """
    model.train()
    num_batches = len(train_loader)

    for epoch in range(epochs):
        print(f'=========\nTraining Epoch {epoch+1}\n========\n')
        # Every batch is processed; a stray `break` used to end each epoch
        # after the first batch.
        for i, (images, labels) in enumerate(train_loader):
            # Tensors are used directly: the autograd Variable wrapper is deprecated.
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)

            optimizer.zero_grad()
            output = model(images)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()

            # Print status
            if i % 100 == 0:
                # Accuracy is only needed when reporting, so compute it here.
                prediction = output.argmax(dim=1)
                tacc = prediction.eq(labels).sum().item() / images.size(0)
                print(f'[Epoch {epoch+1}], [Iter {i+1}/{num_batches}], '
                      f'[TrnLoss {loss.item():.4}], [TrnAcc {tacc:.4}]')
                if tracker is not None:
                    tracker.iter_update(loss.item(), tacc)
              
              
class StatTracker:
    """Accumulates training statistics as plain Python lists.

    ``iter_*`` lists grow by one entry per ``iter_update`` call, ``full_*``
    lists by one entry per ``full_update`` call.
    """

    def __init__(self):
        # Per-report loss / accuracy values.
        self.iter_train_losses, self.iter_train_acc = [], []
        # Whole-set train / validation accuracies.
        self.full_train_acc, self.full_val_acc = [], []

    def iter_update(self, tloss, tacc):
        """Record one training loss together with its accuracy."""
        self.iter_train_losses.append(tloss)
        self.iter_train_acc.append(tacc)

    def full_update(self, tacc, vacc):
        """Record one pair of train / validation accuracies."""
        self.full_train_acc.append(tacc)
        self.full_val_acc.append(vacc)
              
        

In [102]:
"""
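Fine-tune each model listed in MODEL_NAMES, in order: (1) VGG, (2) DenseNet.
Training loss and accuracy are recorded whenever the batch index is a multiple of 100.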
"""
def train_models():
    """Fine-tune every architecture in MODEL_NAMES on TRAIN_SET.

    For each model name the network is built with ``initialize_model`` (all
    layers trainable), trained for NUM_EPOCHS with Adam (lr=1e-3) and
    cross-entropy loss, and checkpointed to ``./<modelname>-ep<NUM_EPOCHS>.pth``
    (epoch count plus model and optimizer state dicts). The StatTracker of
    every model is collected and pickled to ``stats.pkl`` at the end.

    No validation pass is run: ``train_model`` accepts only a training loader,
    so no loader is built for VAL_SET here.
    """
    # Data Handling
    # shuffle=True reshuffles the samples every epoch, so batches are not
    # always drawn in the fixed order of the label file.
    train_loader = DataLoader(TRAIN_SET,
                              batch_size=BATCH_SIZE,
                              shuffle=True,
                              num_workers=0)
    trackers = []

    # Train
    for modelname in MODEL_NAMES:
        # The second return value (input size) is not needed here.
        model_ft, _ = initialize_model(modelname,
                                       len(CLASSES),
                                       feature_extract=False,
                                       use_pretrained=True)
        model = model_ft.to(DEVICE)
        optimizer = optim.Adam(model.parameters(), lr=1e-3)
        criterion = nn.CrossEntropyLoss().to(DEVICE)
        tracker = StatTracker()

        train_model(train_loader,
                    model,
                    criterion,
                    optimizer,
                    epochs=NUM_EPOCHS,
                    tracker=tracker)
        trackers.append(tracker)
        torch.save({'epoch': NUM_EPOCHS,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict()},
                    f'./{modelname}-ep{NUM_EPOCHS}.pth')

    with open('stats.pkl', 'wb') as f:
        pickle.dump(trackers, f)
    


In [None]:
# Entry point: start training when this code runs as the main module.
if __name__ == '__main__':
    train_models()

Training Epoch 1

