In [None]:
# Python libraries
import os
from glob import glob

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image
from tqdm.auto import tqdm

# PyTorch libraries
import torch
from torch import optim,nn
from torch.autograd import Variable
from torch.utils.data import DataLoader,Dataset
from torchvision import models,transforms

# Scikit-learn libraries
from sklearn.model_selection import train_test_split

In [None]:
# Hyperparameters and runtime settings used for training
args = dict(
    batch_size=50,        # mini-batch size
    lr=1e-3,              # learning rate
    weight_decay=5e-4,    # L2 penalty
    epoch_num=100,        # number of epochs
    num_workers=2,
)

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
args['device'] = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(args['device'])

cuda


In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Collect every .jpg located one directory level below data_dir and index the
# paths by image id (the file name without its extension).
data_dir = "/content/drive/MyDrive/dataset/"
all_image_path = glob(os.path.join(data_dir, '*', '*.jpg'))
imageid_path_dict = dict(
    (os.path.splitext(os.path.basename(image_path))[0], image_path)
    for image_path in all_image_path
)

# Short diagnosis code -> readable class name.
lesion_type_dict = dict([
    ('nv', 'Nevo Melanotico'),
    ('mel', 'Melanoma'),
    ('bkl', 'Queratose Seborreia'),
    ('bcc', 'Carcinoma'),
    ('akiec', 'Queratose Actinica'),
    ('vasc', 'Lesao Vascular'),
    ('df', 'Dermatofibroma'),
])

In [None]:
# Computes the per-channel mean and standard deviation of the images; it was run
# once and the resulting values were saved for HAM10000.
def calcular_img_mean_std(image_paths):
    """Return the per-channel mean and standard deviation of a set of images.

    Each image is read with OpenCV, resized to 224x224 and scaled by 1/255.
    The statistics are accumulated image by image (sum and sum of squares), so
    memory use does not grow with the number of images.

    Args:
        image_paths: sequence of image file paths.

    Returns:
        (means, stdevs): two lists of three floats each, in RGB order.

    Raises:
        ValueError: if ``image_paths`` is empty.
        FileNotFoundError: if OpenCV cannot read one of the files.
    """
    img_h, img_w = 224, 224
    n_images = len(image_paths)
    if n_images == 0:
        raise ValueError("image_paths is empty; cannot compute mean and std")

    channel_sum = np.zeros(3, dtype=np.float64)
    channel_sq_sum = np.zeros(3, dtype=np.float64)
    pixel_count = 0

    for image_path in tqdm(image_paths):
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError("Could not read image: {}".format(image_path))
        # cv2.resize takes the target size as (width, height).
        img = cv2.resize(img, (img_w, img_h))
        pixels = img.reshape(-1, 3).astype(np.float64) / 255.
        channel_sum += pixels.sum(axis=0)
        channel_sq_sum += np.square(pixels).sum(axis=0)
        pixel_count += pixels.shape[0]

    # Shape the fully stacked array would have had (kept for output compatibility).
    print((img_h, img_w, 3, n_images))

    mean_bgr = channel_sum / pixel_count
    # Population variance (ddof=0), i.e. what np.std computes: E[x^2] - E[x]^2.
    var_bgr = channel_sq_sum / pixel_count - np.square(mean_bgr)
    std_bgr = np.sqrt(np.maximum(var_bgr, 0.0))  # clamp tiny negative rounding errors

    # BGR --> RGB
    means = [float(value) for value in mean_bgr[::-1]]
    stdevs = [float(value) for value in std_bgr[::-1]]

    print("normMean = {}".format(means))
    print("normStd = {}".format(stdevs))
    return means,stdevs

In [None]:
# Compute the normalization statistics (per-channel mean and std) over every image found.
# The helper defined in this notebook is named calcular_img_mean_std.
norm_mean,norm_std = calcular_img_mean_std(all_image_path)

In [None]:
# Flags whether a lesion id belongs to the lesions that have a single image
def get_duplicates(x, unique_ids=None):
    """Classify a lesion id as 'unduplicated' or 'duplicated'.

    Args:
        x: lesion id to look up.
        unique_ids: optional container of the lesion ids considered unique.
            Pass a precomputed ``set`` when calling this once per row so the
            lookup is O(1). When omitted, the ids are read from the global
            ``df_undup['lesion_id']`` on every call (original behaviour).

    Returns:
        'unduplicated' if ``x`` is among the unique ids, otherwise 'duplicated'.
    """
    if unique_ids is None:
        unique_ids = set(df_undup['lesion_id'])
    return 'unduplicated' if x in unique_ids else 'duplicated'

In [None]:
# Metadata preprocessing: attach to every row the image path, the readable class
# name and an integer class code.
df_original = pd.read_csv(os.path.join(data_dir, 'HAM10000_metadata.csv')).assign(
    path=lambda frame: frame['image_id'].map(imageid_path_dict.get),
    cell_type=lambda frame: frame['dx'].map(lesion_type_dict.get),
    # Codes follow the sorted order of the class names.
    cell_type_idx=lambda frame: pd.Categorical(frame['cell_type']).codes,
)
df_original.head()

In [None]:

# Lesions with exactly one image are "unduplicated". Only those rows are eligible
# for the validation set, so a lesion placed in validation has no other image
# left in the remaining data.
images_per_lesion = df_original.groupby('lesion_id')['image_id'].count()
single_image_lesions = images_per_lesion[images_per_lesion == 1].index

# Vectorized membership test (replaces a per-row Python lookup over a rebuilt list).
df_original['duplicates'] = np.where(
    df_original['lesion_id'].isin(single_image_lesions), 'unduplicated', 'duplicated')
df_undup = df_original[df_original['duplicates'] == 'unduplicated']

# Validation split: 20% of the unduplicated rows, stratified by class code.
y = df_undup['cell_type_idx']
_, df_val = train_test_split(df_undup, test_size=0.2, random_state=101, stratify=y)

df_val['cell_type_idx'].value_counts()


In [None]:
# The training set is df_original without the rows that belong to the validation set.
# This helper tells whether an image is part of the train or the val split.
def get_val_rows(x, val_ids=None):
    """Classify an image id as 'val' or 'train'.

    Args:
        x: image id; it is converted with ``str`` before the lookup.
        val_ids: optional container of validation image ids. Pass a precomputed
            ``set`` when calling this once per row so the lookup is O(1). When
            omitted, the ids are read from the global ``df_val['image_id']`` on
            every call (original behaviour).

    Returns:
        'val' if ``str(x)`` is among the validation ids, otherwise 'train'.
    """
    if val_ids is None:
        val_ids = set(df_val['image_id'])
    return 'val' if str(x) in val_ids else 'train'

# Label every row of df_original as 'train' or 'val'. A vectorized isin() replaces
# the per-row Python lookup that rebuilt the validation id list for each image.
df_original['train_or_val'] = np.where(
    df_original['image_id'].astype(str).isin(df_val['image_id']), 'val', 'train')
# Training frame: everything that is not in the validation set
df_train = df_original[df_original['train_or_val'] == 'train']
print(len(df_train))
print(len(df_val))

In [None]:
# Number of training rows per class code
df_train['cell_type_idx'].value_counts()

In [None]:
# Number of validation rows per class name
df_val['cell_type'].value_counts()

In [None]:
# Oversampling used to balance the classes: the rows of each class are replicated
# until the class appears `rate` times in total (0 or 1 leaves the class untouched).
#
# The rates are keyed by the `dx` diagnosis code instead of by position in
# cell_type_idx. cell_type_idx follows the sorted order of the class-name strings,
# so a positional list silently changes meaning whenever the names are edited.
# NOTE(review): the code -> rate assignment assumes the usual HAM10000 imbalance
# (df and vasc rarest, nv most frequent) -- confirm against the value_counts above.
data_aug_rate = {'akiec': 15, 'bcc': 10, 'bkl': 5, 'df': 50, 'nv': 0, 'vasc': 40, 'mel': 5}

extra_copies = [
    df_train.loc[df_train['dx'] == dx_code, :]
    for dx_code, rate in data_aug_rate.items()
    for _ in range(max(rate - 1, 0))
]
# DataFrame.append was removed in pandas 2.0; a single concat is the replacement.
df_train = pd.concat([df_train] + extra_copies, ignore_index=True)
df_train['cell_type'].value_counts()

In [None]:
# Give both splits a fresh 0..n-1 index (reset_index() keeps the previous index
# as a regular column).
df_train = df_train.reset_index()
df_val = df_val.reset_index()

In [None]:
# Write both splits to CSV files in the current working directory
df_val.to_csv('val_depois_aug.csv')
df_train.to_csv('train_depois_aug.csv')

In [None]:
# Builds one of the three torchvision architectures used in this notebook
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    """Create a torchvision model whose final linear layer outputs ``num_classes`` values.

    Args:
        model_name: "resnet" (ResNet-50), "vgg" (VGG-11 with batch norm) or
            "densenet" (DenseNet-121).
        num_classes: output size of the replacement final linear layer.
        feature_extract: forwarded to ``set_parameter_requires_grad``; when truthy
            the loaded parameters are frozen. The new final layer is created
            afterwards, so it is not affected by that freeze.
        use_pretrained: forwarded as ``pretrained=`` to the torchvision constructor.

    Returns:
        (model, input_size): the model and the square input resolution (224).

    Raises:
        ValueError: if ``model_name`` is not one of the supported names. The
            previous ``exit()`` would terminate the whole session instead of
            reporting the mistake.
    """
    if model_name == "resnet":
        # Resnet50
        model_ft = models.resnet50(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.fc = nn.Linear(model_ft.fc.in_features, num_classes)

    elif model_name == "vgg":
        # VGG11_bn
        model_ft = models.vgg11_bn(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.classifier[6] = nn.Linear(model_ft.classifier[6].in_features, num_classes)

    elif model_name == "densenet":
        # Densenet121
        model_ft = models.densenet121(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.classifier = nn.Linear(model_ft.classifier.in_features, num_classes)

    else:
        raise ValueError(
            "Invalid model name {!r}; expected 'resnet', 'vgg' or 'densenet'".format(model_name))

    # All three architectures are fed 224x224 inputs here.
    input_size = 224
    return model_ft, input_size

def set_parameter_requires_grad(model, feature_extracting):
    """Freeze every parameter of ``model`` when ``feature_extracting`` is truthy.

    When ``feature_extracting`` is falsy the model is left untouched.
    """
    if not feature_extracting:
        return
    for weight in model.parameters():
        weight.requires_grad = False

In [None]:
# Select the architecture to train and build it
num_classes = 7
model_name = 'densenet'
feature_extract = False

model_ft, input_size = initialize_model(
    model_name=model_name,
    num_classes=num_classes,
    feature_extract=feature_extract,
    use_pretrained=True,
)
# Move the network to the device chosen in args.
model = model_ft.to(args['device'])

cuda:0


In [None]:
# Preprocessing pipelines for the train and validation sets.
# The mean and std values were produced by calcular_img_mean_std.
norm_mean = (0.76310134, 0.5456841, 0.57007784)
norm_std = (0.14092982, 0.1526007, 0.16996273)

# Training pipeline: resize, random augmentation, tensor conversion, normalization.
train_transform = transforms.Compose([
    transforms.Resize((input_size, input_size)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(20),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])

# Validation pipeline: resize, tensor conversion and normalization only.
val_transform = transforms.Compose([
    transforms.Resize((input_size, input_size)),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])

In [None]:
# Dataset class used by the DataLoaders to load the images
class HAM10000(Dataset):
    """Dataset backed by a DataFrame with ``path`` and ``cell_type_idx`` columns."""

    def __init__(self, df, transform=None):
        """Store the metadata frame and the optional transform applied to each image."""
        self.df = df
        self.transform = transform

    def __len__(self):
        """Number of rows in the metadata frame."""
        return len(self.df)

    def __getitem__(self, index):
        """Return ``(image, label)`` for the row at integer position ``index``.

        Rows are addressed by position (``iloc``), so the frame does not need a
        0..n-1 index. The image is converted to RGB so it always has three channels.
        """
        # Load data and get label
        X = Image.open(self.df['path'].iloc[index]).convert('RGB')
        y = torch.tensor(int(self.df['cell_type_idx'].iloc[index]))

        if self.transform:
            X = self.transform(X)

        return X, y

In [None]:
# Training loader: shuffled, uses the augmenting train_transform.
training_set = HAM10000(df_train, transform=train_transform)
train_loader = DataLoader(training_set, batch_size=args['batch_size'], shuffle=True, num_workers=args['num_workers'])

# Validation loader: must use val_transform (no random flips/rotations/jitter),
# otherwise the validation metrics vary from run to run and are not comparable.
validation_set = HAM10000(df_val, transform=val_transform)
val_loader = DataLoader(validation_set, batch_size=args['batch_size'], shuffle=False, num_workers=args['num_workers'])

In [None]:
# Adam optimizer and cross-entropy loss criterion.
# NOTE(review): args['weight_decay'] is defined but not passed to the optimizer --
# confirm whether weight_decay=args['weight_decay'] was intended.
optimizer = optim.Adam(model.parameters(), lr=args['lr'])
criterion = nn.CrossEntropyLoss().to(args['device'])

In [None]:
# Running-average helper used to report the results
class AverageMeter(object):
    """Keeps the latest value, the weighted sum, the count and the running mean."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Set every statistic back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the running mean."""
        self.sum, self.count = self.sum + val * n, self.count + n
        self.val = val
        self.avg = self.sum / self.count

In [None]:
# Running training averages, appended by train() once every 100 iterations.
total_loss_train, total_acc_train = [],[]
def train(train_loader, model, criterion, optimizer, epoch):
    """Run one training epoch and return ``(average loss, average accuracy)``.

    Args:
        train_loader: iterable of ``(images, labels)`` batches that supports ``len()``.
        model: network to optimize; batches are moved to the device of its parameters.
        criterion: loss function called as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped once per batch.
        epoch: epoch number, used only in the progress message.

    Returns:
        (loss, accuracy) averaged over the samples seen in this epoch.

    Side effects:
        Every 100 iterations the running averages are printed and appended to the
        module-level ``total_loss_train`` / ``total_acc_train`` lists.
    """
    model.train()
    # Take the device from the model itself instead of relying on a global name.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    train_loss = AverageMeter()
    train_acc = AverageMeter()
    for i, (images, labels) in enumerate(train_loader):
        N = images.size(0)
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        prediction = outputs.argmax(dim=1)
        correct = prediction.eq(labels).sum().item()
        # Weight each batch by its size so a smaller last batch does not skew the
        # averages (assumes criterion returns the batch mean -- TODO confirm for
        # custom criteria).
        train_acc.update(correct / N, N)
        train_loss.update(loss.item(), N)
        if (i + 1) % 100 == 0:
            print('[epoch %d], [iter %d / %d], [train loss %.5f], [train acc %.5f]' % (
                epoch, i + 1, len(train_loader), train_loss.avg, train_acc.avg))
            total_loss_train.append(train_loss.avg)
            total_acc_train.append(train_acc.avg)
    return train_loss.avg, train_acc.avg

In [None]:
def validate(val_loader, model, criterion, optimizer, epoch):
    """Evaluate ``model`` on ``val_loader`` and return ``(average loss, average accuracy)``.

    Args:
        val_loader: iterable of ``(images, labels)`` batches.
        model: network to evaluate; batches are moved to the device of its parameters.
        criterion: loss function called as ``criterion(outputs, labels)``.
        optimizer: unused; kept so the signature mirrors ``train``.
        epoch: epoch number, used only in the summary message.

    Returns:
        (loss, accuracy) averaged over the validation samples.
    """
    model.eval()
    # Take the device from the model itself instead of relying on a global name.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    val_loss = AverageMeter()
    val_acc = AverageMeter()
    with torch.no_grad():
        for images, labels in val_loader:
            N = images.size(0)
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            prediction = outputs.argmax(dim=1)
            correct = prediction.eq(labels).sum().item()

            # Weight each batch by its size so the result is a per-sample average
            # (assumes criterion returns the batch mean -- TODO confirm for custom
            # criteria).
            val_acc.update(correct / N, N)
            val_loss.update(criterion(outputs, labels).item(), N)

    print('------------------------------------------------------------')
    print('[epoch %d], [val loss %.5f], [val acc %.5f]' % (epoch, val_loss.avg, val_acc.avg))
    print('------------------------------------------------------------')
    return val_loss.avg, val_acc.avg

In [None]:
# Main loop: one train() pass followed by one validate() pass per epoch.
epoch_num = args['epoch_num']
best_val_acc = 0  # highest validation accuracy seen so far
total_loss_val, total_acc_val = [],[]  # one validation loss / accuracy entry per epoch
for epoch in range(1, epoch_num+1):
    loss_train, acc_train = train(train_loader, model, criterion, optimizer, epoch)
    loss_val, acc_val = validate(val_loader, model, criterion, optimizer, epoch)
    total_loss_val.append(loss_val)
    total_acc_val.append(acc_val)
    # NOTE(review): a new best is only reported; no weights are saved or kept here.
    if acc_val > best_val_acc:
        best_val_acc = acc_val
        print('*****************************************************')
        print('best record: [epoch %d], [val loss %.5f], [val acc %.5f]' % (epoch, loss_val, acc_val))
        print('*****************************************************')

print("Final Process!!!")

In [None]:
# Number of training-loss points recorded (train() appends one every 100 iterations)
print(len(total_loss_train))

110


**SOME PLOTS**



In [None]:
# Training curves. The points are appended inside train() once every 100
# iterations, so the x axis counts logging steps rather than epochs.
plt.figure(figsize=(20, 9))
plt.plot(total_loss_train, label='Train Loss')
plt.plot(total_acc_train, label='Train Accuracy', linewidth=3, alpha=0.5)
plt.xlabel('Logging step (every 100 iterations)', fontsize=16)
plt.ylabel('Loss / Accuracy', fontsize=16)
# Title follows the architecture that was actually selected.
plt.title('{} Training'.format(model_name), fontsize=16)
plt.legend()
plt.show()

In [None]:
# Validation curves: one point per epoch.
plt.figure(figsize=(20, 9))
plt.plot(total_loss_val, label='Validation Loss')
plt.plot(total_acc_val, label='Validation Accuracy', linewidth=3, alpha=0.5)
plt.xlabel('Epochs', fontsize=16)
plt.ylabel('Loss / Accuracy', fontsize=16)
# Title follows the architecture that was actually selected.
plt.title('{} Validation'.format(model_name), fontsize=16)
plt.legend()
plt.show()

In [None]:
# Training (top) and validation (bottom) curves in one figure.
# fig1 and fig2 are the two Axes of the figure.
fig = plt.figure(num = 2)
fig1 = fig.add_subplot(2,1,1)
fig2 = fig.add_subplot(2,1,2)
fig1.plot(total_loss_train, label = 'training loss')
fig1.plot(total_acc_train, label = 'training accuracy')
fig2.plot(total_loss_val, label = 'validation loss')
fig2.plot(total_acc_val, label = 'validation accuracy')
# plt.legend() only affects the current (last) Axes, so each subplot gets its own legend.
fig1.legend()
fig2.legend()
plt.show()

**SAVE MODEL**

In [None]:
# Save the trained model object for deployment.
# NOTE(review): the file is named 'model_vgg' while model_name is set to 'densenet'
# earlier in the notebook -- confirm the intended file name before deploying.
torch.save(model, 'model_vgg')