In [6]:
import os
import shutil
import pandas as pd
from PIL import Image
import numpy as np
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
from sklearn.model_selection import StratifiedKFold
import matplotlib.pyplot as plt
import warnings
import seaborn as sns

img_rows = 224
img_cols = 224
color_type = 3
batch_size=48
epochs=300
subject='Melanoma'
main_path=os.path.join("E:\\kaggle_imgs",subject)
img_path=os.path.join(main_path,"images")
data_path=os.path.join(main_path,"Data")
saved_path=os.path.join(main_path,"saved_models")
paths=[main_path, img_path,saved_path,data_path]
for fp in paths:
    print(fp)
    if not os.path.exists(fp):        
        os.mkdir(fp)
file_path=os.path.join(saved_path,"200621_")
file_best=os.path.join(saved_path,"200621__epoch_ 0_acc_92.00")

train_img_pkl=os.path.join(data_path,"train_imgs.npy")
test_img_pkl=os.path.join(data_path,"test_imgs.npy")
train_info_pkl=os.path.join(data_path,"train_folds.csv")

num_classes=4

E:\kaggle_imgs\Melanoma
E:\kaggle_imgs\Melanoma\images
E:\kaggle_imgs\Melanoma\saved_models
E:\kaggle_imgs\Melanoma\Data


In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms 

In [8]:
import os
import torch
import albumentations

import numpy as np
import pandas as pd

import torch.nn as nn
from sklearn import metrics
from sklearn import model_selection
from torch.nn import functional as F

from wtfml.utils import EarlyStopping
from wtfml.engine import Engine
from wtfml.data_loaders.image import ClassificationLoader

import pretrainedmodels

In [28]:
class SEResnext50_32x4d(nn.Module):
    def __init__(self, pretrained='imagenet'):
        super(SEResnext50_32x4d, self).__init__()
        
        self.base_model = pretrainedmodels.__dict__[
            "se_resnext50_32x4d"
        ](pretrained=None)
#         if pretrained is not None:
#             self.base_model.load_state_dict(
#                 torch.load(
#                     "../input/pretrained-model-weights-pytorch/se_resnext50_32x4d-a260b3a4.pth"
#                 )
#             )

        self.l0 = nn.Linear(2048, 1)
    
    def forward(self, image, targets):
        batch_size, _, _, _ = image.shape
        
        x = self.base_model.features(image)
        x = F.adaptive_avg_pool2d(x, 1).reshape(batch_size, -1)
        
        out = self.l0(x)
        loss = nn.BCEWithLogitsLoss()(out, targets.view(-1, 1).type_as(x))

        return out, loss

## Load train Data


In [23]:
def read_train_info():
    fp=train_info_pkl
    if False and os.path.exists(fp):
        try:
            print('loading train data from csv', flush=True)
            df=pd.read_csv(fp)
            print('complete!', flush=True)
        except EOFError:
            print('EOFError raised.', flush=True)
        
    else:
        df=pd.read_csv(data_path+"/train.csv")
        df["kfold"] = -1    
        df = df.sample(frac=1).reset_index(drop=True)
        y = df.target.values
        kf = model_selection.StratifiedKFold(n_splits=5)

        for f, (t_, v_) in enumerate(kf.split(X=df, y=y)):
            df.loc[v_, 'kfold'] = f
        
    return df

df=read_train_info()
df.to_csv(train_info_pkl,index=False)

In [24]:
df

Unnamed: 0,image_name,patient_id,sex,age_approx,anatom_site_general_challenge,diagnosis,benign_malignant,target,kfold
0,ISIC_5314855,IP_0663261,female,70.0,torso,unknown,benign,0,0
1,ISIC_6982423,IP_4966841,male,50.0,torso,unknown,benign,0,0
2,ISIC_4132000,IP_8171635,male,70.0,head/neck,unknown,benign,0,0
3,ISIC_4501708,IP_3434100,female,60.0,torso,unknown,benign,0,0
4,ISIC_5998926,IP_3078108,male,50.0,lower extremity,unknown,benign,0,0
...,...,...,...,...,...,...,...,...,...
33121,ISIC_4006569,IP_3169043,male,75.0,torso,unknown,benign,0,4
33122,ISIC_7302580,IP_6512345,male,70.0,lower extremity,unknown,benign,0,4
33123,ISIC_2410842,IP_4669427,female,35.0,torso,unknown,benign,0,4
33124,ISIC_1925590,IP_1676499,female,30.0,torso,unknown,benign,0,4


In [30]:
def train(fold):
    training_data_path = img_path+"/train3/"
    df = pd.read_csv(train_info_pkl)
    device = "cuda"
    epochs = 2#50
    train_bs = 32
    valid_bs = 16

    df_train = df[df.kfold != fold].reset_index(drop=True)
    df_valid = df[df.kfold == fold].reset_index(drop=True)

    model = SEResnext50_32x4d(pretrained="imagenet")
    model.to(device)

    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    train_aug = albumentations.Compose(
        [
            albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True),
            albumentations.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1, rotate_limit=15),
            albumentations.Flip(p=0.5)
        ]
    )

    valid_aug = albumentations.Compose(
        [
            albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True)
        ]
    )

    train_images = df_train.image_name.values.tolist()
    train_images = [os.path.join(training_data_path, i + ".jpg") for i in train_images]
    train_targets = df_train.target.values

    valid_images = df_valid.image_name.values.tolist()
    valid_images = [os.path.join(training_data_path, i + ".jpg") for i in valid_images]
    valid_targets = df_valid.target.values

    train_dataset = ClassificationLoader(
        image_paths=train_images,
        targets=train_targets,
        resize=None,
        augmentations=train_aug,
    )

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=train_bs, shuffle=True, num_workers=0
    )

    valid_dataset = ClassificationLoader(
        image_paths=valid_images,
        targets=valid_targets,
        resize=None,
        augmentations=valid_aug,
    )

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=valid_bs, shuffle=False, num_workers=0
    )

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        patience=3,
        threshold=0.001,
        mode="max"
    )

    es = EarlyStopping(patience=5, mode="max")

    for epoch in range(epochs):
        train_loss = Engine.train(train_loader, model, optimizer, device=device)
        predictions, valid_loss = Engine.evaluate(
            valid_loader, model, device=device
        )
        predictions = np.vstack((predictions)).ravel()
        auc = metrics.roc_auc_score(valid_targets, predictions)
        print(f"Epoch = {epoch}, AUC = {auc}")
        scheduler.step(auc)

        es(auc, model, model_path=saved_path+f"model_fold_{fold}.bin")
        if es.early_stop:
            print("Early stopping")
            break

In [39]:
def predict(fold):
    test_data_path =img_path+"/test3/"
    df = pd.read_csv(data_path+"/test.csv")
    device = "cuda"
    model_path=saved_path+f"/model_fold_{fold}.bin"

    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    aug = albumentations.Compose(
        [
            albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True)
        ]
    )

    images = df.image_name.values.tolist()
    images = [os.path.join(test_data_path, i + ".jpg") for i in images]
    targets = np.zeros(len(images))

    test_dataset = ClassificationLoader(
        image_paths=images,
        targets=targets,
        resize=None,
        augmentations=aug,
    )

    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=16, shuffle=False, num_workers=4
    )

    model = SEResnext50_32x4d(pretrained=None)
    model.load_state_dict(torch.load(model_path))
    model.to(device)

    predictions = Engine.predict(test_loader, model, device=device)
    predictions = np.vstack((predictions)).ravel()

    return predictions

In [29]:
train(0)

100%|███████████████████████████████████████████████████████████████████| 829/829 [09:43<00:00,  1.42it/s, loss=0.0878]
100%|███████████████████████████████████████████████████████████████████| 415/415 [01:31<00:00,  4.54it/s, loss=0.0768]
  0%|                                                                                          | 0/829 [00:00<?, ?it/s]

Epoch = 0, AUC = 0.8138120393459155
Validation score improved (-inf --> 0.8138120393459155). Saving model!


100%|███████████████████████████████████████████████████████████████████| 829/829 [06:17<00:00,  2.20it/s, loss=0.0803]
100%|███████████████████████████████████████████████████████████████████| 415/415 [00:37<00:00, 11.08it/s, loss=0.0766]
  0%|                                                                                          | 0/829 [00:00<?, ?it/s]

Epoch = 1, AUC = 0.8334127762611399
Validation score improved (0.8138120393459155 --> 0.8334127762611399). Saving model!


 11%|███████▋                                                            | 93/829 [00:43<05:44,  2.13it/s, loss=0.0732]


KeyboardInterrupt: 

In [31]:
train(1)


100%|███████████████████████████████████████████████████████████████████| 829/829 [06:21<00:00,  2.17it/s, loss=0.0893]
100%|███████████████████████████████████████████████████████████████████| 415/415 [00:40<00:00, 10.27it/s, loss=0.0773]
  0%|                                                                                          | 0/829 [00:00<?, ?it/s]

Epoch = 0, AUC = 0.8018446607085151
Validation score improved (-inf --> 0.8018446607085151). Saving model!


100%|███████████████████████████████████████████████████████████████████| 829/829 [06:22<00:00,  2.17it/s, loss=0.0796]
100%|████████████████████████████████████████████████████████████████████| 415/415 [00:36<00:00, 11.31it/s, loss=0.078]

Epoch = 1, AUC = 0.7996447889129641
EarlyStopping counter: 1 out of 5





In [32]:
train(2)


100%|███████████████████████████████████████████████████████████████████| 829/829 [06:15<00:00,  2.21it/s, loss=0.0872]
100%|███████████████████████████████████████████████████████████████████| 415/415 [00:36<00:00, 11.23it/s, loss=0.0814]
  0%|                                                                                          | 0/829 [00:00<?, ?it/s]

Epoch = 0, AUC = 0.7713596415194449
Validation score improved (-inf --> 0.7713596415194449). Saving model!


100%|███████████████████████████████████████████████████████████████████| 829/829 [06:16<00:00,  2.20it/s, loss=0.0798]
100%|███████████████████████████████████████████████████████████████████| 415/415 [00:36<00:00, 11.24it/s, loss=0.0844]


Epoch = 1, AUC = 0.777943254587385
Validation score improved (0.7713596415194449 --> 0.777943254587385). Saving model!


In [33]:
train(3)


100%|███████████████████████████████████████████████████████████████████| 829/829 [06:15<00:00,  2.21it/s, loss=0.0885]
100%|████████████████████████████████████████████████████████████████████| 415/415 [00:36<00:00, 11.29it/s, loss=0.104]
  0%|                                                                                          | 0/829 [00:00<?, ?it/s]

Epoch = 0, AUC = 0.7355811913279646
Validation score improved (-inf --> 0.7355811913279646). Saving model!


100%|███████████████████████████████████████████████████████████████████| 829/829 [06:15<00:00,  2.21it/s, loss=0.0802]
100%|███████████████████████████████████████████████████████████████████| 415/415 [00:37<00:00, 11.19it/s, loss=0.0835]


Epoch = 1, AUC = 0.8420248057617449
Validation score improved (0.7355811913279646 --> 0.8420248057617449). Saving model!


In [34]:
train(4)

100%|███████████████████████████████████████████████████████████████████| 829/829 [06:15<00:00,  2.21it/s, loss=0.0865]
100%|███████████████████████████████████████████████████████████████████| 415/415 [00:36<00:00, 11.31it/s, loss=0.0849]
  0%|                                                                                          | 0/829 [00:00<?, ?it/s]

Epoch = 0, AUC = 0.7719611365892866
Validation score improved (-inf --> 0.7719611365892866). Saving model!


100%|███████████████████████████████████████████████████████████████████| 829/829 [06:14<00:00,  2.21it/s, loss=0.0807]
100%|███████████████████████████████████████████████████████████████████| 415/415 [00:37<00:00, 11.21it/s, loss=0.0763]


Epoch = 1, AUC = 0.8103990355065954
Validation score improved (0.7719611365892866 --> 0.8103990355065954). Saving model!


In [40]:
p1 = predict(0)
p2 = predict(1)
p3 = predict(2)
p4 = predict(3)
p5 = predict(4)


100%|████████████████████████████████████████████████████████████████████████████████| 687/687 [00:47<00:00, 14.56it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 687/687 [00:46<00:00, 14.92it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 687/687 [00:46<00:00, 14.87it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 687/687 [00:45<00:00, 15.02it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 687/687 [00:45<00:00, 14.98it/s]


In [37]:
predictions = (p1 + p2 + p3 + p4 + p5) / 5
sample = pd.read_csv(data_path+"/sample_submission.csv")
sample.loc[:, "target"] = predictions
sample.to_csv("submission.csv", index=False)