In [None]:
# Version 12 GPU / Resnext 32x4d / 89% / basic aug

In [None]:
# !pip install xgboost

import numpy as np
import pandas as pd

from sklearn.datasets import load_iris
import xgboost as xgb
from sklearn.metrics import accuracy_score

In [None]:
# Load the competition metadata tables and the submission template, in that order.
train, test, sub = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/siim-isic-melanoma-classification/train.csv',
        '../input/siim-isic-melanoma-classification/test.csv',
        '../input/siim-isic-melanoma-classification/sample_submission.csv',
    )
)
train.head()


In [None]:
# Replace missing metadata with explicit placeholders, column by column, in both
# frames: the string 'na' for the two categorical columns and 0 for the age.
_FILL_VALUES = {
    'sex': 'na',
    'age_approx': 0,
    'anatom_site_general_challenge': 'na',
}

for _frame in (train, test):
    for _column, _placeholder in _FILL_VALUES.items():
        _frame[_column] = _frame[_column].fillna(_placeholder)

In [None]:
# Encode the categorical metadata columns as 1-based integer codes.
#
# The category list is the sorted union of the values seen in train and test, so
# a given code always refers to the same category in both frames. Encoding each
# frame with its own `astype("category")` would shift the codes whenever one
# frame lacks a value that the other has, and the model would then be scored on
# features whose meaning differs from what it was trained on.
for column in ('sex', 'anatom_site_general_challenge'):
    shared_dtype = pd.CategoricalDtype(sorted(set(train[column]) | set(test[column])))
    train[column] = train[column].astype(shared_dtype).cat.codes + 1
    test[column] = test[column].astype(shared_dtype).cat.codes + 1

# Show both encoded frames (a bare trailing expression would only render one).
display(train.head(), test.head())

In [None]:
# Metadata columns used as model inputs; the same list is applied to both frames
# so train and test features stay in the same column order.
_META_FEATURES = ['sex', 'age_approx', 'anatom_site_general_challenge']

x_train, y_train = train[_META_FEATURES], train['target']
x_test = test[_META_FEATURES]


In [None]:
# Gradient-boosted baseline on the metadata features.
#
# The target is binary, so the binary logistic objective is used. The class
# re-weighting below (`scale_pos_weight`) is only honoured by binary objectives;
# with a multi-class objective it has no effect on training.
# The weight is the negative/positive ratio of the training labels, computed
# from `y_train` rather than from hard-coded counts.
_n_negative = int((y_train == 0).sum())
_n_positive = int((y_train == 1).sum())

clf = xgb.XGBClassifier(n_estimators=2000,
                        max_depth=8,
                        objective='binary:logistic',
                        seed=0,
                        nthread=-1,
                        learning_rate=0.15,
                        scale_pos_weight=_n_negative / _n_positive)

In [None]:
# Fit the XGBoost classifier on the selected metadata features and the training target.
clf.fit(x_train, y_train)

In [None]:
# Column 1 of predict_proba is the probability of the positive class.
sub["target"] = clf.predict_proba(x_test)[:, 1]
# index=False keeps the DataFrame index out of the file, matching how the other
# submission files in this notebook are written.
sub.to_csv("./sub_xgboost_0817.csv", index=False)

In [None]:
# TPU
# Download and run the PyTorch/XLA environment setup script (nightly build),
# installing the extra apt packages it is asked for.
# NOTE(review): the script is fetched from the `master` branch at run time, so
# the installed environment is not pinned — confirm this is acceptable.
!curl https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py
!python pytorch-xla-env-setup.py --version nightly --apt-packages libomp5 libopenblas-dev
# wtfml supplies the EarlyStopping helper imported in the next cell (version pinned).
!pip install wtfml==0.0.3
# efficientnet_pytorch supplies the EfficientNet backbone.
# NOTE(review): this install is not version-pinned.
!pip install efficientnet_pytorch

In [None]:
import warnings
import torch_xla
import torch_xla.debug.metrics as met
import torch_xla.distributed.data_parallel as dp
import torch_xla.distributed.parallel_loader as pl
import torch_xla.utils.utils as xu
import torch_xla.core.xla_model as xm 
import torch_xla.distributed.xla_multiprocessing as xmp
import torch_xla.test.test_utils as test_utils
import warnings
import gc
from wtfml.utils import EarlyStopping
from sklearn.metrics import roc_auc_score

warnings.filterwarnings("ignore")

In [None]:
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision

import cv2

import numpy as np 
import pandas as pd
import os

from torch.utils.data import DataLoader,TensorDataset,Dataset
import matplotlib.pyplot as plt
import albumentations
from sklearn import model_selection
from sklearn.metrics import roc_auc_score
from efficientnet_pytorch import EfficientNet

In [None]:
from efficientnet_pytorch import EfficientNet
class Effi(nn.Module):
    """Pretrained EfficientNet-B4 whose `_fc` head is replaced by a single-output linear layer."""

    def __init__(self):
        super().__init__()
        backbone = EfficientNet.from_pretrained('efficientnet-b4')
        # Swap the classifier head for one output unit (1792 input features).
        backbone._fc = nn.Linear(in_features=1792, out_features=1, bias=True)
        self.base_model = backbone

    def forward(self, image):
        """Run `image` through the backbone and return its output unchanged.

        No activation or loss is applied here; the caller is responsible for both.
        """
        return self.base_model(image)

In [None]:
## New TPU Model
class EffNet(nn.Module):
    """EfficientNet feature extractor followed by global pooling, dropout and a 1-unit head.

    Args:
        model: EfficientNet variant suffix, e.g. ``'b4'`` (default), ``'b3'``,
            ``'b2'``. It selects the pretrained weights ``efficientnet-<model>``.
    """

    def __init__(self, model='b4'):
        super().__init__()

        # The variant name needs the hyphen ("efficientnet-b4") and must actually
        # be passed on; otherwise the `model` argument has no effect.
        model_name = f"efficientnet-{model}"
        self.feature = EfficientNet.from_pretrained(model_name)
        self.drop = nn.Dropout(0.3)
        # Size the head from the backbone's own classifier input width so other
        # variants work without editing a constant (b4 -> 1792, b3 -> 1536,
        # b2 -> 1408).
        self.l0 = nn.Linear(self.feature._fc.in_features, 1)

    def forward(self, img):
        """Return one output value per image in the batch `img`."""
        batch_size = img.shape[0]

        x = self.feature.extract_features(img)
        # Collapse the spatial dimensions to 1x1, then flatten to (batch, features).
        x = nn.functional.adaptive_avg_pool2d(x, 1).reshape(batch_size, -1)
        x = self.drop(x)
        return self.l0(x)

In [None]:
def make_csv(seed=42, n_splits=5):
    """Assign a stratified fold id to every training row and save the table.

    Reads the competition ``train.csv``, shuffles the rows, writes a ``fold``
    column (0 .. n_splits-1) using ``StratifiedKFold`` on ``target``, and saves
    the result as ``train_kfold.csv`` in the working directory.

    Args:
        seed: Random state for the shuffle. A fixed value makes the fold
            assignment identical on every run, which matters here because
            checkpoints for different folds are produced and consumed in
            separate runs. Pass ``None`` for a different shuffle each run.
        n_splits: Number of folds.

    Returns:
        The shuffled DataFrame including the ``fold`` column.
    """
    df = pd.read_csv("../input/siim-isic-melanoma-classification/train.csv")
    df["fold"] = -1
    df = df.sample(frac=1, random_state=seed).reset_index(drop=True)
    y = df.target.values
    kf = model_selection.StratifiedKFold(n_splits=n_splits)

    # Only the validation indices are needed: each row is a validation row in
    # exactly one split, and that split's number becomes its fold id.
    for f, (_, v_) in enumerate(kf.split(X=df, y=y)):
        df.loc[v_, 'fold'] = f

    df.to_csv("train_kfold.csv", index=False)
    return df
df=make_csv()

In [None]:
#Augmentation
from albumentations import ( HorizontalFlip, IAAPerspective, ShiftScaleRotate, 
CLAHE, RandomRotate90, Transpose, ShiftScaleRotate, Blur, OpticalDistortion, 
GridDistortion, HueSaturationValue, IAAAdditiveGaussianNoise, GaussNoise, 
MotionBlur, MedianBlur, RandomBrightnessContrast, IAAPiecewiseAffine, IAASharpen, 
IAAEmboss, Flip, OneOf, Compose, Rotate, Cutout, HorizontalFlip, Normalize ) 

# Per-channel normalisation statistics passed to Normalize below.
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)
def get_aug(mode="train"):
    """Build the albumentations pipeline for the given mode.

    Args:
        mode: ``"train"`` returns the random augmentation pipeline followed by
            normalisation; any other value returns normalisation only.

    Returns:
        An albumentations ``Compose`` object.
    """
    if mode =="train":
        aug=Compose([Rotate(limit=15),
            OneOf([IAAAdditiveGaussianNoise(),GaussNoise(),], p=0.2),
            ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.2, rotate_limit=15, p=0.2),
            OneOf([OpticalDistortion(p=0.3),GridDistortion(p=0.1),IAAPiecewiseAffine(p=0.3),], p=0.2),
            OneOf([
                CLAHE(clip_limit=2),
                IAASharpen(),
                IAAEmboss(),
                RandomBrightnessContrast(),
            ], p=0.3),
            HueSaturationValue(p=0.3),
            # The probability must be passed by keyword: the first positional
            # parameter of these transforms is `always_apply`, so a positional
            # 0.5 is truthy and forces the flip on every sample.
            Flip(p=0.5),
            HorizontalFlip(p=0.5),
            Normalize(mean, std, max_pixel_value=255.0, always_apply=True),
            ])
    else:
        aug=Compose([Normalize(mean, std, max_pixel_value=255.0, always_apply=True),])

    return aug

In [None]:
from PIL import Image
class ClassificationLoader:
    """Map-style dataset that yields ``(image_tensor, target_tensor)`` pairs.

    Args:
        image_paths: Sequence of image file paths.
        targets: Sequence of labels, indexable in parallel with ``image_paths``.
        resize: ``None`` to keep the stored image size, or a ``(height, width)``
            pair to resize each image (bilinear) before augmentation.
        augmentations: Optional callable invoked as ``augmentations(image=arr)``
            and expected to return a mapping with an ``"image"`` entry.
            When ``None`` the decoded image is used as is.
    """

    def __init__(self, image_paths, targets, resize, augmentations=None):
        self.image_paths = image_paths
        self.targets = targets
        self.resize = resize
        self.augmentations = augmentations

    def __len__(self):
        """Number of samples, i.e. the number of image paths."""
        return len(self.image_paths)

    def __getitem__(self, item):
        """Load, optionally resize and augment sample `item`.

        Returns:
            A ``(float tensor in channel-first layout, long tensor target)`` tuple.
        """
        # The context manager releases the file handle once pixels are copied out.
        with Image.open(self.image_paths[item]) as img:
            if self.resize is not None:
                # PIL expects (width, height); `resize` is (height, width).
                img = img.resize((self.resize[1], self.resize[0]), resample=Image.BILINEAR)
            image = np.array(img)
        targets = self.targets[item]

        # `augmentations` defaults to None, so it must not be called blindly.
        if self.augmentations is not None:
            image = self.augmentations(image=image)["image"]
        # Channel-last array -> channel-first, as float32.
        image = np.transpose(image, (2, 0, 1)).astype(np.float32)
        return torch.tensor(image, dtype=torch.float),torch.tensor(targets, dtype=torch.long)

In [None]:
#Dataset
# Batch sizes: `train_bs` is used when mode == "train", `valid_bs` for every other mode.
train_bs=108
valid_bs=64
def get_dataset(df,mode="train",path=None):
    """Build a DataLoader over the images listed in `df`.

    Args:
        df: DataFrame with an ``image_name`` column and, unless ``mode`` is
            ``"test"``, a ``target`` column.
        mode: ``"train"`` enables shuffling, the training augmentations and the
            training batch size. ``"test"`` substitutes all-zero targets. Any
            other value (e.g. ``"valid"``) uses the real targets without
            shuffling.
        path: Directory containing ``<image_name>.jpg`` files. Required.

    Returns:
        ``(dataloader, targets)`` where ``targets`` is the array of labels for
        all rows of ``df``.

    Raises:
        ValueError: If ``path`` is not given.
    """
    if path is None:
        raise ValueError("get_dataset requires `path`, the directory holding the .jpg images")

    # os.path.join yields the same result as plain concatenation when `path`
    # ends with a separator and also works when it does not.
    imgs=[os.path.join(path, name+".jpg") for name in df.image_name.values.tolist()]
    if mode =="test":
        tar=np.zeros(len(imgs))
    else:
        tar=df.target.values

    aug=get_aug(mode)

    dataset=ClassificationLoader(
        image_paths=imgs,targets=tar,resize=None,augmentations=aug
    )

    is_train = mode=="train"
    batch_size = train_bs if is_train else valid_bs

    # Shuffling is delegated to the sampler; DataLoader's own `shuffle` must stay
    # False when a sampler is supplied.
    # NOTE(review): with more than one replica each process would presumably see
    # only part of the dataset, while `tar` still covers every row — confirm
    # before using this with multi-core training.
    sampler = torch.utils.data.distributed.DistributedSampler(dataset,
                                                                  num_replicas = xm.xrt_world_size(),
                                                                  rank = xm.get_ordinal(),
                                                                  shuffle = is_train)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             sampler=sampler,
                                             num_workers=4)
    return dataloader,tar

In [None]:
class AverageMeter:
    """
    Tracks the latest value together with a running weighted average.

    Attributes (all start at 0):
        val:   most recent value passed to `update`
        sum:   sum of `val * n` over all updates
        count: sum of `n` over all updates
        avg:   `sum / count`
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Set every statistic back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record `val` with weight `n` and refresh the running average."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

In [None]:
from tqdm import tqdm
class Engine:
    """Stateless helpers for one training epoch, validation and inference."""

    @staticmethod
    def train(
        data_loader,
        model,
        optimizer,
        device,
        epoch,
        criterion
    ):
        """Run one training epoch.

        Args:
            data_loader: Iterable yielding ``(image, targets)`` batches.
            model: Module to train; called as ``model(image)``.
            optimizer: Optimizer stepped through ``xm.optimizer_step``.
            device: Device the batches are moved to.
            epoch: Current epoch number (not used inside this method).
            criterion: Loss called as ``criterion(out, targets)`` with the
                targets reshaped to a column and cast to the output's type.

        Returns:
            The average loss over the epoch, weighted by batch size.
        """
        losses = AverageMeter()
        model.train()

        tk0 = tqdm(data_loader, total=len(data_loader), disable=False, ascii=True)
        for b_idx, (image,targets) in enumerate(tk0):
            # Use the real batch size: the last batch can be smaller than
            # data_loader.batch_size, which would bias the weighted average.
            batch_size = image.size(0)
            image=image.to(device)
            targets = targets.to(device)
            optimizer.zero_grad()
            out = model(image)
            loss=criterion(out,targets.unsqueeze(1).type_as(out))
            del image,targets
            gc.collect()

            losses.update(loss.item(), batch_size)
            loss.backward()
            xm.optimizer_step(optimizer, barrier=True)
            # Refresh the progress bar inside the loop so the running loss is
            # visible while the epoch is in progress.
            tk0.set_postfix(loss=losses.avg)

        return losses.avg

    @staticmethod
    def evaluate(data_loader, model, device, use_tpu=False, criterion=None):
        """Run the model over `data_loader` without gradients.

        Args:
            data_loader: Iterable yielding ``(image, targets)`` batches.
            model: Module to evaluate.
            device: Device the batches are moved to.
            use_tpu: Accepted for interface compatibility; not used here.
            criterion: Optional loss. When given, the batch-size-weighted
                average loss is computed; when omitted the returned loss is 0.

        Returns:
            ``(final_predictions, average_loss)`` where ``final_predictions`` is
            a list with one CPU tensor of model outputs per batch.
        """
        losses = AverageMeter()
        final_predictions = []
        model.eval()
        with torch.no_grad():
            tk0 = tqdm(data_loader, total=len(data_loader), ascii=True)
            for b_idx, (image,targets) in enumerate(tk0):
                image=image.to(device)
                out = model(image)

                # Targets are only needed (and only moved to the device) when a
                # loss has been requested.
                if criterion is not None:
                    targets = targets.to(device)
                    loss = criterion(out, targets.unsqueeze(1).type_as(out))
                    losses.update(loss.item(), image.size(0))
                    tk0.set_postfix(loss=losses.avg)

                predictions = out.cpu()

                del image,targets,out
                gc.collect()
                final_predictions.append(predictions)
        return final_predictions, losses.avg

    @staticmethod
    def predict(data_loader, model, device, use_tpu=False):
        """Return the model outputs for every batch of `data_loader`.

        Args:
            data_loader: Iterable yielding ``(image, targets)`` batches; the
                targets are ignored.
            model: Module to run.
            device: Device the images are moved to.
            use_tpu: Accepted for interface compatibility; not used here.

        Returns:
            A list with one CPU tensor of model outputs per batch.
        """
        model.eval()
        final_predictions = []
        with torch.no_grad():
            tk0 = tqdm(data_loader, total=len(data_loader), ascii=True)
            for b_idx, (image,targets) in enumerate(tk0):
                image=image.to(device)
                predictions = model(image)
                predictions = predictions.cpu()

                del image,targets
                gc.collect()

                final_predictions.append(predictions)
        return final_predictions

In [None]:
def train(model,fold):
    """Train `model` with `fold` held out for validation.

    Reads the fold table from /kaggle/working/train_kfold.csv, trains for up to
    20 epochs on the remaining folds, prints the validation AUC each epoch and
    lets the early-stopping helper save to ``model_fold_<fold>.bin``.

    NOTE(review): the LR scheduler and the early-stopping helper both use
    patience=3, so the learning-rate reduction may seldom take effect before
    training stops — confirm this is intended.
    """
    folds_df = pd.read_csv("/kaggle/working/train_kfold.csv")
    device = xm.xla_device()
    epochs = 20

    held_out = folds_df.fold == fold
    df_train = folds_df[~held_out].reset_index(drop=True)
    df_valid = folds_df[held_out].reset_index(drop=True)

    image_dir = "../input/simm-isic-224-224/train3/"
    train_loader, _ = get_dataset(df_train, "train", path=image_dir)
    valid_loader, valid_tar = get_dataset(df_valid, "valid", path=image_dir)

    model = model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    # Both helpers are driven by validation AUC, hence mode="max".
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=3, threshold=0.001, mode="max",
    )
    es = EarlyStopping(patience=3, mode="max", tpu=True)
    criterion = nn.BCEWithLogitsLoss()

    for epoch in range(epochs):
        Engine.train(train_loader, model, optimizer, device, epoch, criterion)
        batch_outputs, _ = Engine.evaluate(valid_loader, model, device)
        # Stack the per-batch outputs into one flat score vector.
        scores = np.vstack(batch_outputs).flatten()
        auc = roc_auc_score(valid_tar, scores)
        print(f"Epoch:{epoch}, AUC: {auc}")
        scheduler.step(auc)
        es(auc, model, model_path=f"model_fold_{fold}.bin")
        if es.early_stop:
            print("early stop")
            break

In [None]:
# Train a fresh Effi model for each of folds 1-4.
# Fold 0 is not trained in this cell; its weights are loaded from an input
# dataset by predict_uploaded().
for fold in range(1, 5):
    model = Effi()
    train(model, fold)


In [None]:
 def predict_uploaded(fold, model_dir="../input/model-fold-0bin"):
     """Predict on the test set with weights stored in an input dataset.

     Args:
         fold: Fold number; selects ``model_fold_<fold>.bin`` inside
             ``model_dir``. With the defaults and ``fold=0`` this resolves to
             ``../input/model-fold-0bin/model_fold_0.bin``.
         model_dir: Directory holding the uploaded checkpoint.

     Returns:
         A flat NumPy array with one raw model output per test image.
     """
     df=pd.read_csv("../input/siim-isic-melanoma-classification/test.csv")
     device = xm.xla_device()

     path="../input/simm-isic-224-224/test3/"
     test_loader,_ = get_dataset(df,"test",path=path)

     model=Effi()
     # Honour the `fold` argument instead of always loading fold 0.
     model_save_path=f"{model_dir}/model_fold_{fold}.bin"
     # Load onto the CPU first so the checkpoint does not depend on the device
     # it was saved from; the model is moved to `device` right after.
     model.load_state_dict(torch.load(model_save_path, map_location="cpu"))
     model=model.to(device)

     preds=Engine.predict(test_loader,model, device)
     # Stack the per-batch outputs into one flat vector.
     preds=np.vstack(preds).flatten()
     return preds

In [None]:
def predict(fold):
    """Predict on the test set with the checkpoint ``./model_fold_<fold>.bin``.

    Returns:
        A flat NumPy array with one raw model output per test image.
    """
    test_df = pd.read_csv("../input/siim-isic-melanoma-classification/test.csv")
    device = xm.xla_device()

    test_loader, _ = get_dataset(test_df, "test", path="../input/simm-isic-224-224/test3/")

    net = Effi()
    net.load_state_dict(torch.load(f"./model_fold_{fold}.bin"))
    net = net.to(device)

    # One tensor per batch -> a single flat vector.
    batch_outputs = Engine.predict(test_loader, net, device)
    return np.vstack(batch_outputs).flatten()

In [None]:
# Fold 0 test predictions, computed from the checkpoint that predict_uploaded
# reads out of an input dataset rather than from a file written in this session.
p1 = predict_uploaded(0)

In [None]:

# Folds 1-4 are predicted from the checkpoints saved in the working directory;
# fold 0 (`p1`) was predicted in the previous cell.
fold_logits = [p1] + [predict(fold) for fold in range(1, 5)]

# The model head is a plain linear layer trained with BCEWithLogitsLoss, so the
# per-fold outputs are logits. Average them, then apply a sigmoid so the
# submission holds values in [0, 1] that can be blended with other models'
# probabilities. The sigmoid is monotonic, so the ranking of the averaged scores
# is unchanged.
mean_logits = np.mean(fold_logits, axis=0)
# Numerically stable sigmoid: exp(-log(1 + exp(-x))).
predictions = np.exp(-np.logaddexp(0, -mean_logits))

sample = pd.read_csv("../input/siim-isic-melanoma-classification/sample_submission.csv")
sample.loc[:, "target"] = predictions
sample.to_csv("sub_effi_b4_200817.csv", index=False)

In [None]:
import numpy as np
import pandas as pd
# Load the three submissions to blend, plus the submission template the blended
# result is written into.
sub_resnext, sub_xg, sub_effi, sample = map(
    pd.read_csv,
    (
        "../input/essenble/sub_200807_resnext_89.csv",
        "../input/essenble/sub_xgboost_0817.csv",
        "../input/essenble2/sub_effi_b4_200817.csv",
        "../input/siim-isic-melanoma-classification/sample_submission.csv",
    ),
)


In [None]:
# Blend weights for the three submissions (they sum to 1).
_W_RESNEXT, _W_XGB, _W_EFFI = 0.45, 0.1, 0.45

p1 = sub_resnext["target"].values
p2 = sub_xg["target"].values
p3 = sub_effi["target"].values

# Row-wise weighted sum; rows are combined by position.
# NOTE(review): this assumes all three files list the images in the same order
# as the template — confirm before submitting.
p = _W_RESNEXT * p1 + _W_XGB * p2 + _W_EFFI * p3

sample.loc[:, "target"] = p
sample.to_csv("sub_essenble_200817.csv", index=False)

In [None]:
# Display the loaded XGBoost submission for a visual check.
sub_xg

In [None]:
# Display the loaded EfficientNet submission for a visual check.
sub_effi