In [2]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
import matplotlib.pyplot as plt
import warnings
import seaborn as sns

img_row,img_col = 224,224
color_type = 3
batch_size=48
epochs=300
num_classes=4
subject='Melanoma'
main_path=os.path.join("E:\\kaggle_imgs",subject)
img_path=os.path.join(main_path,"images")
data_path=os.path.join(main_path,"Data")
saved_path=os.path.join(main_path,"saved_models")
paths=[main_path, img_path,saved_path,data_path]
for fp in paths:
    print(fp)
    if not os.path.exists(fp):        
        os.mkdir(fp)
file_best=os.path.join(saved_path,"200628__epoch_ 0_acc_92.00")
train_info=os.path.join(data_path,"df_train_fold.csv")



E:\kaggle_imgs\Melanoma
E:\kaggle_imgs\Melanoma\images
E:\kaggle_imgs\Melanoma\saved_models
E:\kaggle_imgs\Melanoma\Data


In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

In [3]:
import cv2
from PIL import Image

In [4]:
from wtfml.utils import EarlyStopping
from wtfml.engine import Engine
from wtfml.data_loaders.image import ClassificationLoader

## Load train Data


In [5]:
def read_train_info():
    fp=train_info
    if False and os.path.exists(fp):
        try:
            print('loading train data from csv', flush=True)
            df_train=pd.read_csv(fp)
            print('complete!', flush=True)
        except EOFError:
            print('EOFError raised.', flush=True)
    else:
        df_train=pd.read_csv(data_path+"/train.csv")
        X=df_train.sample(frac=1).reset_index(drop=True)
        y=df_train['target'].values
        skf=StratifiedKFold(n_splits=5,random_state=22)        
        df_train["fold"]=-1
        for i, (trn_idx,val_idx) in enumerate(skf.split(X,y)):
            df_train.loc[val_idx,"fold"]=i
    return df_train

df_train=read_train_info()
df_train.to_csv(train_info,index=False)



## Augmentation

In [6]:
from albumentations import (
    HorizontalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
    Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
    IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, RandomBrightnessContrast, IAAPiecewiseAffine,
    IAASharpen, IAAEmboss, Flip, OneOf, Compose, Rotate, Cutout, VerticalFlip, Normalize
)
from albumentations.pytorch import ToTensor

mean=(0.485,0.456,0.406)
std=(0.229,0.224,0.225)
train_aug= Compose([
    Normalize(mean,std,max_pixel_value=255.0,always_apply=True),
    ShiftScaleRotate(shift_limit=0.0625,scale_limit=0.1,rotate_limit=15),
    HorizontalFlip(p=0.5),VerticalFlip(p=0.5),
    ToTensor()])
valid_aug=Compose([
    Normalize(mean,std,max_pixel_value=255.0,always_apply=True),
    ToTensor()])
test_aug=Compose([
    Normalize(mean,std,max_pixel_value=255.0,always_apply=True),
    ToTensor()])

## Dataset

In [8]:
from torch.utils.data import Dataset
class PlantDataset(Dataset):
    def __init__(self, df, tr=None,Test=False,folder="/train/"):
        self.df = df
        self.tr=tr
        self.folder =folder
        self.isTest=1 if "test" in folder else 0
    def __len__(self):
        return self.df.shape[0]
    def __getitem__(self, idx):
        name=self.df.iloc[idx].image_name
        image_src=img_path+self.folder+name+".jpg"
        img = Image.open(image_src)
        img=np.array(img)
        
        labels=0
        labels = self.df.iloc[idx].target if self.isTest is 0 else 0
        img=self.tr(image=img)["image"]
        
        return img, labels

## Dataset and DataLoader

In [9]:
def get_images_by_fold(fold):
    trn_fold=df_train[df_train.fold != fold].reset_index(drop=True)
    val_fold=df_train[df_train.fold == fold].reset_index(drop=True)
    trainset = PlantDataset(df=trn_fold, tr=train_aug,folder="/train3/")
    validset =PlantDataset(df=val_fold, tr=valid_aug,folder="/train3/")

    train_loader = torch.utils.data.DataLoader(trainset,
                                              batch_size=batch_size,
                                              shuffle=True, num_workers=0)
    valid_loader = torch.utils.data.DataLoader(validset,
                                              batch_size=batch_size,
                                              shuffle=False, num_workers=0)
    return train_loader,valid_loader

## Model

In [10]:
import torchvision.models as models

In [40]:
def get_model(pretrained=True):
    resnext50_32x4d = models.resnext50_32x4d(pretrained=pretrained)
    num_ftrs = resnext50_32x4d.fc.in_features
    resnext50_32x4d.fc = nn.Linear(num_ftrs, 2)
    return resnext50_32x4d

In [12]:
criterion=nn.CrossEntropyLoss().to(device)
optimizer=torch.optim.SGD(resnext50_32x4d.parameters(),lr=1e-2,momentum=0.9,weight_decay=5e-4)
lr_sched=optim.lr_scheduler.StepLR(optimizer,step_size=10,gamma=0.5)

## define acc check func

In [29]:
best_acc=0
def acc_check(net, test_set, epoch,fold):
    correct = 0
    total = 0
    global best_acc
    global cur_best
    with torch.no_grad():
        for images, labels in test_set:
            images = images.to(device)
            labels = labels.to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    acc = (100 * correct / total)
    if True or best_acc<acc:
        print("acc",acc)
        best_acc=acc
        bf=saved_path+"/fold_%d_200711.pth"%(fold)
        state = {
        'epoch': epoch,
        'state_dict': resnext50_32x4d.state_dict(),
        'optimizer': optimizer.state_dict(),
        }
        torch.save(state, bf)
        cur_best=bf
    return acc


## Best Model Load

In [None]:
 if os.path.isfile(file_best):
    checkpoint = torch.load(file_best)
    resnext50_32x4d.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    print("Load Complete",cur_best)

## Train

In [33]:
import time
def train(fold):
    epochs = 2
    start=time.time()
    train_loader,valid_loader=get_images_by_fold(fold)
    resnext50_32x4d=get_model()
    resnext50_32x4d=resnext50_32x4d.to(device)
    for epoch in range(epochs):  # loop over the dataset multiple times
        running_loss = 0.0
        lr_sched.step()

        for i, (inputs, labels)  in enumerate(train_loader):
            # get the inputs
            inputs = inputs.to(device)
            labels = labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = resnext50_32x4d(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if (i +1)% 100 ==0:    # print every 30 mini-batches
                print('[%d, %5d] loss: %.3f, time:%d' % (epoch + 1, i + 1, running_loss / 100, time.time()-start))
                running_loss = 0.0
                start=time.time()
            
            #Check Accuracy
        acc = acc_check(resnext50_32x4d, valid_loader, epoch, fold)

In [28]:
train(0)

[1,   100] loss: 0.076, time:58
[1,   200] loss: 0.067, time:57
[1,   300] loss: 0.061, time:57
[1,   400] loss: 0.057, time:58
[1,   500] loss: 0.061, time:57
acc 98.2493208572291
[2,   100] loss: 0.064, time:121
[2,   200] loss: 0.062, time:56
[2,   300] loss: 0.068, time:57


KeyboardInterrupt: 

In [36]:
train(1)
train(2)
train(3)
train(4)

[1,   100] loss: 0.869, time:58
[1,   200] loss: 0.870, time:57
[1,   300] loss: 0.871, time:57
[1,   400] loss: 0.871, time:56
[1,   500] loss: 0.869, time:57
acc 15.71320754716981
[2,   100] loss: 0.869, time:121
[2,   200] loss: 0.869, time:57
[2,   300] loss: 0.870, time:57
[2,   400] loss: 0.872, time:57
[2,   500] loss: 0.870, time:56
acc 15.71320754716981
[1,   100] loss: 0.566, time:58
[1,   200] loss: 0.565, time:59
[1,   300] loss: 0.566, time:58
[1,   400] loss: 0.566, time:58
[1,   500] loss: 0.567, time:58
acc 87.01886792452831
[2,   100] loss: 0.565, time:125
[2,   200] loss: 0.565, time:57
[2,   300] loss: 0.566, time:57
[2,   400] loss: 0.567, time:57
[2,   500] loss: 0.566, time:56
acc 87.01886792452831
[1,   100] loss: 0.680, time:58
[1,   200] loss: 0.681, time:57
[1,   300] loss: 0.679, time:58
[1,   400] loss: 0.681, time:57
[1,   500] loss: 0.681, time:60
acc 57.26792452830189
[2,   100] loss: 0.680, time:126
[2,   200] loss: 0.681, time:58
[2,   300] loss: 0.680,

In [49]:
def predict(fold):
    df_test = pd.read_csv(data_path+"/test.csv")
    testset = PlantDataset(df=df_test, tr=test_aug,folder="/test3/")
    test_loader = torch.utils.data.DataLoader(testset,
                                              batch_size=batch_size,
                                              shuffle=False, num_workers=0)
    model_path=saved_path+"/fold_%d_200711.pth"%(fold)
    model = get_model(pretrained=False)
    
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint['state_dict'])
    model.to(device)

    pred=[]
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            pred.append(predicted.to("cpu"))
    return pred

In [50]:
p0=predict(0)

In [53]:
np0=np.array(p0)

ValueError: only one element tensors can be converted to Python scalars

In [63]:
all_data=[]
for t in p0:
    all_data.append(t.numpy())

In [67]:
all_data

[array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0], dtype=int64),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0], dtype=int64),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0], dtype=int64),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0], dtype=int64),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0], dtype=int64),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 