In [1]:
# https://github.com/selimsef/dfdc_deepfake_challenge/blob/master/training/pipelines/train_classifier.py
import argparse
import json
import os
import sys
import itertools
from collections import defaultdict, OrderedDict
import platform
# Project root. The notebook changes into it (so the relative paths used later --
# configs/, data/, logs/, weights/ -- resolve) and adds it to sys.path, presumably
# so the local `utils` and `training` packages imported below can be found.
# The RSNASTR_PATH environment variable overrides the machine-specific defaults,
# so the notebook can run on another host without editing this cell.
_DEFAULT_PATH = '/Users/dhanley/Documents/rsnastr' \
        if platform.system() == 'Darwin' else '/data/rsnastr'
PATH = os.environ.get('RSNASTR_PATH', _DEFAULT_PATH)
os.chdir(PATH)
# Re-running the cell must not keep appending the same entry to sys.path.
if PATH not in sys.path:
    sys.path.append(PATH)
import warnings
warnings.filterwarnings("ignore")
from sklearn.metrics import log_loss
from utils.logs import get_logger
from utils.utils import RSNAWEIGHTS, RSNA_CFG as CFG
from training.tools.config import load_config
import pandas as pd
import cv2

import torch
from torch.backends import cudnn
from torch.nn import DataParallel
from torch.utils.data import DataLoader
from torch.cuda.amp import autocast


from tqdm import tqdm
import torch.distributed as dist
from training.datasets.classifier_dataset import RSNAClassifierDataset, \
        nSampler, valSeedSampler, collatefn
from training.zoo import classifiers
from training.zoo.classifiers import validate
from training.tools.utils import create_optimizer, AverageMeter
from training.losses import getLoss
from training import losses

from tensorboardX import SummaryWriter

# Ask MKL, numexpr and OpenMP to use a single thread each.
# NOTE(review): these variables are set after torch / sklearn / cv2 were already
# imported above; some runtimes read them only when the library is first loaded,
# so confirm they still take effect at this point.
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"
os.environ["OMP_NUM_THREADS"] = "1"

# Switch off OpenCV's OpenCL path and its own thread pool (0 threads);
# presumably to avoid oversubscription next to the DataLoader workers -- TODO confirm.
cv2.ocl.setUseOpenCL(False)
cv2.setNumThreads(0)
import numpy as np
import albumentations as A
from albumentations.pytorch import ToTensor
# Logger shared by every cell below; its records are tagged 'Train' at INFO level.
logger = get_logger('Train', 'INFO') 

In [2]:
# Scratch snippet (inert string): visual check of the flip / transpose augmentations
# on a single JPEG.
'''
aug = A.Compose([
        # A.HorizontalFlip(p=1.), right/left
        A.VerticalFlip(p=1.),
        A.Transpose(p=0.),
    ])
fname = 'data/jpeg/train/4f632056046b/03dbda10118a/53ccebd24e14.jpg'
img = cv2.imread(fname)[:,:,::-1]
img = cv2.resize(img, (360, 360))
from PIL import Image
Image.fromarray(img)
Image.fromarray(aug(image=img)['image'])
'''


def _str2bool(value):
    """Convert a command-line token into a real bool.

    With ``type=str`` a value such as ``--augextra False`` becomes the string
    ``'False'``, which is truthy; this converter maps the usual spellings onto
    ``True`` / ``False`` instead.

    Raises:
        argparse.ArgumentTypeError: if the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('1', 'true', 't', 'yes', 'y'):
        return True
    if text in ('0', 'false', 'f', 'no', 'n', ''):
        return False
    raise argparse.ArgumentTypeError(f'expected a boolean, got {value!r}')


logger.info('Load args')
parser = argparse.ArgumentParser("PyTorch Xview Pipeline")
arg = parser.add_argument
arg('--config', metavar='CONFIG_FILE', help='path to configuration file')
arg('--workers', type=int, default=6, help='number of cpu threads to use')
arg('--device', type=str, default='cpu' if platform.system() == 'Darwin' else 'cuda', help='device for model - cpu/gpu')
arg('--gpu', type=str, default='0', help='List of GPUs for parallel training, e.g. 0,1,2,3')
arg('--output-dir', type=str, default='weights/')
arg('--resume', type=str, default='')
arg('--fold', type=int, default=0)
arg('--accum', type=int, default=1)
arg('--batchsize', type=int, default=4)
arg('--labeltype', type=str, default='all') # or 'single'
# Boolean switch selecting the heavier training augmentation pipeline.
arg('--augextra', type=_str2bool, default=False)
arg('--mixup_beta', type=float, default = 0.)
arg('--prefix', type=str, default='classifier_')
arg('--data-dir', type=str, default="data")
arg('--folds-csv', type=str, default='folds.csv.gz')
arg('--crops-dir', type=str, default='jpegip')
arg('--label-smoothing', type=float, default=0.01)
arg('--logdir', type=str, default='logs/b2_1820')
arg('--distributed', action='store_true', default=False)
arg('--freeze-epochs', type=int, default=0)
arg("--local_rank", default=0, type=int)
arg("--seed", default=777, type=int)
arg("--opt-level", default='O1', type=str)
arg("--test_every", type=int, default=1)
arg('--from-zero', action='store_true', default=False)
# Inside a notebook sys.argv carries the kernel's own arguments, so parse an
# explicit empty list: every option takes its default, and neither sys.argv nor
# the imported `sys` name is touched.
args = parser.parse_args([])

2020-10-04 20:35:21,105 - Train - INFO - Load args


In [3]:
# Settings for this run, written over the parsed defaults in one go.
vars(args).update(
    device='cuda',
    fold=0,
    accum=4,
    batchsize=32,
    logdir='logs/zoo',
    augextra=False,
    label_smoothing=0.0,
    config='configs/effnetb5_lr5e4_binary_accum.json',
)

In [4]:
# Other configs that have been tried; none of them is applied here:
#   'configs/b2.json'
#   'configs/b2_binary.json'
#   'configs/rnxt101_binary.json'
# The config actually loaded is whatever args.config currently points at.
conf = load_config(args.config)

# Try using imagenet means
def create_train_transforms(size=300, distort = False, augextra = None):
    """Build the albumentations pipeline applied to training images.

    A single definition replaces the two near-identical variants that used to be
    selected by an ``if``/``else`` around the ``def``.

    Args:
        size: target side length given to ``A.Resize``; also scales the random
            crops of the extended pipeline.
        distort: accepted for backward compatibility; not used.
        augextra: when truthy, random crops and a distortion stage are inserted
            before the shift/scale/rotate step. ``None`` (the default) falls back
            to ``args.augextra``, read when the function is called.

    Returns:
        An ``A.Compose`` that ends with normalisation using ``conf['normalize']``
        followed by ``ToTensor``.
    """
    if augextra is None:
        augextra = args.augextra

    steps = [
        #A.HorizontalFlip(p=0.5),   # right/left
        A.VerticalFlip(p=0.5),
    ]
    if augextra:
        steps += [
            # Exactly one of the two crop sizes is always applied (p=1.0).
            A.OneOf([
                A.RandomCrop(int(size*0.8), int(size*0.8), p = 0.5),
                A.RandomCrop(int(size*0.9), int(size*0.9), p = 0.5),
            ], p=1.0),
            # Half of the time, one of three geometric distortions.
            A.OneOf([
                A.ElasticTransform(p=0.5, alpha=120, sigma=120 * 0.05, alpha_affine=120 * 0.03),
                A.GridDistortion(p=0.5),
                A.OpticalDistortion(p=1, distort_limit=2, shift_limit=0.5),
            ], p=0.5),
        ]
    steps += [
        A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, value = 0,
                             rotate_limit=20, p=0.5, border_mode = cv2.BORDER_CONSTANT),
        # A.Cutout(num_holes=40, max_h_size=size//7, max_w_size=size//7, fill_value=128, p=0.5),
        #A.Transpose(p=0.5), # swing in -90 degrees
        A.Resize(size, size, p=1),
        A.Normalize(mean=conf['normalize']['mean'],
                    std=conf['normalize']['std'], max_pixel_value=255.0, p=1.0),
        ToTensor(),
    ]
    return A.Compose(steps)

def create_val_transforms(size=300, HFLIPVAL = 1.0, TRANSPOSEVAL = 1.0):
    """Build the validation pipeline: normalise, then convert to a tensor.

    ``size``, ``HFLIPVAL`` and ``TRANSPOSEVAL`` are accepted but not used; the
    flip / transpose steps they were meant for are disabled.
    """
    norm_cfg = conf['normalize']
    pipeline = [
        # Disabled steps, kept for reference:
        #   A.HorizontalFlip(p=HFLIPVAL)
        #   A.Transpose(p=TRANSPOSEVAL)
        A.Normalize(mean=norm_cfg['mean'], std=norm_cfg['std'],
                    max_pixel_value=255.0, p=1.0),
        ToTensor(),
    ]
    return A.Compose(pipeline)

In [5]:
# Keyword arguments that are the same for the train and validation datasets.
dataset_kwargs = dict(
    fold=args.fold,
    imgsize=conf['size'],
    crops_dir=args.crops_dir,
    imgclasses=CFG["image_target_cols"],
    studyclasses=CFG['exam_target_cols'],
    data_path=args.data_dir,
    folds_csv=args.folds_csv,
)

logger.info('Create traindatasets')
trndataset = RSNAClassifierDataset(
    mode="train",
    # label_smoothing is passed for the train split only
    label_smoothing=args.label_smoothing,
    transforms=create_train_transforms(conf['size']),
    **dataset_kwargs)

logger.info('Create valdatasets')
valdataset = RSNAClassifierDataset(
    mode="valid",
    transforms=create_val_transforms(conf['size']),
    **dataset_kwargs)

2020-10-04 20:35:21,201 - Train - INFO - Create traindatasets
2020-10-04 20:35:24,027 - Train - INFO - Create valdatasets


In [6]:
# --- Validation sampler and loader ---
valsampler = valSeedSampler(valdataset.data, N = 5000, seed = args.seed)
logger.info('-' * 50)
val_rows = valdataset.data.loc[valsampler.sampler]
logger.info(val_rows['pe_present_on_image'].value_counts())
# NOTE(review): num_workers is fixed at 8; the --workers option is not consulted
# anywhere in the cells shown.
loaderargs = dict(num_workers=8, pin_memory=False, drop_last=False, collate_fn=collatefn)
valloader = DataLoader(valdataset, sampler=valsampler, batch_size=args.batchsize, **loaderargs)

# --- Model: one output per image-level and per exam-level target column ---
logger.info('Create model and optimisers')
nclasses = len(CFG["image_target_cols"]) + len(CFG['exam_target_cols'])
model_factory = vars(classifiers)[conf['network']]
model = model_factory(encoder=conf['encoder'], nclasses=nclasses).to(args.device)

# Earlier, config-driven way of building the loss (disabled):
#   reduction = "mean"
#   losstype = list(conf['losses'].keys())[0]
#   criterion = getLoss("BCEWithLogitsLoss", args.device)

# --- Loss: weighted BCE, weight 1.0 followed by CFG['exam_weights'] ---
bce_wts = torch.tensor([1.] + CFG['exam_weights']).to(args.device)
criterion = torch.nn.BCEWithLogitsLoss(weight=bce_wts, reduction='mean')

# --- Optimiser / scheduler and bookkeeping for the training loop ---
optimizer, scheduler = create_optimizer(conf['optimizer'], model)
bce_best = 100
start_epoch = 0
batch_size = conf['optimizer']['batch_size']

2020-10-04 20:35:26,783 - Train - INFO - --------------------------------------------------
2020-10-04 20:35:26,798 - Train - INFO - 0    10000
1     5000
Name: pe_present_on_image, dtype: int64
2020-10-04 20:35:26,799 - Train - INFO - Create model and optimisers


In [7]:
# TensorBoard run directory: <logdir>/<prefix><encoder>_<fold>
os.makedirs(args.logdir, exist_ok=True)
run_prefix = conf.get("prefix", args.prefix)
summary_writer = SummaryWriter(f"{args.logdir}/{run_prefix}{conf['encoder']}_{args.fold}")

# --from-zero forces training to begin at epoch 0.
start_epoch = 0 if args.from_zero else start_epoch
current_epoch = start_epoch

# A gradient scaler exists only for mixed-precision runs off the CPU.
if conf['fp16'] and args.device != 'cpu':
    scaler = torch.cuda.amp.GradScaler()

# Common filename stem for every checkpoint written by the training loop.
snapshot_name = f"{run_prefix}{conf['network']}_{conf['encoder']}_{args.fold}_"
max_epochs = conf['optimizer']['schedule']['epochs']

logger.info('Start training')
# epoch number -> names of all images drawn during that epoch
epoch_img_names = defaultdict(list)

2020-10-04 20:35:30,021 - Train - INFO - Start training


In [None]:
'''
alldf = pd.read_csv('data/train.csv.zip')
allsampler = nSampler(alldf, pe_weight = 0.66, nmin = 2, nmax = 4, seed = None)
len(allsampler.sample(alldf)) * 0.8
'''
seenratio=0  # Ratio of seen in images in previous epochs

# Mixed precision is used only when the config asks for it and we are not on CPU
# (the same condition under which `scaler` was created).
use_amp = bool(conf['fp16']) and args.device != 'cpu'
# Number of micro-batches whose gradients are summed before one optimizer step.
accum = max(1, args.accum)

# Make sure the directory that will receive checkpoints exists *before* spending
# an epoch on training; the file names below are output_dir + snapshot_name + suffix.
if args.output_dir is not None:
    ckpt_dir = os.path.dirname(args.output_dir + snapshot_name)
    if ckpt_dir:
        os.makedirs(ckpt_dir, exist_ok=True)


def _apply_accumulated_grads():
    """Take one optimizer step with the gradients accumulated so far, then clear them."""
    if use_amp:
        scaler.step(optimizer)
        scaler.update()
    else:
        optimizer.step()
    optimizer.zero_grad()


for epoch in range(start_epoch, max_epochs):
    # Here we took out a load of things, check back
    # https://github.com/selimsef/dfdc_deepfake_challenge/blob/9925d95bc5d6545f462cbfb6e9f37c69fa07fde3/training/pipelines/train_classifier.py#L188-L201

    # ------------------------------ TRAIN ------------------------------
    # NOTE: this meter reuses the name of the `losses` module imported from
    # `training` at the top of the notebook; the module is not needed below.
    losses = AverageMeter()
    max_iters = conf["batches_per_epoch"]
    # A fresh, unseeded sampler every epoch.
    trnsampler = nSampler(trndataset.data,
                          pe_weight = conf['pe_ratio'],
                          nmin = conf['studynmin'],
                          nmax = conf['studynmax'],
                          seed = None)
    if current_epoch == 0:
        trncts = trndataset.data.iloc[trnsampler.sample(trndataset.data)].pe_present_on_image.value_counts()
        valcts = valdataset.data.iloc[valsampler.sample(valdataset.data)].pe_present_on_image.value_counts()
        logger.info(f'Train class balance:\n{trncts}')
        logger.info(f'Valid class balance:\n{valcts}')
    trnloader = DataLoader(trndataset, batch_size=args.batchsize, sampler = trnsampler, **loaderargs)
    model.train()
    # Start every epoch from clean gradients so nothing leaks across epochs.
    optimizer.zero_grad()
    pbar = tqdm(enumerate(trnloader), total=max_iters, desc="Epoch {}".format(current_epoch), ncols=0)
    if conf["optimizer"]["schedule"]["mode"] == "current_epoch":
        scheduler.step(current_epoch)

    pending = 0  # micro-batches back-propagated since the last optimizer step
    for i, sample in pbar:
        epoch_img_names[current_epoch] += sample['img_name']
        imgs = sample["image"].to(args.device)
        # logger.info(f'Mean {imgs.mean()} std {imgs.std()} ')
        labels = sample["labels"].to(args.device).float()
        # NOTE(review): the loss is not divided by `accum`, so the applied gradient
        # is the sum over the accumulated micro-batches (unchanged from before);
        # confirm the configured learning rate assumes that.
        if use_amp:
            with autocast():
                out = model(imgs)
                loss = criterion(out, labels) # 0.6710
            scaler.scale(loss).backward()
        else:
            out = model(imgs)
            loss = criterion(out, labels)
            loss.backward()
        pending += 1

        # Step after every `accum` micro-batches (not on the very first batch),
        # and flush whatever is pending on the epoch's final iteration.
        last_iter = (i == max_iters - 1)
        if pending == accum or last_iter:
            _apply_accumulated_grads()
            pending = 0

        losses.update(loss.item(), imgs.size(0))
        pbar.set_postfix({"lr": float(scheduler.get_lr()[-1]), "epoch": current_epoch,
                          "loss": losses.avg, 'seen_prev': seenratio })

        if conf["optimizer"]["schedule"]["mode"] in ("step", "poly"):
            scheduler.step(i + current_epoch * max_iters)
        if last_iter:
            break
    if pending:
        # The loader ran out before max_iters; apply the remaining gradients.
        _apply_accumulated_grads()
    pbar.close()

    # Share of this epoch's images that already appeared in an earlier epoch.
    if epoch > 0 and epoch_img_names[epoch]:
        seen = set(epoch_img_names[epoch]).intersection(
            set(itertools.chain(*[epoch_img_names[i] for i in range(epoch)])))
        seenratio = len(seen)/len(epoch_img_names[epoch])

    for idx, param_group in enumerate(optimizer.param_groups):
        lr = param_group['lr']
        summary_writer.add_scalar('group{}/lr'.format(idx), float(lr), global_step=current_epoch)
    # Logged once per epoch (previously repeated for every parameter group).
    summary_writer.add_scalar('train/loss', float(losses.avg), global_step=current_epoch)

    # ---------------------------- VALIDATE -----------------------------
    model = model.eval()
    bce, acc, probdf = validate(model, valloader, device = args.device, logger = logger, half = False)

    if args.local_rank == 0:
        summary_writer.add_scalar('val/bce', float(bce), global_step=current_epoch)
        if bce < bce_best:
            print("Epoch {} improved from {:.5f} to {:.5f}".format(current_epoch, bce_best, bce))
            if args.output_dir is not None:
                torch.save({
                    'epoch': current_epoch + 1,
                    'state_dict': model.state_dict(),
                    'bce_best': bce,
                }, args.output_dir + snapshot_name + f"_fold{args.fold}_best_dice___testme")
                # The probability dump needs output_dir just like the checkpoint does.
                probdf.to_csv(args.output_dir + snapshot_name + f"_fold{args.fold}_best_probs___testme.csv", index = False)
            bce_best = bce
        print("Epoch: {} bce: {:.5f}, bce_best: {:.5f}".format(current_epoch, bce, bce_best))

    # Per-epoch checkpoint, written regardless of local_rank.
    # NOTE(review): 'bce_best' stores this epoch's bce here, not the running best.
    if args.output_dir is not None:
        torch.save({
            'epoch': current_epoch + 1,
            'state_dict': model.state_dict(),
            'bce_best': bce,
            }, args.output_dir + snapshot_name + f"_fold{args.fold}_epoch{current_epoch}")
    current_epoch += 1

2020-10-04 20:35:31,155 - Train - INFO - Train class balance:
0    10976
1     3390
Name: pe_present_on_image, dtype: int64
2020-10-04 20:35:31,156 - Train - INFO - Valid class balance:
0    10000
1     5000
Name: pe_present_on_image, dtype: int64
Epoch 0:  90% 449/500 [01:58<00:13,  3.80it/s, lr=0.0005, epoch=0, loss=0.0656, seen_prev=0]
142it [00:17,  8.73it/s]

In [None]:
# Ad-hoc check: recomputes the loss on `out` / `labels`, which exist only as
# leftovers from the last batch processed by the training loop; on a fresh kernel
# this cell fails unless that loop has run at least one iteration.
criterion(out, labels)