In [1]:
#default_exp train

In [2]:
#export
from rsna_retro.imports import *
from rsna_retro.metadata import *
from rsna_retro.preprocess import *

In [3]:
# Make CUDA device index 3 the current device for this session.
# The index is hard-coded; change it to match the GPUs available on the machine running the notebook.
torch.cuda.set_device(3)

In [4]:
# Preview the first rows of `df_comb`. It is not defined in this notebook;
# presumably it is provided by the rsna_retro star imports above - TODO confirm.
df_comb.head()

Unnamed: 0,SOPInstanceUID,Modality,PatientID,StudyInstanceUID,SeriesInstanceUID,StudyID,ImagePositionPatient,ImageOrientationPatient,SamplesPerPixel,PhotometricInterpretation,...,MultiWindowCenter,WindowCenter1,MultiWindowWidth,WindowWidth1,any,epidural,intraparenchymal,intraventricular,subarachnoid,subdural
0,ID_352e89f1c,CT,ID_d557ddd2,ID_05074a0d95,ID_be6165332c,,-125.0,1.0,1,MONOCHROME2,...,,,,,0,0,0,0,0,0
1,ID_3cf4fb50f,CT,ID_16b2ad86,ID_c3a404ea2e,ID_2c1454e208,,-125.0,1.0,1,MONOCHROME2,...,1.0,36.0,1.0,80.0,0,0,0,0,0,0
2,ID_e3674b189,CT,ID_eb712bf0,ID_db83193795,ID_e1facea145,,-125.0,1.0,1,MONOCHROME2,...,,,,,0,0,0,0,0,0
3,ID_2a8702d25,CT,ID_ff137633,ID_d17053848c,ID_7098f7c836,,-126.437378,1.0,1,MONOCHROME2,...,,,,,1,0,1,1,0,0
4,ID_7be0f1b3c,CT,ID_cd9169c2,ID_b42de79024,ID_f5bd86b25b,,-125.0,1.0,1,MONOCHROME2,...,,,,,0,0,0,0,0,0


In [5]:
#export
def get_pil_fn(p):
    """Return a loader that opens the JPEG under `p` belonging to a `df_comb` index label.

    The returned function takes one label of `df_comb`'s index, reads that row's
    `SOPInstanceUID`, and calls `PILCTScan.create` on `p/'<SOPInstanceUID>.jpg'`.
    """
    def _open(fn):
        # The file on disk is named after the row's SOPInstanceUID, not after the index label.
        row = df_comb.loc[fn]
        return PILCTScan.create(p/f'{row.SOPInstanceUID}.jpg')
    return _open

In [6]:
# #export
# def patient_cv(idx): return np.concatenate([patient_grps[o] for o in range_of(patient_grps) if o!=idx])
# def fn2label(fn): return df_comb.loc[fn][htypes].values.astype(np.float32)

In [7]:
#export
def fn2label(fn):
    """Return the `htypes` columns of row `fn` in `df_comb` as a float32 NumPy array."""
    row = df_comb.loc[fn]
    return row[htypes].values.astype(np.float32)

In [8]:
#export
def get_data_gen(fns, bs, img_tfm, mean, std, splits, sz=None, nw=8,
        wgts=None, batch_xtra=None, after_item=None, with_aug=True, **kwargs):
    """Build a databunch over `fns` with image inputs and multi-label `htypes` targets.

    fns:        items handed to `DataSource`; each is passed to `img_tfm` and `fn2label`.
    bs, nw:     batch size and `num_workers` for the databunch.
    img_tfm:    callable turning an item into an image (followed by `ToTensor`).
    mean, std:  statistics given to `Normalize.from_stats`.
    splits:     forwarded to `DataSource(..., splits=splits)`.
    sz:         if not None, a `RandomResizedCropGPU(sz, ...)` is appended to the batch transforms.
    wgts:       if not None, `weighted_databunch(wgts, ...)` is used instead of `databunch(...)`.
    batch_xtra: extra batch transform(s) added after the default ones.
    after_item: forwarded unchanged as the databunch's `after_item`.
    with_aug:   when true, `aug_transforms(**kwargs)` is added to the batch transforms.
    """
    # One transform list for the inputs, one for the targets.
    x_tfms = [img_tfm, ToTensor]
    y_tfms = [fn2label, EncodedMultiCategorize(htypes)]
    dsrc = DataSource(fns, [x_tfms, y_tfms], splits=splits)

    batch_tfms = L(IntToFloatTensor, Normalize.from_stats(mean,std), Cuda()) + L(batch_xtra)
    if with_aug: batch_tfms += aug_transforms(**kwargs)
    if sz is not None:
        crop = RandomResizedCropGPU(sz, min_scale=0.7, ratio=(1.,1.), valid_scale=0.9)
        batch_tfms = batch_tfms+[crop]

    # Both databunch flavours take the same loader arguments.
    dl_args = dict(bs=bs, num_workers=nw, after_item=after_item, after_batch=batch_tfms)
    if wgts is not None:
        return dsrc.weighted_databunch(wgts, **dl_args)
    return dsrc.databunch(**dl_args)



In [9]:
#export
# Statistics that `get_data` hands to `get_data_gen`, where they go to `Normalize.from_stats`.
# NOTE(review): three values each, presumably one per image channel on a 0-255 scale.
# `get_data_gen` normalizes after `IntToFloatTensor`, so confirm the scales actually match.
mean = [ 56.2214,  62.1220, 141.9133]
std = [93.0132, 77.8876, 50.8730]

In [10]:
def filename(o):
    """Return the last path component of `o` with its final extension stripped."""
    base = os.path.basename(o)
    stem, _ext = os.path.splitext(base)
    return stem
# fns = L(list(df_comb.fname)).map(filename)

In [11]:
# The dataset items are the index labels of `df_comb`;
# `get_pil_fn` and `fn2label` each look an item up with `df_comb.loc[...]`.
fns = L(list(df_comb.index))

In [12]:
def get_data(bs, sz):
    """Build the databunch for batch size `bs` and crop size `sz` from the notebook-level settings.

    Uses the notebook-level `fns`, `path`, `mean`, `std` and `splits`.
    NOTE(review): `path` and `splits` are not defined in the visible cells;
    presumably they come from the rsna_retro star imports - confirm.
    """
    # Sizes up to 256 read from the 'nocrop_jpg256' folder, larger sizes from 'nocrop_jpg'.
    if sz <= 256: img_dir = 'nocrop_jpg256'
    else: img_dir = 'nocrop_jpg'
    loader = get_pil_fn(path/img_dir)
    return get_data_gen(fns, bs=bs, img_tfm=loader, sz=sz, mean=mean, std=std, splits=splits)

In [13]:
# dbch.one_batch()

## Training

In [14]:
def accuracy_any(inp, targ, thresh=0.5, sigmoid=True):
    """Binary accuracy computed on column 0 of `inp` and `targ` only.

    `inp[:,0]` is optionally passed through a sigmoid, thresholded at `thresh`,
    and compared with `targ[:,0]` cast to bool; the mean agreement is returned.
    NOTE(review): column 0 is presumably the 'any' label (first entry of `htypes`) - confirm.
    """
    preds, truth = flatten_check(inp[:,0], targ[:,0])
    if sigmoid: preds = preds.sigmoid()
    hits = (preds > thresh) == truth.bool()
    return hits.float().mean()


def get_loss(scale=None):
    """Return a `BaseLoss` wrapping `nn.BCEWithLogitsLoss` with per-class weights.

    The six weights start as (2, 1, 1, 1, 1, 1) and are rescaled so they sum to 6.
    `scale`, when given, is expanded to a six-element tensor on the device, but it is
    not passed to the loss: the `pos_weight` argument is commented out below.
    """
    num_classes = 6
    raw_wgts = to_device(tensor(2.0, 1, 1, 1, 1, 1))
    loss_weights = raw_wgts/raw_wgts.sum()*num_classes

    if scale is not None:
        # Built for `pos_weight`, which is currently disabled in the call below.
        scale = to_device(tensor([scale]*num_classes))

    return BaseLoss(
        nn.BCEWithLogitsLoss,
        weight=loss_weights,
        # pos_weight=scale,
        floatify=True,
        flatten=False,
        is_2d=False,
        activation=torch.sigmoid,
    )



In [15]:
#export
def get_learner(dbch, arch, lf, pretrained=True, opt_func=None, metrics=None, fp16=True, config=None):
    """Create a `cnn_learner` for `dbch`/`arch` with loss `lf`, optionally converted to fp16.

    Defaults applied when the argument is None:
      metrics  -> [accuracy_multi, accuracy_any]
      opt_func -> partial(Adam, wd=1e-5, eps=1e-4, sqr_mom=0.999)
      config   -> dict(ps=0., lin_ftrs=[], concat_pool=False)
    The learner is always created with lr=3e-3. Returns `learn.to_fp16()` when `fp16`
    is true, otherwise the learner itself.

    NOTE(review): `accuracy_any` is defined in a notebook cell that carries no export
    flag, so the generated module may have to get it from elsewhere - confirm.
    """
    metrics = [accuracy_multi,accuracy_any] if metrics is None else metrics
    opt_func = partial(Adam, wd=1e-5, eps=1e-4, sqr_mom=0.999) if opt_func is None else opt_func
    config = dict(ps=0., lin_ftrs=[], concat_pool=False) if config is None else config
    learn = cnn_learner(
        dbch, arch,
        pretrained=pretrained, loss_func=lf, lr=3e-3,
        opt_func=opt_func, metrics=metrics, config=config,
    )
    if not fp16: return learn
    return learn.to_fp16()

In [16]:
# def fit_tune(bs, sz, epochs, lr):
#     learn.dbunch = get_data(bs, sz)
#     do_fit(learn, epochs, lr, freeze=False)

In [17]:
#export
def do_fit(learn, epochs, lr, freeze=True, do_slice=False, **kwargs):
    """Run an optional frozen warm-up epoch, then `epochs` unfrozen one-cycle epochs.

    learn:    object exposing `freeze`, `unfreeze` and `fit_one_cycle`.
    epochs:   number of epochs for the unfrozen run.
    lr:       learning rate; with `do_slice` it is replaced by `slice(lr*3, lr)`.
    freeze:   when true, first freeze and fit one epoch with div=2, div_final=1, pct_start=0.1.
    kwargs:   forwarded to the unfrozen `fit_one_cycle` call only.

    NOTE(review): the slice starts at the larger value (lr*3) - confirm this is intended.
    """
    rate = slice(lr*3,lr) if do_slice else lr
    if freeze:
        warmup = dict(div=2, div_final=1, pct_start=0.1)
        learn.freeze()
        learn.fit_one_cycle(1, rate, **warmup)
    # The main run always happens with all layers trainable.
    learn.unfreeze()
    learn.fit_one_cycle(epochs, rate, **kwargs)

In [18]:
# Prefix of the checkpoint names written under runs/ by the training cells below.
name = 'baseline'

In [19]:
# Stage 1: batch size 512, sz=128 (get_data reads the 'nocrop_jpg256' folder for sz <= 256).
dbch = get_data(512, 128)
# xresnet34 without pretrained weights, default loss weights from get_loss().
learn = get_learner(dbch, xresnet34, get_loss(), pretrained=False)
# freeze=False skips do_fit's frozen warm-up epoch; 20 one-cycle epochs at lr 4e-2 are requested.
# NOTE(review): the stored output lists only epochs 0-9, so it may be stale or truncated.
do_fit(learn, 20, 4e-2, freeze=False)
learn.save(f'runs/{name}-1')

epoch,train_loss,valid_loss,accuracy_multi,accuracy_any,time
0,0.16287,7.570532,0.529087,0.145647,13:04
1,0.126649,0.637827,0.942404,0.854353,10:04
2,0.113041,1.62964,0.942404,0.854353,07:55
3,0.107699,0.426019,0.942404,0.854353,07:52
4,0.103225,0.44078,0.942404,0.854353,14:47
5,0.101744,1.985129,0.279975,0.145647,08:15
6,0.099383,0.601782,0.942404,0.854353,07:56
7,0.098325,0.358947,0.942404,0.854353,07:55
8,0.094793,4.170136,0.631901,0.854353,07:54
9,0.093495,0.415557,0.942404,0.854353,09:48


In [20]:
# Stage 2: keep the same learner and swap in sz=224 data (batch size 512, still the 'nocrop_jpg256' folder).
learn.dbunch = get_data(512, 224)
# 12 one-cycle epochs at lr 5e-3 are requested, without the frozen warm-up epoch.
# NOTE(review): the stored output lists only epochs 0-9, so it may be stale or truncated.
do_fit(learn, 12, 5e-3, freeze=False)
learn.save(f'runs/{name}-2')

epoch,train_loss,valid_loss,accuracy_multi,accuracy_any,time
0,0.076842,0.490044,0.935474,0.850391,15:48
1,0.07572,0.476649,0.942404,0.854353,15:42
2,0.072822,0.597649,0.942404,0.854353,15:39
3,0.071933,4.806316,0.377434,0.145647,15:39
4,0.071165,0.368568,0.93876,0.854353,15:39
5,0.068153,0.785758,0.942404,0.854353,15:40
6,0.066191,0.271979,0.932308,0.794656,15:40
7,0.063084,0.191709,0.94827,0.873055,15:44
8,0.059921,0.723891,0.781237,0.527979,15:39
9,0.054106,0.099686,0.970429,0.940579,15:43


In [21]:
# Stage 3: sz=384 exceeds 256, so get_data reads the 'nocrop_jpg' folder; the batch size drops to 256.
learn.dbunch = get_data(256, 384)
# 4 one-cycle epochs at lr 1e-3, without the frozen warm-up epoch.
do_fit(learn, 4, 1e-3, freeze=False)
learn.save(f'runs/{name}-3')

epoch,train_loss,valid_loss,accuracy_multi,accuracy_any,time
0,0.066193,0.354325,0.94145,0.854034,47:17
1,0.064231,0.805654,0.819606,0.721301,45:29
2,0.059756,0.36733,0.948943,0.877569,46:26
3,0.052612,0.075097,0.976327,0.955713,47:05


## Submission

In [3]:
#export
def submission(df_tst, preds, fn='submission'):
    """Write predictions to `<fn>.csv` in long ID/Label form and return the DataFrame.

    For every pair of (index label of `df_tst`, row of `preds`) and every label in
    `htypes`, one output row is produced with ID `'<index>_<label>'` and the
    prediction formatted with ten decimals. The pairing uses `zip`, so it stops
    at the shorter of `df_tst.index` and `preds`.
    """
    rows = [
        (f"{idx}_{label}", '{0:1.10f}'.format(pred[i].item()))
        for idx, pred in zip(df_tst.index, preds)
        for i, label in enumerate(htypes)
    ]
    df_csv = pd.DataFrame({'ID': [r[0] for r in rows], 'Label': [r[1] for r in rows]})
    df_csv.to_csv(f'{fn}.csv', index=False)
    return df_csv

In [None]:
# Writes 'submission.csv' (the default `fn`) in the working directory and keeps the frame.
# NOTE(review): `df_tst` and `probs` are not defined in the visible cells - they must exist in the kernel.
pred_csv = submission(df_tst, probs)

In [None]:
from IPython.display import FileLink, FileLinks
# Show a link to the submission file.
# NOTE(review): `nm` is not defined in the visible cells, and the 'subm/' path differs from the
# default output path of `submission` ('submission.csv') - confirm which file is meant.
FileLink(f'subm/{nm}.csv')

In [None]:
from kaggle import api
# Submit the CSV through the Kaggle API; the arguments are presumably (file path, message, competition slug).
# NOTE(review): relies on `nm`, which is not defined in the visible cells, and on Kaggle credentials
# being configured outside this notebook.
api.competition_submit(f'subm/{nm}.csv', '0.0744,rn34_repl (scl 0.88)', 'rsna-intracranial-hemorrhage-detection')

## Export

In [3]:
#hide
from nbdev.export import notebook2script
# Run the nbdev conversion; the stored output lists every notebook it converted.
notebook2script()

Converted 01_data_01_metadata_stage2.ipynb.
Converted 01_data_02_preprocess_windows.ipynb.
Converted 02_train_01_train.ipynb.
Converted 04_orig_replace_ashaw_refactor.ipynb.
Converted 04_replace_ashaw_refactor.ipynb.
Converted 04b_orig_replace_ashaw_refactor.ipynb.
Converted 10_qure.ipynb.
Converted 12_merge.ipynb.
Converted 14_xgboost.ipynb.
Converted 16_slice_e2e-shallow.ipynb.
Converted 16b_orig_slice_e2e-shallow.ipynb.
This cell doesn't have an export destination and was ignored:
e
Converted 17_slice_model-deep.ipynb.
Converted 21_cleanup-nocrop2.ipynb.
Converted 26_submit_final.ipynb.
Converted 27_ensemble_tabular_nn.ipynb.
Converted 99_index.ipynb.
Converted cleanup-combine-qure.ipynb.
Converted delete_03b_cleanup-tif.ipynb.
Converted submit.ipynb.
Converted walkthru.ipynb.
Converted x00_tcia-ct-segm-prep.ipynb.
Converted x00_tcia-ct-segm-train.ipynb.
