In [1]:
import glob

import numpy as np
import pandas as pd
import torch
from scipy.signal import windows
from sklearn.metrics import log_loss
from torch.nn import functional as F
# from tqdm import tqdm_notebook as tqdm
from tqdm import tqdm

from rsna19.configs.second_level import Config

In [2]:
# Folds whose cached L1 predictions are used to fit the L2 model.
# NOTE: with the stage 2 setting below, fold 4 is in both lists, so the
# "val" numbers reported later are computed on data the model was fitted on.
# train_folds = [0, 1, 2, 3] # for testing 
train_folds = [0, 1, 2, 3, 4]  # for stage 2 submission
val_folds = [4]

cache_dir = '/home/dmytro/ml/kaggle-rsna-2019/output/cache_stage2'


def _load_cached(folds, name):
    """Load `<cache_dir>/fold<f>/<name>.npy` for every fold in `folds` and
    concatenate the arrays along dim 0 as one float32 tensor."""
    per_fold = []
    for f in folds:
        per_fold.append(torch.tensor(np.load(f'{cache_dir}/fold{f}/{name}.npy'), dtype=torch.float32))
    return torch.cat(per_fold, dim=0)


train_x = _load_cached(train_folds, 'x')
train_y = _load_cached(train_folds, 'y')
val_x = _load_cached(val_folds, 'x')
val_y = _load_cached(val_folds, 'y')

# Number of L1 models whose predictions are laid out side by side in the x arrays.
n_models = 11
# Per-class loss weights: the last class counts double; the 6/7 factor rescales
# the weights (1+1+1+1+1+2 = 7) so that they sum to 6.
class_weights = torch.tensor([1, 1, 1, 1, 1, 2], dtype=torch.float32) * 6 / 7
loss_fn = F.binary_cross_entropy

In [3]:
# undo sigmoid

# train_x[train_x > 0] = torch.log(train_x[train_x > 0] / (1-train_x[train_x > 0]))
# val_x[val_x > 0] = torch.log(val_x[val_x > 0] / (1-val_x[val_x > 0]))
# loss_fn = F.binary_cross_entropy_with_logits

In [4]:
# Baseline report: weighted BCE of every L1 model on its own and of their plain
# average, first on the train tensors and then on the validation tensors.
for split_title, split_x, split_y in (('train', train_x, train_y), ('\nval', val_x, val_y)):
    print(split_title)
    preds = []
    for model_id in range(n_models):
        # Each model owns 30 consecutive columns; the 6 columns starting at
        # offset 12 inside that group are compared with the 6 target columns.
        first_col = model_id * 30 + 12
        preds.append(split_x[:, first_col:first_col + 6])
        loss = loss_fn(preds[-1], split_y, weight=class_weights)
        print(f'model {model_id}: {loss}')

    # Unweighted mean over models of the selected columns.
    mean_preds = torch.stack(preds).mean(dim=0)
    loss = loss_fn(mean_preds, split_y, weight=class_weights)
    print(f'averaged ensemble: {loss}')

train
model 0: 0.06121179461479187
model 1: 0.06256157159805298
model 2: 0.06239280849695206
model 3: 0.06186671555042267
model 4: 0.06123921647667885
model 5: 0.06273935735225677
model 6: 0.06325805187225342
model 7: 0.060901932418346405
model 8: 0.060555413365364075
model 9: 0.06571821123361588
model 10: 0.06511762738227844
averaged ensemble: 0.05699021369218826

val
model 0: 0.061941273510456085
model 1: 0.06328877061605453
model 2: 0.06281528621912003
model 3: 0.06388996541500092
model 4: 0.06146755814552307
model 5: 0.06464578211307526
model 6: 0.06419891119003296
model 7: 0.0613483302295208
model 8: 0.0610339418053627
model 9: 0.06970273703336716
model 10: 0.06656711548566818
averaged ensemble: 0.05792373791337013


In [5]:
# train on middle slice only

# train_x_1slice = []
# for model_id in range(n_models):
#     train_x_1slice.append(train_x[:, model_id*30+12:model_id*30+18])
# train_x_1slice = torch.cat(train_x_1slice, dim=1)

# val_x_1slice = []
# for model_id in range(n_models):
#     val_x_1slice.append(val_x[:, model_id*30+12:model_id*30+18])
# val_x_1slice = torch.cat(val_x_1slice, dim=1)

# train_x = train_x_1slice
# val_x = val_x_1slice

In [6]:
# Hidden-layer width; only referenced by the commented-out MLP variant of the
# model further down in this cell, not by the active Model class.
hidden = 128
# Output width of the L2 model (passed as the out-features of Model.w1).
features_out = 6

# model = torch.nn.Sequential(
#         torch.nn.Linear(train_x.shape[1], features_out)
# )

class Model(torch.nn.Module):
    """Learned weighted average of the input columns.

    Each output is a linear combination of the inputs in which the raw weights
    are passed through ``abs`` and divided by their row sum, so every output
    row uses non-negative coefficients that add up to 1. There is no bias term.
    The result is clamped to [0, 1] so it can be fed to binary cross entropy.

    Args:
        in_features: number of input columns. Defaults to ``train_x.shape[1]``
            (the notebook global) when omitted, which keeps ``Model()`` working.
        out_features: number of outputs. Defaults to the notebook global
            ``features_out`` when omitted.
    """

    def __init__(self, in_features=None, out_features=None):
        super().__init__()
        # Fall back to the notebook globals only when the caller gives no size,
        # so the class can also be built without that hidden state.
        if in_features is None:
            in_features = train_x.shape[1]
        if out_features is None:
            out_features = features_out
        self.w1 = torch.nn.Linear(in_features, out_features, bias=False)

    def forward(self, x):
        """Return clamp(x @ W_norm.T, 0, 1) with W_norm = |W| / sum(|W|, dim=1)."""
        abs_w = torch.abs(self.w1.weight)
        norm_w = abs_w / torch.sum(abs_w, 1, keepdim=True)
        # self.w1.bias is None because the layer is created with bias=False.
        x = F.linear(x, norm_w, self.w1.bias)
        return torch.clamp(x, 0, 1)


model = Model()
print(model.w1.weight.shape)
# model = torch.nn.Sequential(
#         torch.nn.Linear(train_x.shape[1], hidden),
#         torch.nn.ReLU(),
# #         torch.nn.Dropout(0.2),
#         torch.nn.Linear(hidden, features_out),
# )

# Move the full train/val tensors and the model to the GPU; training below is
# full-batch (every step uses all of train_x).
train_x = train_x.cuda()
train_y = train_y.cuda()
val_x = val_x.cuda()
val_y = val_y.cuda()
model = model.cuda()

optimizer = torch.optim.Adam(model.parameters(), 0.0001)
val_log_loss = 0  # not updated anywhere in this cell
class_weights = class_weights.cuda()

for i in tqdm(range(10000)):
    optimizer.zero_grad()

    y_hat = model(train_x)
    loss = F.binary_cross_entropy(y_hat, train_y, weight=class_weights)
    loss.backward()
    optimizer.step()

    # Every 100 steps report the train loss of this step (computed before the
    # update) next to the validation loss (computed after the update).
    if i % 100 == 0:
        model.eval()
        # Validation needs no gradients: skip building the autograd graph.
        with torch.no_grad():
            val_y_hat = model(val_x)
            val_loss = F.binary_cross_entropy(val_y_hat, val_y, weight=class_weights)
        model.train()
        
        print(f'{i:04d}: train: {loss.item():.04f}, val: {val_loss.item():.04f}')

torch.Size([6, 330])


  0%|          | 22/10000 [00:00<01:36, 103.51it/s]

0000: train: 0.1286, val: 0.1292


  1%|          | 121/10000 [00:01<01:36, 102.66it/s]

0100: train: 0.1133, val: 0.1138


  2%|▏         | 220/10000 [00:02<01:35, 102.62it/s]

0200: train: 0.0947, val: 0.0951


  3%|▎         | 319/10000 [00:03<01:34, 102.94it/s]

0300: train: 0.0738, val: 0.0742


  4%|▍         | 418/10000 [00:04<01:33, 102.60it/s]

0400: train: 0.0640, val: 0.0646


  5%|▌         | 517/10000 [00:05<01:32, 102.57it/s]

0500: train: 0.0627, val: 0.0634


  6%|▌         | 616/10000 [00:05<01:31, 102.57it/s]

0600: train: 0.0622, val: 0.0629


  7%|▋         | 715/10000 [00:06<01:30, 102.56it/s]

0700: train: 0.0617, val: 0.0624


  8%|▊         | 814/10000 [00:07<01:29, 102.42it/s]

0800: train: 0.0611, val: 0.0618


  9%|▉         | 913/10000 [00:08<01:28, 102.43it/s]

0900: train: 0.0606, val: 0.0613


 10%|█         | 1012/10000 [00:09<01:27, 103.02it/s]

1000: train: 0.0602, val: 0.0608


 11%|█         | 1122/10000 [00:10<01:26, 102.48it/s]

1100: train: 0.0597, val: 0.0604


 12%|█▏        | 1221/10000 [00:11<01:25, 102.56it/s]

1200: train: 0.0593, val: 0.0599


 13%|█▎        | 1320/10000 [00:12<01:24, 102.60it/s]

1300: train: 0.0589, val: 0.0596


 14%|█▍        | 1419/10000 [00:13<01:24, 102.08it/s]

1400: train: 0.0587, val: 0.0593


 15%|█▌        | 1518/10000 [00:14<01:24, 100.94it/s]

1500: train: 0.0585, val: 0.0591


 16%|█▌        | 1617/10000 [00:15<01:22, 101.89it/s]

1600: train: 0.0583, val: 0.0589


 17%|█▋        | 1716/10000 [00:16<01:21, 101.56it/s]

1700: train: 0.0581, val: 0.0587


 18%|█▊        | 1815/10000 [00:17<01:21, 101.05it/s]

1800: train: 0.0579, val: 0.0585


 19%|█▉        | 1914/10000 [00:18<01:20, 101.03it/s]

1900: train: 0.0577, val: 0.0583


 20%|██        | 2013/10000 [00:19<01:18, 101.22it/s]

2000: train: 0.0576, val: 0.0582


 21%|██        | 2112/10000 [00:20<01:17, 102.04it/s]

2100: train: 0.0574, val: 0.0580


 22%|██▏       | 2211/10000 [00:21<01:16, 101.98it/s]

2200: train: 0.0573, val: 0.0579


 23%|██▎       | 2321/10000 [00:22<01:15, 102.09it/s]

2300: train: 0.0573, val: 0.0579


 24%|██▍       | 2420/10000 [00:23<01:14, 101.97it/s]

2400: train: 0.0572, val: 0.0578


 25%|██▌       | 2519/10000 [00:24<01:13, 102.22it/s]

2500: train: 0.0572, val: 0.0578


 26%|██▌       | 2618/10000 [00:25<01:12, 101.99it/s]

2600: train: 0.0572, val: 0.0578


 27%|██▋       | 2717/10000 [00:26<01:11, 101.44it/s]

2700: train: 0.0572, val: 0.0578


 28%|██▊       | 2816/10000 [00:27<01:10, 102.17it/s]

2800: train: 0.0572, val: 0.0577


 29%|██▉       | 2915/10000 [00:28<01:09, 101.79it/s]

2900: train: 0.0571, val: 0.0577


 30%|███       | 3014/10000 [00:29<01:08, 102.05it/s]

3000: train: 0.0571, val: 0.0577


 31%|███       | 3113/10000 [00:30<01:07, 102.12it/s]

3100: train: 0.0571, val: 0.0577


 32%|███▏      | 3212/10000 [00:31<01:06, 102.42it/s]

3200: train: 0.0571, val: 0.0577


 33%|███▎      | 3322/10000 [00:32<01:05, 102.25it/s]

3300: train: 0.0571, val: 0.0577


 34%|███▍      | 3421/10000 [00:33<01:04, 102.19it/s]

3400: train: 0.0571, val: 0.0577


 35%|███▌      | 3520/10000 [00:34<01:03, 102.18it/s]

3500: train: 0.0571, val: 0.0577


 36%|███▌      | 3619/10000 [00:35<01:02, 102.11it/s]

3600: train: 0.0571, val: 0.0577


 37%|███▋      | 3718/10000 [00:36<01:01, 101.89it/s]

3700: train: 0.0571, val: 0.0577


 38%|███▊      | 3817/10000 [00:37<01:00, 101.89it/s]

3800: train: 0.0571, val: 0.0576


 39%|███▉      | 3916/10000 [00:38<00:59, 101.84it/s]

3900: train: 0.0571, val: 0.0576


 40%|████      | 4015/10000 [00:39<00:58, 102.11it/s]

4000: train: 0.0571, val: 0.0576


 41%|████      | 4114/10000 [00:40<00:57, 102.07it/s]

4100: train: 0.0571, val: 0.0576


 42%|████▏     | 4213/10000 [00:41<00:56, 102.03it/s]

4200: train: 0.0571, val: 0.0576


 43%|████▎     | 4312/10000 [00:42<00:55, 102.22it/s]

4300: train: 0.0570, val: 0.0576


 44%|████▍     | 4422/10000 [00:43<00:54, 101.89it/s]

4400: train: 0.0570, val: 0.0576


 45%|████▌     | 4521/10000 [00:44<00:53, 102.11it/s]

4500: train: 0.0570, val: 0.0576


 46%|████▌     | 4620/10000 [00:45<00:52, 102.10it/s]

4600: train: 0.0570, val: 0.0576


 47%|████▋     | 4719/10000 [00:46<00:51, 102.11it/s]

4700: train: 0.0570, val: 0.0576


 48%|████▊     | 4818/10000 [00:47<00:51, 101.36it/s]

4800: train: 0.0570, val: 0.0576


 49%|████▉     | 4917/10000 [00:48<00:50, 101.24it/s]

4900: train: 0.0570, val: 0.0576


 50%|█████     | 5016/10000 [00:49<00:49, 101.46it/s]

5000: train: 0.0570, val: 0.0576


 51%|█████     | 5115/10000 [00:50<00:48, 101.58it/s]

5100: train: 0.0570, val: 0.0576


 52%|█████▏    | 5214/10000 [00:51<00:47, 101.68it/s]

5200: train: 0.0570, val: 0.0576


 53%|█████▎    | 5313/10000 [00:52<00:46, 101.63it/s]

5300: train: 0.0570, val: 0.0576


 54%|█████▍    | 5412/10000 [00:53<00:44, 102.07it/s]

5400: train: 0.0570, val: 0.0576


 55%|█████▌    | 5511/10000 [00:53<00:44, 101.66it/s]

5500: train: 0.0570, val: 0.0576


 56%|█████▌    | 5621/10000 [00:55<00:43, 101.80it/s]

5600: train: 0.0570, val: 0.0576


 57%|█████▋    | 5720/10000 [00:56<00:42, 101.65it/s]

5700: train: 0.0570, val: 0.0576


 58%|█████▊    | 5819/10000 [00:57<00:41, 100.97it/s]

5800: train: 0.0570, val: 0.0576


 59%|█████▉    | 5918/10000 [00:58<00:40, 99.96it/s] 

5900: train: 0.0570, val: 0.0576


 60%|██████    | 6017/10000 [00:58<00:39, 101.66it/s]

6000: train: 0.0570, val: 0.0576


 61%|██████    | 6116/10000 [00:59<00:38, 101.73it/s]

6100: train: 0.0570, val: 0.0576


 62%|██████▏   | 6215/10000 [01:00<00:37, 101.78it/s]

6200: train: 0.0570, val: 0.0576


 63%|██████▎   | 6314/10000 [01:01<00:36, 101.56it/s]

6300: train: 0.0570, val: 0.0576


 64%|██████▍   | 6413/10000 [01:02<00:35, 101.37it/s]

6400: train: 0.0570, val: 0.0576


 65%|██████▌   | 6512/10000 [01:03<00:34, 101.75it/s]

6500: train: 0.0570, val: 0.0576


 66%|██████▌   | 6611/10000 [01:04<00:33, 101.52it/s]

6600: train: 0.0570, val: 0.0576


 67%|██████▋   | 6721/10000 [01:05<00:32, 101.68it/s]

6700: train: 0.0570, val: 0.0576


 68%|██████▊   | 6820/10000 [01:06<00:31, 101.49it/s]

6800: train: 0.0570, val: 0.0576


 69%|██████▉   | 6919/10000 [01:07<00:30, 101.79it/s]

6900: train: 0.0570, val: 0.0576


 70%|███████   | 7018/10000 [01:08<00:29, 102.02it/s]

7000: train: 0.0570, val: 0.0576


 71%|███████   | 7117/10000 [01:09<00:28, 100.88it/s]

7100: train: 0.0570, val: 0.0576


 72%|███████▏  | 7216/10000 [01:10<00:27, 100.82it/s]

7200: train: 0.0570, val: 0.0576


 73%|███████▎  | 7315/10000 [01:11<00:26, 102.02it/s]

7300: train: 0.0570, val: 0.0576


 74%|███████▍  | 7414/10000 [01:12<00:25, 102.24it/s]

7400: train: 0.0570, val: 0.0576


 75%|███████▌  | 7513/10000 [01:13<00:24, 102.05it/s]

7500: train: 0.0570, val: 0.0576


 76%|███████▌  | 7612/10000 [01:14<00:23, 101.70it/s]

7600: train: 0.0570, val: 0.0576


 77%|███████▋  | 7711/10000 [01:15<00:22, 100.90it/s]

7700: train: 0.0570, val: 0.0576


 78%|███████▊  | 7821/10000 [01:16<00:21, 100.88it/s]

7800: train: 0.0570, val: 0.0576


 79%|███████▉  | 7920/10000 [01:17<00:20, 101.28it/s]

7900: train: 0.0570, val: 0.0576


 80%|████████  | 8019/10000 [01:18<00:19, 101.79it/s]

8000: train: 0.0570, val: 0.0576


 81%|████████  | 8118/10000 [01:19<00:18, 101.69it/s]

8100: train: 0.0570, val: 0.0576


 82%|████████▏ | 8217/10000 [01:20<00:17, 101.26it/s]

8200: train: 0.0570, val: 0.0576


 83%|████████▎ | 8316/10000 [01:21<00:16, 101.81it/s]

8300: train: 0.0570, val: 0.0576


 84%|████████▍ | 8415/10000 [01:22<00:15, 101.56it/s]

8400: train: 0.0570, val: 0.0576


 85%|████████▌ | 8514/10000 [01:23<00:14, 101.49it/s]

8500: train: 0.0570, val: 0.0576


 86%|████████▌ | 8613/10000 [01:24<00:13, 101.45it/s]

8600: train: 0.0570, val: 0.0576


 87%|████████▋ | 8712/10000 [01:25<00:12, 102.03it/s]

8700: train: 0.0570, val: 0.0576


 88%|████████▊ | 8811/10000 [01:26<00:11, 101.55it/s]

8800: train: 0.0570, val: 0.0576


 89%|████████▉ | 8921/10000 [01:27<00:10, 101.61it/s]

8900: train: 0.0570, val: 0.0576


 90%|█████████ | 9020/10000 [01:28<00:09, 101.89it/s]

9000: train: 0.0570, val: 0.0575


 91%|█████████ | 9119/10000 [01:29<00:08, 101.64it/s]

9100: train: 0.0570, val: 0.0576


 92%|█████████▏| 9218/10000 [01:30<00:07, 101.64it/s]

9200: train: 0.0570, val: 0.0576


 93%|█████████▎| 9317/10000 [01:31<00:06, 101.15it/s]

9300: train: 0.0570, val: 0.0575


 94%|█████████▍| 9416/10000 [01:32<00:05, 100.22it/s]

9400: train: 0.0570, val: 0.0575


 95%|█████████▌| 9515/10000 [01:33<00:04, 101.39it/s]

9500: train: 0.0570, val: 0.0576


 96%|█████████▌| 9614/10000 [01:34<00:03, 100.96it/s]

9600: train: 0.0570, val: 0.0575


 97%|█████████▋| 9713/10000 [01:35<00:02, 101.09it/s]

9700: train: 0.0570, val: 0.0575


 98%|█████████▊| 9812/10000 [01:36<00:01, 100.84it/s]

9800: train: 0.0570, val: 0.0575


 99%|█████████▉| 9911/10000 [01:37<00:00, 100.84it/s]

9900: train: 0.0570, val: 0.0575


100%|██████████| 10000/10000 [01:38<00:00, 101.82it/s]


In [14]:
# Show the coefficients Model.forward actually applies: |w| divided by its row sum.
abs_weight = model.w1.weight.abs()
abs_weight / abs_weight.sum(dim=1, keepdim=True)

tensor([[1.9564e-06, 2.1241e-06, 3.5336e-08,  ..., 1.7863e-06, 5.2180e-06,
         3.6598e-06],
        [1.6304e-06, 3.6274e-06, 6.9784e-06,  ..., 1.2331e-05, 9.9599e-06,
         4.8471e-06],
        [6.4633e-06, 7.1205e-06, 9.2582e-07,  ..., 7.6014e-06, 1.0359e-05,
         4.1163e-06],
        [1.2718e-05, 4.5825e-06, 9.0938e-07,  ..., 1.1724e-05, 5.2204e-06,
         7.0850e-06],
        [5.4611e-06, 1.0649e-05, 5.8644e-06,  ..., 1.0276e-05, 4.7437e-06,
         3.5321e-06],
        [2.3052e-06, 2.2436e-06, 1.1978e-06,  ..., 3.3766e-06, 1.2293e-05,
         1.9789e-06]], device='cuda:0', grad_fn=<DivBackward0>)

Apply the trained model to the L1 test predictions:

TODO: implement apply to test and call generate_submission with clip_eps 1e-5

Load the test predictions of the same level-1 models and combine them using the trained L2 model:

In [27]:
# Element-wise average of the cached test inputs stored under the five fold directories.
fold_test_x = [np.load(f'{cache_dir}/fold{f}/x_test.npy') for f in [0,1,2,3,4]]
test_x = np.mean(fold_test_x, axis=0)

In [28]:
# Inference only: under no_grad no autograd graph is recorded, so the result
# can be converted to numpy directly without a separate .detach().
with torch.no_grad():
    test_pred = model(torch.tensor(test_x, dtype=torch.float32).cuda()).cpu().numpy()

In [29]:
# Load one of the L1 model prediction files as the base (template) for the L2 csv file.
# The first match in sorted order is used; `glob` is imported in the top import cell.
config = Config()
pred1_pattern = f'/home/dmytro/ml/kaggle-rsna-2019/output/prediction/{config.models[-1]}/fold1/predictions_stage2/test_*.csv'
pred1_candidates = sorted(glob.glob(pred1_pattern))
if not pred1_candidates:
    # Fail with the pattern in the message instead of a bare IndexError on [0].
    raise FileNotFoundError(f'no L1 test prediction files match {pred1_pattern}')
pred1_path = pred1_candidates[0]
pred1 = pd.read_csv(pred1_path)
pred1_path

'/home/dmytro/ml/kaggle-rsna-2019/output/prediction/se_preresnext26b_400/fold1/predictions_stage2/test_h_flip.csv'

In [30]:
# Order the template rows by study and then by slice number before the L2 columns are written.
# NOTE(review): presumably this matches the row order of the cached x_test arrays — confirm.
row_order = ['study_id', 'slice_num']
pred1 = pred1.sort_values(by=row_order)

In [31]:
# Set every prediction column of the template to the matching column of the L2 output
# (assignment is positional, row k of test_pred goes to row k of pred1).
for col_idx, col_name in enumerate(config.pred_columns):
    pred1[col_name] = test_pred[:, col_idx]

In [32]:
# Write the L2 predictions to disk; the DataFrame index is not written out.
l2_csv_path = '/home/dmytro/ml/kaggle-rsna-2019/output/pred_l2.csv'
pred1.to_csv(l2_csv_path, index=False)