In [13]:
# *** UPDATE THE FOLLOWING LINE! ***

In [1]:
#export
# Position in CELL_TYPES (defined in the next cell) of the cell type this run
# fine-tunes on. Edit this value by hand before exporting/running.
CELL_INDEX = 0  # {0, 1, 2, 3}

In [2]:
#export
# Cell-type names; they are compared against the prefix of experiment names
# (the text before the first '-') when the training data is filtered.
CELL_TYPES = ['HEPG2', 'RPE', 'HUVEC', 'U2OS']
# The cell type selected through CELL_INDEX.
CELL_TYPE = CELL_TYPES[CELL_INDEX]

In [3]:
!python -m jupytools export -nb "14a_densenet_cell_type.ipynb" -o .
!mv densenet_cell_type.py "densenet121_{CELL_TYPE}.py"

Exported: 14a_densenet_cell_type.ipynb -> densenet_cell_type.py
1 notebook(s) exported into folder: .


In [4]:
%reload_ext autoreload
%autoreload 2

In [4]:
# https://www.kaggle.com/tanlikesmath/rcic-fastai-starter

In [5]:
#export
# Echo the selected cell type so the exported script's log records it.
print(f'Fine-tuning on cell type: {CELL_TYPE}')

Fine-tuning on cell type: HEPG2


## Imports 

In [6]:
#export
from collections import defaultdict, OrderedDict
import json
import os
from os.path import dirname, join
from functools import reduce
from pdb import set_trace

import cv2 as cv
import jupytools
import jupytools.syspath
import numpy as np
import pandas as pd
import PIL.Image
import matplotlib.pyplot as plt

from catalyst.utils import get_one_hot
from imageio import imread
import pretrainedmodels
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from visdom import Visdom

jupytools.syspath.add(join(dirname(os.getcwd()), 'protein_project'))
jupytools.syspath.add('rxrx1-utils')
# Pick the progress-bar flavour for the current front-end. Both branches must
# bind the same name, `tqdm`, because later cells call `tqdm(...)` directly.
if jupytools.is_notebook():
    from tqdm import tqdm_notebook as tqdm
else:
    # The original bound the misspelt name `tdqm` here, leaving `tqdm`
    # undefined when the exported script runs outside a notebook.
    from tqdm import tqdm
    tdqm = tqdm  # legacy misspelt alias kept for backward compatibility
    
from basedir import ROOT, NUM_CLASSES
from dataset import build_stats_index

In [7]:
#export
# Make torch.FloatTensor the default type for newly created tensors.
# NOTE(review): recent PyTorch releases deprecate set_default_tensor_type in
# favour of torch.set_default_dtype / torch.set_default_device — confirm
# against the installed version.
torch.set_default_tensor_type(torch.FloatTensor)

## Dataset Reader

In [8]:
#export
from augmentation import JoinChannels, SwapChannels, Resize, ToFloat, Rescale
from augmentation import VerticalFlip, HorizontalFlip, PixelStatsNorm, composer
from augmentation import AugmentedImages, bernoulli

In [9]:
#export
# Function used by RxRxImages to read a single channel image from disk.
default_open_fn = imread  # PIL.Image.open

In [10]:
#export
class RxRxImages(Dataset):
    """Dataset yielding one multi-channel image per metadata record.

    Each record of `meta_df` must expose `experiment`, `plate`, `well` and
    `id_code` fields (plus `sirna` when `train=True`). For a given record the
    per-channel PNG files of one imaging `site` are read with `open_image`,
    stacked, converted to float32 and returned channels-last.

    Args:
        meta_df: data frame with the metadata records.
        img_dir: root folder containing the `train`/`test` sub-folders.
        site: imaging site number used in the file name.
        channels: channel numbers to read, in stacking order.
        open_image: callable mapping a file path to an image.
        n_classes: number of classes for the one-hot target.
        train: read from `train` and return targets when True; read from
            `test` and return only features/id otherwise.
    """

    def __init__(self, meta_df, img_dir, site=1, channels=(1, 2, 3, 4, 5, 6), 
                 open_image=default_open_fn, n_classes=NUM_CLASSES, train=True):
        
        self.records = meta_df.to_records(index=False)
        self.img_dir = img_dir
        self.site = site
        self.channels = channels
        self.n = len(self.records)
        self.open_image = open_image
        self.n_classes = n_classes
        self.train = train
        
    def _get_image_path(self, index, channel):
        """Build the path of the PNG for record `index` and `channel`."""
        r = self.records[index]
        exp, plate, well = r.experiment, r.plate, r.well
        subdir = 'train' if self.train else 'test'
        path = f'{self.img_dir}/{subdir}/{exp}/Plate{plate}/{well}_s{self.site}_w{channel}.png'
        return path
    
    def __getitem__(self, index):
        """Return the sample dict for `index`.

        If the channel images cannot be stacked, a warning is printed and the
        next record (wrapping around) is returned instead.
        """
        paths = [self._get_image_path(index, ch) for ch in self.channels]
        images = [self.open_image(p) for p in paths]

        try:
            img = np.stack(images)
        except (TypeError, ValueError) as e:
            print(f'Warning: cannot concatenate images! {e.__class__.__name__}: {e}')
            for filename, image in zip(paths, images):
                print(f'\tpath={filename}, size={image.size}')
            # Report the index that is actually skipped; the original
            # overwrote `index` first and so logged the replacement instead.
            next_index = (index + 1) % len(self)
            print(f'Skipping instance {index} and trying another one...')
            return self[next_index]
        finally:
            # Release file handles when the opener returns closable objects.
            for image in images:
                if hasattr(image, 'close'):
                    image.close()
            
        img = img.astype(np.float32)
        # (channels, H, W) -> (H, W, channels)
        img = img.transpose(1, 2, 0)
        r = self.records[index]
        if self.train:
            sirna = r.sirna
            target = int(sirna)
            onehot = get_one_hot(target, num_classes=self.n_classes)
            return {'features': img, 'targets': target, 
                    'targets_one_hot': onehot, 'id_code': r.id_code,
                    'site': self.site}
        else:
            id_code = r.id_code
            return {'features': img, 'id_code': id_code, 'site': self.site}
    
    def __len__(self): 
        """Number of metadata records."""
        return self.n

In [11]:
#export
class TwoSiteImages(Dataset):
    """Pair two equally sized datasets so each item carries both sites.

    Args:
        ds1: dataset providing the 'site1' sample.
        ds2: dataset providing the 'site2' sample.
        swap: value passed to `bernoulli`; when the draw equals 1 the two
            samples trade places. A falsy value disables swapping.
    """

    def __init__(self, ds1, ds2, swap=0.0):
        assert len(ds1) == len(ds2)
        self.ds1 = ds1
        self.ds2 = ds2
        self.swap = swap
        self.size = len(ds1)

    def __getitem__(self, index):
        """Return {'site1': ..., 'site2': ...} for `index`."""
        first = self.ds1[index]
        second = self.ds2[index]
        # Short-circuit keeps bernoulli() from being called when swap is falsy.
        trade_places = bool(self.swap) and bernoulli(self.swap) == 1
        if trade_places:
            first, second = second, first
        return dict(site1=first, site2=second)

    def __len__(self):
        """Number of paired samples."""
        return self.size

## Pipeline Preparation

In [12]:
#export
from split import DataFrameSplit

def get_cell_type(x):
    """Return the part of the string `x` that precedes its first '-'."""
    prefix, *_ = x.split('-')
    return prefix

class HoldoutSplit(DataFrameSplit):
    """Split that holds out one randomly chosen experiment per cell type."""

    def apply(self, data_frame):
        """Return (training_df, valid_df) as copies of the matching rows.

        Experiments are grouped by the text before the dash in their name;
        one experiment of every group is drawn with np.random.choice for
        validation and the remaining ones are used for training.
        """
        by_cell_type = defaultdict(list)
        for experiment in data_frame.experiment.unique():
            kind, _ = experiment.split('-')
            by_cell_type[kind].append(experiment)

        train_experiments, holdout_experiments = [], []
        for group in by_cell_type.values():
            picked = np.random.choice(group)
            # Names come from unique(), so filtering removes exactly one entry.
            train_experiments += [name for name in group if name != picked]
            holdout_experiments.append(picked)

        in_training = data_frame.experiment.isin(train_experiments)
        in_holdout = data_frame.experiment.isin(holdout_experiments)
        return data_frame[in_training].copy(), data_frame[in_holdout].copy()

# Load the training and test metadata tables from ROOT.
sub_df = pd.read_csv(ROOT/'train.csv')
tst_df = pd.read_csv(ROOT/'test.csv')
# Pixel statistics index handed to PixelStatsNorm when datasets are built.
stats = build_stats_index(ROOT/'pixel_stats.csv')

# Keep only the experiments of the selected cell type, then split them.
# NOTE(review): HoldoutSplit draws with np.random.choice and no seed is set in
# the visible cells, so the split may differ between runs — confirm intent.
splitter = HoldoutSplit()
cell_df = sub_df.loc[sub_df.experiment.map(get_cell_type) == CELL_TYPE].copy()
# presumably DataFrameSplit.__call__ delegates to apply() — verify in split.py
trn_df, val_df = splitter(cell_df)
# Shapes of both parts and the validation/training size ratio.
print(trn_df.shape, val_df.shape, len(val_df)/len(trn_df))

(6642, 5) (1108, 5) 0.16681722372779284


In [13]:
#export
# Side length passed to composer(resize=...).
sz = 512


def _train_transforms():
    """Fresh transform list for one training site: flips + pixel-stats norm."""
    return [
        HorizontalFlip(p=0.1),
        VerticalFlip(p=0.1),
        PixelStatsNorm(stats, channels_first=False),
    ]


def _eval_transforms():
    """Fresh transform list for one validation site: pixel-stats norm only."""
    return [PixelStatsNorm(stats, channels_first=False)]


def _site_view(frame, site, make_transforms):
    """Wrap the images of one site of `frame` with its own transform pipeline.

    The transforms are created after the image dataset, matching the order in
    which the objects were constructed when this was written inline.
    """
    images = RxRxImages(frame, ROOT, site=site)
    pipeline = composer(make_transforms(), resize=sz, rescale=False)
    return AugmentedImages(ds=images, tr=pipeline)


# Training pairs: both sites, each with an independent augmentation pipeline.
trn_ds = TwoSiteImages(
    ds1=_site_view(trn_df, 1, _train_transforms),
    ds2=_site_view(trn_df, 2, _train_transforms),
)
# Validation pairs: normalisation only, no random flips.
val_ds = TwoSiteImages(
    ds1=_site_view(val_df, 1, _eval_transforms),
    ds2=_site_view(val_df, 2, _eval_transforms),
)

In [14]:
# Two-site dataset over the whole test table (train=False, so samples carry no
# targets). Only pixel-stats normalisation is applied.
# NOTE(review): the multi-model inference cell further down assigns to
# `tst_ds` again inside its loop; re-run this cell before using `tst_ds`
# for the full test set.
tst_ds = TwoSiteImages(
    ds1=AugmentedImages(ds=RxRxImages(tst_df, ROOT, site=1, train=False), tr=composer([
        PixelStatsNorm(stats, channels_first=False)
    ], resize=sz, rescale=False)),
    ds2=AugmentedImages(ds=RxRxImages(tst_df, ROOT, site=2, train=False), tr=composer([
        PixelStatsNorm(stats, channels_first=False)
    ], resize=sz, rescale=False))
)

In [15]:
#export
def new_loader(ds, bs, drop_last=False, shuffle=True, num_workers=12):
    """Create a DataLoader over `ds`.

    Args:
        ds: dataset to iterate.
        bs: batch size.
        drop_last: forwarded to DataLoader.
        shuffle: forwarded to DataLoader.
        num_workers: forwarded to DataLoader.
    """
    loader_options = dict(
        batch_size=bs,
        drop_last=drop_last,
        shuffle=shuffle,
        num_workers=num_workers,
    )
    return DataLoader(ds, **loader_options)

## Model

In [16]:
#export
def densenet(name='densenet121', n_classes=NUM_CLASSES):
    """Build an ImageNet-pretrained DenseNet whose first conv takes 6 channels.

    The new first convolution is initialised with the channel-wise mean of the
    pretrained first-layer kernels, repeated for each of the 6 input channels.

    Args:
        name: key of the constructor in `pretrainedmodels`.
        n_classes: accepted but not used here; the returned model keeps its
            1000-class classifier.
    """
    backbone = pretrainedmodels.__dict__[name](num_classes=1000, pretrained='imagenet')
    stem = nn.Conv2d(6, 64, kernel_size=7, stride=2, padding=3, bias=False)
    pretrained_kernels = backbone.features.conv0.weight
    with torch.no_grad():
        channel_mean = pretrained_kernels.mean(dim=1)
        stem.weight.copy_(torch.stack([channel_mean] * 6, dim=1))
    backbone.features.conv0 = stem
    return backbone

In [17]:
#export
from catalyst.contrib.modules import GlobalConcatPool2d
class DenseNet_TwoSites(nn.Module):
    """Classifier that shares one DenseNet feature extractor across two sites.

    Feature maps of both sites are summed, pooled with GlobalConcatPool2d and
    classified by a small fully connected head.

    Args:
        name: DenseNet variant name forwarded to `densenet`.
        n_classes: number of output classes of the head.
    """

    def __init__(self, name, n_classes=NUM_CLASSES):
        super().__init__()
        
        base = densenet(name=name, n_classes=n_classes)
        # Width of the backbone features, read from its original classifier.
        feat_dim = base.last_linear.in_features
        
        self.base = base 
        self.pool = GlobalConcatPool2d()
        # The head takes feat_dim * 2 inputs, i.e. it expects the pooling layer
        # to double the channel count.
        self.head = nn.Sequential(
            nn.Linear(feat_dim * 2, feat_dim * 2),
            nn.BatchNorm1d(feat_dim * 2),
            nn.ReLU(inplace=True),
            nn.Dropout(0.25),
            nn.Linear(feat_dim * 2, n_classes)
        )
        
    def forward(self, s1, s2):
        """Return class logits for a batch of site-1 / site-2 image pairs."""
        f1 = self.base.features(s1)
        f2 = self.base.features(s2)
        f_merged = self.pool(f1 + f2)
        # Flatten everything except the batch axis. A bare squeeze() would also
        # drop the batch axis for a single-sample batch and hand BatchNorm1d a
        # 1-D tensor; for larger batches the result is the same as before.
        out = self.head(f_merged.flatten(1))
        return out

In [18]:
#export
def freeze_all(model):
    """Disable gradients for all parameters of every direct child of `model`.

    Prints the name of each child as it is processed.
    """
    for layer_name, layer in model.named_children():
        print('Freezing layer:', layer_name)
        for weight in layer.parameters():
            weight.requires_grad_(False)

In [19]:
#export
def unfreeze_all(model):
    """Enable gradients for all parameters of every direct child of `model`.

    Prints the name of each child as it is processed.
    """
    for layer_name, layer in model.named_children():
        print('Un-freezing layer:', layer_name)
        for weight in layer.parameters():
            weight.requires_grad_(True)

In [20]:
#export
def unfreeze_layers(model, names):
    """Enable gradients only for the direct children whose name is in `names`.

    Children not listed are left untouched; each processed name is printed.
    """
    for layer_name, layer in model.named_children():
        if layer_name in names:
            print('Un-freezing layer:', layer_name)
            for weight in layer.parameters():
                weight.requires_grad_(True)

## Train

In [20]:
#export
# Warm-start from a previously saved two-site DenseNet checkpoint.
path = 'densenet121_long_training/train.29.pth'
model = DenseNet_TwoSites('densenet121')
# The map_location callable returns its first argument unchanged.
# NOTE(review): torch.load calls it as (storage, location), so despite the
# parameter names this returns the storage as deserialised — confirm.
state = torch.load(path, map_location=lambda loc, storage: loc)
# The checkpoint is a dict; its 'model' entry holds the model state dict.
model.load_state_dict(state['model'])
# Mark the parameters of every child module as trainable.
unfreeze_all(model)

Un-freezing layer: base
Un-freezing layer: pool
Un-freezing layer: head


In [30]:
#export
from visdom import Visdom

In [31]:
#export
class RollingLoss:
    """Exponentially smoothed loss with bias correction.

    Args:
        smooth: weight given to the running average on each update.
    """

    def __init__(self, smooth=0.98):
        self.smooth = smooth
        self.prev = 0

    def __call__(self, curr, batch_no):
        """Fold `curr` into the running average and return the debiased value.

        `batch_no` is the number of updates so far; it is used as the exponent
        in the bias-correction term, so it must not be 0.
        """
        decay = self.smooth
        blended = decay*self.prev + (1 - decay)*curr
        debiased = blended/(1 - decay**batch_no)
        # Store the uncorrected average; only the returned value is debiased.
        self.prev = blended
        return debiased

In [32]:
#export
def create_loaders(batch_size):
    """Return an OrderedDict with the 'train' and 'valid' loaders, in that order.

    The training loader shuffles, the validation loader does not.
    """
    loaders = OrderedDict()
    loaders['train'] = new_loader(trn_ds, bs=batch_size, shuffle=True)
    loaders['valid'] = new_loader(val_ds, bs=batch_size, shuffle=False)
    return loaders

In [33]:
#export
class Checkpoint:
    """Writes per-epoch checkpoint files into one output folder.

    Args:
        output_dir: folder for the checkpoints; created if missing. A warning
            is printed when it already exists.
    """

    def __init__(self, output_dir):
        already_there = os.path.exists(output_dir)
        if already_there:
            print('Warning! Output folder already exists.')
        os.makedirs(output_dir, exist_ok=True)
        self.output_dir = output_dir

    def __call__(self, epoch, **objects):
        """Save `objects` as `train.<epoch>.pth` and return the file path.

        Values exposing `state_dict` are stored as their state dict; all other
        values are stored unchanged.
        """
        payload = {
            key: (obj.state_dict() if hasattr(obj, 'state_dict') else obj)
            for key, obj in objects.items()
        }
        target = os.path.join(self.output_dir, f'train.{epoch}.pth')
        torch.save(payload, target)
        return target

In [34]:
#export
from torch.optim.lr_scheduler import _LRScheduler
class CosineDecay(_LRScheduler):
    """Linear warm-up followed by cosine decay, stepped once per batch.

    Args:
        optimizer: wrapped optimizer.
        total_steps: total number of scheduler steps planned.
        linear_start: learning rate at step 0 of the warm-up.
        linear_frac: fraction of `total_steps` spent in the linear warm-up;
            0 disables the warm-up.
        min_lr: learning rate reached at the end of the cosine phase.
        last_epoch: forwarded to `_LRScheduler`.
    """

    def __init__(self, optimizer, total_steps,
                 linear_start=0,
                 linear_frac=0.1, min_lr=1e-6,
                 last_epoch=-1):
        
        self.optimizer = optimizer
        self.total_steps = total_steps
        self.linear_start = linear_start
        self.linear_frac = linear_frac
        self.min_lr = min_lr
        self.linear_steps = total_steps * linear_frac
        self.cosine_steps = total_steps - self.linear_steps
        # The base class initialiser already calls get_lr(), so every
        # attribute used there has to be set before this point.
        super().__init__(optimizer, last_epoch)
        
    def get_lr(self):
        """Return the learning rate of each param group for the current step."""
        step = self.last_epoch
        # With linear_frac == 0 there is no warm-up; skipping the branch avoids
        # dividing by zero at step 0.
        if self.linear_steps > 0 and step <= self.linear_steps:
            b = self.linear_start
            return [(step/self.linear_steps) * (base_lr - b) + b for base_lr in self.base_lrs]
        else:
            t = step - self.linear_steps
            T = self.cosine_steps
            return [self.min_lr + (base_lr - self.min_lr)*(1 + np.cos(t*np.pi/T))/2
                    for base_lr in self.base_lrs]

In [35]:
#export
# Loss applied to the model logits and the integer class targets.
loss_fn = nn.CrossEntropyLoss()
# All training and inference in this notebook runs on the first CUDA device.
device = torch.device('cuda:0')

In [None]:
#export
from lookahead import Lookahead

epochs = 30
patience = 10
base_lr = 3e-4

# Discriminative learning rates: the head gets the full rate, earlier dense
# blocks progressively smaller ones. Parameters outside these groups (e.g.
# base.features.conv0) are not handed to the optimizer.
opt = Lookahead(torch.optim.AdamW(params=[
    {'params': model.head.parameters(),                      'lr': base_lr     },
    {'params': model.base.features.denseblock4.parameters(), 'lr': base_lr / 3 },
    {'params': model.base.features.denseblock3.parameters(), 'lr': base_lr / 5 },
    {'params': model.base.features.denseblock2.parameters(), 'lr': base_lr / 5 },
    {'params': model.base.features.denseblock1.parameters(), 'lr': base_lr / 10},
]))
model = model.to(device)
rolling_loss = dict(train=RollingLoss(), valid=RollingLoss())
# Global batch counters per phase; they keep growing across epochs.
steps = dict(train=0, valid=0)

trials = 0
best_metric = -np.inf
history = []
stop = False

vis = Visdom(server='0.0.0.0', port=9090,
             username=os.environ['VISDOM_USERNAME'],
             password=os.environ['VISDOM_PASSWORD'])

loaders = create_loaders(batch_size=7)

# The scheduler is stepped once per training batch.
sched = CosineDecay(
    opt,
    total_steps=len(loaders['train']) * epochs,
    linear_start=base_lr / 100,
    linear_frac=0.2, 
    min_lr=base_lr / 300)

checkpoint = Checkpoint(f'densenet121_{CELL_TYPE}')

unfreeze_all(model)

log_freq = 25

for epoch in range(1, epochs+1):
    print(f'Epoch [{epoch}/{epochs}]')
    
    iteration = dict(epoch=epoch, train_loss=list(), valid_loss=list())
    
    for name, loader in loaders.items():
        is_training = name == 'train'
        # Switch Dropout/BatchNorm between training and inference behaviour;
        # previously the model stayed in training mode during validation.
        model.train(is_training)
        n_correct = 0
        count = 0
        
        with torch.set_grad_enabled(is_training):
            for batch_no, batch in enumerate(loader):
                steps[name] += 1

                y = batch['site1']['targets'].to(device)
                
                out = model(
                    batch['site1']['features'].to(device),
                    batch['site2']['features'].to(device)
                )
                # Compute the loss in both phases. It used to be computed only
                # while training, so the "validation loss" was really the
                # last training loss.
                loss = loss_fn(out, y)
                
                if is_training:
                    opt.zero_grad()
                    loss.backward()
                    opt.step()
                    sched.step()
                    
                    curr_lr = opt.param_groups[0]['lr']
                    vis.line(X=[steps[name]], Y=[curr_lr], win='lr', name='lr', update='append')    
                
                avg_loss = rolling_loss[name](loss.item(), steps[name])
                iteration[f'{name}_loss'].append(avg_loss)
                y_pred = out.argmax(dim=1)
                # Count samples, not len(batch): `batch` is a dict with the two
                # site keys, so its length was always 2.
                n_correct += (y_pred == y).sum().item()
                count += y.size(0)
                if batch_no % log_freq == 0:
                    vis.line(X=[steps[name]], Y=[avg_loss], name=f'{name}_loss', 
                             win=f'{name}_loss', update='append', 
                             opts=dict(title=f'Running Loss [{name}]'))
        
        # Fraction of correctly classified samples in this phase.
        metric = n_correct / max(count, 1)
        iteration[f'{name}_acc'] = metric
        vis.line(X=[epoch], Y=[avg_loss], name=f'{name}', win='avg_loss',
                 update='append', opts=dict(title='Average Epoch Loss'))
        vis.line(X=[epoch], Y=[metric], name=f'{name}', win='accuracy', 
                 update='append', opts=dict(title=f'Accuracy'))
        
        last_loss = iteration[f'{name}_loss'][-1]
        
        print(f'{name} metrics: accuracy={metric:2.3%}, loss={last_loss:.4f}')
          
        # Checkpointing and early stopping are driven by validation accuracy.
        if not is_training:
            if metric > best_metric:
                trials = 0
                best_metric = metric
                print('Score improved!')
                checkpoint(epoch, model=model, opt=opt)

            else:
                trials += 1
                if trials >= patience:
                    stop = True
                    break
    
    history.append(iteration)
    
    print('-' * 80)
    
    if stop:
        print(f'Early stopping on epoch: {epoch}')
        break

torch.save(history, f'{checkpoint.output_dir}/history.pth')

## Test

### Multi-Model

In [21]:
# Inference device; defined here so the test section can run without the
# (non-exported) training cells having been executed.
device = torch.device('cuda:0')

In [22]:
from operator import itemgetter

In [23]:
# Distinct cell-type prefixes of the test experiments, in order of appearance.
cell_types = tst_df.experiment.map(get_cell_type).unique()

In [25]:
# Load, for every test cell type, the latest checkpoint written by the
# fine-tuning run of that type (folder `densenet121_<cell type>`).
models = {}
for ct in cell_types:
    print(f'Loading fine-tuned model for cell type: {ct}')
    # Named `weights_dir` so the `dirname` imported from os.path is not shadowed.
    weights_dir = f'densenet121_{ct}'
    weights = []
    if os.path.isdir(weights_dir):
        for fn in os.listdir(weights_dir):
            # Only files named train.<epoch>.pth are checkpoints; anything else
            # (e.g. history.pth) is ignored instead of crashing int().
            parts = fn.split('.')
            if len(parts) == 3 and parts[0] == 'train' and parts[1].isdigit() and parts[2] == 'pth':
                weights.append((fn, int(parts[1])))
    if not weights:
        print('Warning: weights not found! Skipping cell type...')
        continue
    # Checkpoints are written only when the validation score improves, so the
    # highest epoch number is the best model.
    best, _ = max(weights, key=itemgetter(1))
    # Printed after `best` is assigned; it used to be referenced one line too
    # early, which fails on a fresh kernel.
    print(f'Loading file: {best}')
    state = torch.load(os.path.join(weights_dir, best), map_location=lambda storage, loc: storage)
    model = DenseNet_TwoSites('densenet121')
    model.load_state_dict(state['model'])
    model = model.to(device)
    freeze_all(model)
    _ = model.eval()
    models[ct] = model

Loading fine-tuned model for cell type: HEPG2
Loading file: train.13.pth
Freezing layer: base
Freezing layer: pool
Freezing layer: head
Loading fine-tuned model for cell type: HUVEC
Loading file: train.17.pth
Freezing layer: base
Freezing layer: pool
Freezing layer: head
Loading fine-tuned model for cell type: RPE
Loading file: train.10.pth
Freezing layer: base
Freezing layer: pool
Freezing layer: head
Loading fine-tuned model for cell type: U2OS
Loading file: train.10.pth
Freezing layer: base
Freezing layer: pool
Freezing layer: head


In [34]:
# Run every fine-tuned model on the test records of its own cell type and
# collect, per id_code, the class probabilities and the arg-max prediction.
with torch.no_grad():
    preds = {}
    probs = {}
    for cell_type, model in models.items():
        print(f'Running model for cell type: {cell_type}')
        ct_df = tst_df[tst_df.id_code.str.startswith(cell_type)].copy()
        print(f'Number of test records for this type: {len(ct_df)}')
        # Use a dedicated name for the per-cell-type dataset. Rebinding the
        # global `tst_ds` here left it pointing at the last cell type only,
        # silently shrinking the test set for the single-model cells below.
        ct_ds = TwoSiteImages(
            ds1=AugmentedImages(ds=RxRxImages(ct_df, ROOT, site=1, train=False), tr=composer([
                PixelStatsNorm(stats, channels_first=False)
            ], resize=sz, rescale=False)),
            ds2=AugmentedImages(ds=RxRxImages(ct_df, ROOT, site=2, train=False), tr=composer([
                PixelStatsNorm(stats, channels_first=False)
            ], resize=sz, rescale=False))
        )
        test_dl = new_loader(ct_ds, shuffle=False, bs=64)
        for batch in tqdm(test_dl):
            s1 = batch['site1']['features']
            s2 = batch['site2']['features']
            if s1.shape[0] == 1:
                # A single-sample batch is duplicated and only the first output
                # row is kept, so the model never sees a batch of size 1.
                print('Warning! Batch size 1, duplicating to deal with BN')
                s1 = torch.cat([s1, s1])
                s2 = torch.cat([s2, s2])
                out = model(s1.to(device), s2.to(device))
                out = out[0].unsqueeze(0)
            else:
                out = model(s1.to(device), s2.to(device))
            y_prob = out.softmax(dim=-1).cpu().numpy()
            y_pred = y_prob.argmax(axis=-1)
            probs.update(dict(zip(batch['site1']['id_code'], y_prob)))
            preds.update(dict(zip(batch['site1']['id_code'], y_pred)))

Running model for cell type: HEPG2
Number of test records for this type: 4429


HBox(children=(IntProgress(value=0, max=70), HTML(value='')))

Running model for cell type: HUVEC
Number of test records for this type: 8846


HBox(children=(IntProgress(value=0, max=139), HTML(value='')))

Running model for cell type: RPE
Number of test records for this type: 4417


HBox(children=(IntProgress(value=0, max=70), HTML(value='')))

Running model for cell type: U2OS
Number of test records for this type: 2205


HBox(children=(IntProgress(value=0, max=35), HTML(value='')))

In [35]:
# One row per test sample: its id_code and the predicted siRNA class.
preds_df = pd.DataFrame(
    [dict(id_code=key, sirna=label) for key, label in preds.items()]
)
preds_df.head(5)

Unnamed: 0,id_code,sirna
0,HEPG2-08_1_B03,855
1,HEPG2-08_1_B04,7
2,HEPG2-08_1_B05,836
3,HEPG2-08_1_B06,249
4,HEPG2-08_1_B07,585


In [36]:
# Persist the per-cell-type predictions; the leak section reads this file back.
preds_df.to_csv('cell_typed.csv', index=False)

### Leak

In [37]:
trn_csv = pd.read_csv(ROOT/'train.csv')
tst_csv = pd.read_csv(ROOT/'test.csv')

# plate_groups[s, 0:3]: the three plate numbers on which siRNA `s` occurs in
# train.csv, in value_counts() order. plate_groups[s, 3]: 10 minus their sum,
# i.e. the remaining plate assuming plates are numbered 1-4 — TODO confirm.
plate_groups = np.zeros((1108,4), int)
for sirna in range(1108):
    grp = trn_csv.loc[trn_csv.sirna==sirna,:].plate.value_counts().index.values
    assert len(grp) == 3
    plate_groups[sirna,0:3] = grp
    plate_groups[sirna,3] = 10 - grp.sum()

subfile = 'cell_typed'

sub = pd.read_csv(f'{subfile}.csv')

all_test_exp = tst_csv.experiment.unique()

# group_plate_probs[e, j]: fraction of the predictions of test experiment `e`
# whose predicted siRNA is assigned, by column j of plate_groups, to the plate
# the sample actually sits on.
group_plate_probs = np.zeros((len(all_test_exp),4))

for idx in range(len(all_test_exp)):
    # NOTE(review): a mask built from tst_csv is applied to `sub`, which assumes
    # both frames share the same row index and order. This also rebinds
    # `preds`, which held the prediction dict in the inference cell above.
    preds = sub.loc[tst_csv.experiment == all_test_exp[idx],'sirna'].values
    # One-hot encoding of the predicted siRNA of each sample.
    pp_mult = np.zeros((len(preds),1108))
    pp_mult[range(len(preds)),preds] = 1
    
    sub_test = tst_csv.loc[tst_csv.experiment == all_test_exp[idx],:]
    assert len(pp_mult) == len(sub_test)
    
    for j in range(4):
        # mask[i, s] is True when column j places siRNA s on sample i's plate.
        mask = np.repeat(plate_groups[np.newaxis, :, j], len(pp_mult), axis=0) == \
               np.repeat(sub_test.plate.values[:, np.newaxis], 1108, axis=1)
        
        group_plate_probs[idx,j] = np.array(pp_mult)[mask].sum()/len(pp_mult)

In [38]:
pd.DataFrame(group_plate_probs, index=all_test_exp)

Unnamed: 0,0,1,2,3
HEPG2-08,0.130081,0.094851,0.132791,0.642276
HEPG2-09,0.158845,0.499097,0.177798,0.16426
HEPG2-10,0.710289,0.09657,0.104693,0.088448
HEPG2-11,0.75859,0.076854,0.085895,0.078662
HUVEC-17,0.777978,0.072202,0.069495,0.080325
HUVEC-18,0.66757,0.122855,0.102078,0.107498
HUVEC-19,0.117329,0.091155,0.67509,0.116426
HUVEC-20,0.032491,0.030686,0.905235,0.031588
HUVEC-21,0.123646,0.109206,0.101986,0.665162
HUVEC-22,0.793321,0.064982,0.07852,0.063177


In [39]:
# For each test experiment, the plate_groups column with the highest share of
# plate-consistent predictions.
exp_to_group = group_plate_probs.argmax(1)
print(exp_to_group)

[3 1 0 0 0 0 2 2 3 0 0 3 1 0 0 0 2 3]


In [49]:
# Stack the per-sample probability vectors in the row order of test.csv. The
# cells below index `stacked` with boolean masks built from `tst_csv`, so the
# rows must follow that order rather than the insertion order of `probs`.
# np.vstack replaces its alias np.row_stack, which newer NumPy deprecates.
stacked = np.vstack([probs[id_code] for id_code in tst_csv.id_code])

In [50]:
stacked.shape

(19897, 1108)

In [51]:
def select_plate_group(pp_mult, idx):
    """Zero the scores of classes that cannot be on a sample's plate.

    For test experiment number `idx`, the plate of each class is read from the
    plate_groups column chosen in `exp_to_group[idx]`; every entry of
    `pp_mult` whose class plate differs from the sample's plate is set to 0.

    Args:
        pp_mult: array with one row per sample of the experiment and one
            column per class. Modified in place.
        idx: position of the experiment in `all_test_exp`.

    Returns:
        The same `pp_mult` array.
    """
    in_experiment = tst_csv.experiment == all_test_exp[idx]
    sample_plates = tst_csv.loc[in_experiment, :].plate.values
    assert len(pp_mult) == len(sample_plates)
    class_plates = plate_groups[:, exp_to_group[idx]]
    # Broadcast (1, classes) against (samples, 1) to compare every pair.
    off_plate = class_plates[np.newaxis, :] != sample_plates[:, np.newaxis]
    pp_mult[off_plate] = 0
    return pp_mult

In [52]:
# Index by id_code so predictions can be assigned by label below.
# NOTE(review): not idempotent — re-running this cell fails once 'id_code' is
# already the index.
sub = sub.set_index('id_code')

In [53]:
# For every test experiment: take its rows of `stacked`, zero the classes that
# are off-plate under the experiment's chosen plate group, and store the new
# arg-max as the prediction.
for idx in range(len(all_test_exp)):
    # Boolean mask over tst_csv rows; assumes `stacked` is in the same row
    # order as tst_csv — verify.
    indexes = tst_csv.experiment == all_test_exp[idx]
    preds = stacked[indexes, :].copy()
    preds = select_plate_group(preds, idx)
    sub.loc[tst_csv.id_code[indexes], 'sirna'] = preds.argmax(1)

In [54]:
# Turn id_code back into a regular column before comparing and saving.
sub = sub.reset_index()

In [55]:
(sub.sirna == pd.read_csv(f'{subfile}.csv').sirna).mean()

0.667487560938835

In [56]:
from IPython.display import FileLink
# Write the plate-corrected submission and show a download link for it.
sub.to_csv(f'{subfile}_leak.csv', index=False, columns=['id_code', 'sirna'])
FileLink(f'{subfile}_leak.csv')

### Single Model

In [None]:
# Load one model that is applied to the whole test set.
device = torch.device('cuda:0')
model = DenseNet_TwoSites('densenet121')
model = model.to(device)
state = torch.load('densenet121_15_cw/train.14.pth', map_location=lambda storage, loc: storage)
# Files written by Checkpoint keep the weights under the 'model' key, as the
# other loading cells of this notebook expect; a bare state dict is accepted
# as well so existing files in either layout load.
model.load_state_dict(state['model'] if 'model' in state else state)
freeze_all(model)
_ = model.eval()

In [None]:
# Predict the class of every sample in `tst_ds` with the single model.
# NOTE(review): `tst_ds` must be the full test dataset here; the multi-model
# cell above assigns to the same name inside its loop, so re-create `tst_ds`
# first if that cell has been run.
with torch.no_grad():
    test_dl = new_loader(tst_ds, shuffle=False, bs=64)
    # id_code -> predicted class index
    preds = {}
    for batch in tqdm(test_dl):
        s1 = batch['site1']['features']
        s2 = batch['site2']['features']
        out = model(s1.to(device), s2.to(device))
        y_hat = out.argmax(dim=-1).cpu().numpy()
        preds.update(dict(zip(batch['site1']['id_code'], y_hat.astype(int))))

In [None]:
# One row per test sample: its id_code and the predicted siRNA class.
preds_df = pd.DataFrame(
    [dict(id_code=key, sirna=label) for key, label in preds.items()]
)
preds_df.head(5)

In [None]:
len(preds_df)

In [None]:
# Write the single-model submission and show a download link for it.
filename = 'densenet121_two_way_512.csv'
preds_df.to_csv(filename, index=False, columns=['id_code', 'sirna'])
from IPython.display import FileLink
FileLink(filename)