# fastai starter

Many thanks to [Basic EDA + Data Visualization 🧠](https://www.kaggle.com/marcovasquez/basic-eda-data-visualization) for the code to load the data.

## Imports

In [None]:
from torchvision.models import *
!pip install pretrainedmodels
import pretrainedmodels as pm
from fastai import *
from fastai.vision import *
from fastai.vision.models import *
from fastai.vision.learner import model_meta
import fastai
from utils import *
import sys
import torch
fastai.__version__

In [None]:
import glob, pylab, pandas as pd
import pydicom, numpy as np
from os import listdir
from os.path import isfile, join
import matplotlib.pylab as plt
import seaborn as sns
from tqdm import tqdm_notebook as tqdm

In [None]:
from scipy import ndimage
import scipy.misc
from skimage import morphology
from skimage.segmentation import slic
from skimage import measure
from skimage.transform import resize, warp
from skimage import exposure
# Some machine learning as a treat
from sklearn.preprocessing import MaxAbsScaler
from sklearn.linear_model import LogisticRegressionCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, StratifiedKFold

## Load and preprocess data

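We will transform the data into a space-separated multi-label format: one row per image, whose `labels` column lists the hemorrhage sub-types present in that image (an empty string when there are none).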

In [None]:
# copy pretrained weights to the folder fastai will search by default
# (the directory is created first; exist_ok/parents make the cell safe to re-run)
Path('/tmp/.cache/torch/checkpoints/').mkdir(exist_ok=True, parents=True)
model_path = '/tmp/.cache/torch/checkpoints/efficientNet.pth'
# IPython shell escape: {model_path} is filled in from the Python variable above.
# NOTE(review): EfficientNetB0() further down loads its checkpoint straight from
# ../input/efficientnet-pytorch/ and never reads model_path -- confirm this copy is still needed.
!cp ../input/efficientnet*/efficientNet_*.pth {model_path}

In [None]:
# Make the EfficientNet-PyTorch sources found under ../input importable by
# appending their directory to sys.path before the import below.
package_path = '../input/efficientnet-pytorch/efficientnet-pytorch/EfficientNet-PyTorch-master'
sys.path.append(package_path)

from efficientnet_pytorch import EfficientNet

In [None]:
# fastai adapter: reuse the pretrained EfficientNet backbone while replacing
# its classification head with a freshly initialised 6-class one.
def EfficientNetB0(pretrained=True):
    """Construct an EfficientNet model for fastai.

    NOTE: despite its name, this function builds the ``efficientnet-b3``
    architecture; the name is kept because other code calls it.

    Args:
        pretrained (bool): If True, load the backbone weights from the
            ``efficientnet-b3`` checkpoint under ``../input``. The final
            ``_fc`` layer is never loaded from the checkpoint, so the
            6-class head keeps its fresh initialisation.

    Returns:
        The ``EfficientNet`` module with ``num_classes=6``.

    Raises:
        RuntimeError: if the checkpoint uses the native key names and, after
            loading, the set of missing keys is not exactly the ``_fc`` head.
    """
    head_keys = ('_fc.weight', '_fc.bias')
    model = EfficientNet.from_name('efficientnet-b3', override_params={'num_classes': 6})
    if not pretrained:
        return model

    # map_location='cpu' lets the checkpoint load on machines without a GPU;
    # load_state_dict copies the values into the model's own tensors anyway.
    model_state = torch.load(
        "../input/efficientnet-pytorch/efficientnet-b3-c8376fa2.pth",
        map_location='cpu',
    )

    if '_fc.weight' in model_state:
        # Checkpoint uses the model's own key names: drop the old head and
        # load everything else.
        for key in head_keys:
            model_state.pop(key, None)
        res = model.load_state_dict(model_state, strict=False)
        # Explicit check instead of `assert`, so it still runs under `python -O`.
        if set(res.missing_keys) != set(head_keys):
            raise RuntimeError(
                'issue loading pretrained weights: missing keys {}'.format(res.missing_keys)
            )
    else:
        # Checkpoint uses different key names: remap them positionally onto
        # the model's keys, skipping anything that lands on the `_fc` head.
        from collections import OrderedDict
        mapping = dict(zip(model_state.keys(), model.state_dict().keys()))
        mapped_model_state = OrderedDict(
            (mapping[k], v)
            for k, v in model_state.items()
            if not mapping[k].startswith('_fc')
        )
        res = model.load_state_dict(mapped_model_state, strict=False)
        print(res)
    return model
# create model
# (module-level instance; the training cell passes this same `model` object to Learner)
model = EfficientNetB0(pretrained=True)
# print model structure (hidden)

In [None]:
# Root directory of the competition data; the image folders and the sample
# submission CSV are resolved relative to it further down.
DATA = Path("../input/rsna-intracranial-hemorrhage-detection")

In [None]:
# Training labels table; the following cells rely on its `ID` and `Label` columns.
# NOTE(review): the path repeats the directory already stored in DATA.
df= pd.read_csv('../input/rsna-intracranial-hemorrhage-detection/stage_1_train.csv')

In [None]:
# Remove every row whose ID starts with 'ID_6431af929' (the corrupt entry),
# printing the table shape before and after, then renumber the index 0..n-1.
print(df.shape)  # shape of original dataset
corrupt_rows = df[df.ID.str.startswith('ID_6431af929')]
df = df.drop(df.index[corrupt_rows.index.values])
print(df.shape)  # shape after deleting the corrupt rows
df.index = np.arange(len(df))

In [None]:
# Split each ID at its first two underscores: the middle piece becomes
# Image_ID and the remainder becomes Sub_type.
# (assumes IDs look like "ID_<image>_<subtype>" -- TODO confirm against the CSV)
newtable = df.copy()
new = newtable["ID"].str.split("_", n=1, expand=True)   # column 1 = everything after the first "_"
newX = new[1].str.split("_", n=1, expand=True)          # column 0 = image id, column 1 = sub-type
newtable = newtable.assign(Image_ID=newX[0], Sub_type=newX[1])

In [None]:
# Notebook cell output: show the table with the new Image_ID / Sub_type columns.
newtable

In [None]:
# One row per distinct image id, each paired with an (initially empty) labels string.
image_ids = newtable.Image_ID.unique()
labels = [""] * len(image_ids)
new_df = pd.DataFrame(np.array([image_ids, labels]).T, columns=["id", "labels"])

In [None]:
# Map every image id to an empty label string; positives are filled in below.
lbls = dict.fromkeys(image_ids, "")

In [None]:
# Keep only the positive rows (Label == 1); the next cell counts them per sub-type.
# This rebinds `new`, which until now held the result of the first ID split.
new = newtable[newtable.Label == 1]

In [None]:
# Notebook cell output: number of positive rows for each sub-type.
new.Sub_type.value_counts()

In [None]:
# Keep positive rows only, then join each image's positive sub-types into one
# space-separated string. Images without any positive row keep their "" entry.
newtable = newtable[newtable.Label == 1]
#newtable = newtable[newtable.Sub_type != "any"]

for n_done, (image_id, positives) in enumerate(newtable.groupby("Image_ID")):
    # progress report every 10000 images
    if n_done % 10000 == 0:
        print(n_done)
    lbls[image_id] = " ".join(positives.Sub_type)

In [None]:
# Rebuild new_df from the filled-in mapping: "id" holds the image ids and
# "labels" the joined sub-type strings.
ids, joined_labels = list(lbls), list(lbls.values())
new_df = pd.DataFrame(np.array([ids, joined_labels]).T, columns=["id", "labels"])

In [None]:
# Drop the intermediate tables and force a garbage-collection pass.
del lbls, newtable, newX, new
gc.collect()

In [None]:
# Notebook cell output: the per-image table with columns "id" and "labels".
new_df

# fastai Dataset

Thanks to this kernel for the code to apply the windowing: [EDA: View dicom images with correct windowing](https://www.kaggle.com/omission/eda-view-dicom-images-with-correct-windowing)

In [None]:
#https://www.kaggle.com/omission/eda-view-dicom-images-with-correct-windowing

def window_image(img, window_center,window_width, intercept, slope):
    """Rescale pixel values linearly and clamp them to an intensity window.

    The values are first mapped through ``img * slope + intercept``; anything
    below ``window_center - window_width/2`` or above
    ``window_center + window_width/2`` is then set to that bound.

    Returns a new array (the input is not modified); its dtype is whatever
    ``img * slope + intercept`` produces.
    """
    half_width = window_width/2
    lower, upper = window_center - half_width, window_center + half_width

    rescaled = img*slope + intercept
    # masked assignment (rather than np.clip) keeps the dtype of `rescaled`
    rescaled[rescaled < lower] = lower
    rescaled[rescaled > upper] = upper
    return rescaled

def get_first_of_dicom_field_as_int(x):
    """Return a DICOM field value as an ``int``.

    If ``x`` is a ``pydicom.multival.MultiValue`` its first element is
    converted; any other value is converted directly with ``int(x)``.
    """
    # isinstance (instead of an exact type comparison) also accepts subclasses
    if isinstance(x, pydicom.multival.MultiValue):
        return int(x[0])
    return int(x)

def get_windowing(data):
    """Read the windowing and rescale fields of a DICOM dataset as ints.

    Returns a list ``[window_center, window_width, intercept, slope]`` read
    from tags (0028,1050) to (0028,1053). Each value goes through
    ``get_first_of_dicom_field_as_int``, so multi-valued fields contribute
    their first element and every value is converted with ``int``.
    """
    tags = (
        ('0028', '1050'),  # window center
        ('0028', '1051'),  # window width
        ('0028', '1052'),  # intercept
        ('0028', '1053'),  # slope
    )
    raw_values = [data[tag].value for tag in tags]
    return [get_first_of_dicom_field_as_int(value) for value in raw_values]

In [None]:
def image_resample(image, dicom_header, new_spacing=(1, 1)):
    """Resample an image so that its pixel spacing is close to ``new_spacing``.

    Code from https://www.raddq.com/dicom-processing-segmentation-visualization-in-python/
    adapted to work for pixels.

    Args:
        image: array with one axis per entry of ``dicom_header.PixelSpacing``.
        dicom_header: object exposing ``PixelSpacing`` (values convertible to float).
        new_spacing: target spacing per axis, in the same units as ``PixelSpacing``.

    Returns:
        The zoomed image. Its shape is ``round(image.shape * spacing / new_spacing)``,
        so the achieved spacing can differ slightly from ``new_spacing``.
    """
    spacing = np.array([float(s) for s in dicom_header.PixelSpacing])
    old_shape = np.array(image.shape)

    # Round to a whole number of pixels, then derive the zoom that hits that
    # shape exactly.
    target_shape = np.round(old_shape * (spacing / np.asarray(new_spacing, dtype=float)))
    zoom_factors = target_shape / old_shape

    # scipy.ndimage.zoom is the public name; the scipy.ndimage.interpolation
    # namespace is deprecated.
    return scipy.ndimage.zoom(image, zoom_factors)

In [None]:
# Wrap each id as "ID_<id>.dcm".
# (presumably the DICOM file name that ImageList.from_df later looks up -- confirm)
new_df.id = "ID_" + new_df.id + ".dcm"

In [None]:
def image_crop(image):
    """Crop an image to the bounding box of its non-zero pixels.

    Based on this stack overflow post:
    https://stackoverflow.com/questions/26310873/how-do-i-crop-an-image-on-a-white-background-with-python

    Pixels equal to 0 are treated as background. The crop covers the first
    two axes and includes the last non-zero row and column. An image with no
    non-zero pixel is returned unchanged, because there is no box to crop to.
    """
    # Coordinates of every non-background pixel, one row per axis
    coords = np.array(np.nonzero(image != 0))
    if coords.shape[1] == 0:
        return image

    top_left = coords.min(axis=1)
    # +1 because a slice end is exclusive; without it the last non-zero
    # row/column would be cut off.
    bottom_right = coords.max(axis=1) + 1

    return image[top_left[0]:bottom_right[0],
                 top_left[1]:bottom_right[1]]

In [None]:
def sigmoid_window(img, window_center, window_width, U=1.0, eps=(1.0 / 255.0)):
    """Apply a sigmoid intensity window to a DICOM dataset and rescale to [0, 1].

    Args:
        img: despite the name, a DICOM dataset. Its rescale slope/intercept are
            read through ``get_windowing`` and its pixels through ``pixel_array``.
        window_center, window_width: the window. The sigmoid is scaled so that
            the window edges (center -/+ width/2) map to ``eps`` and ``U - eps``.
        U: upper asymptote of the sigmoid.
        eps: distance from the asymptotes reached at the window edges.

    Returns:
        A float array min-max normalised to [0, 1]. If the windowed image is
        constant (or its range is not a positive number) an all-zero array is
        returned, instead of the NaNs a 0/0 normalisation would produce.
    """
    _, _, intercept, slope = get_windowing(img)
    rescaled = img.pixel_array * slope + intercept

    ue = np.log((U / eps) - 1.0)
    weight = (2 / window_width) * ue
    bias = ((-2 * window_center) / window_width) * ue
    squashed = U / (1 + np.exp(-(weight * rescaled + bias)))

    lowest = np.min(squashed)
    value_range = np.max(squashed) - lowest
    # `not (x > 0)` is also true for NaN, so both degenerate cases land here
    if not value_range > 0:
        return np.zeros_like(squashed)
    return (squashed - lowest) / value_range

def map_to_gradient_sig(grey_img):
    """Colour-map a 2-D greyscale image into an ``(H, W, 3)`` float array.

    Channel 0 ramps up over the upper half of the input range and channel 2
    ramps down over the lower half. Both are zeroed wherever the input is
    ``<= 0.01`` or ``> 1.0``. Channel 1 rises as ``4*g`` for inputs up to 0.75
    and falls as ``4 - 4*g`` above that. Every channel is clipped to [0, 1].
    """
    height, width = grey_img.shape[0], grey_img.shape[1]

    # pixels that are allowed to carry channel 0 / channel 2 colour
    visible = (grey_img > 0.01) & (grey_img <= 1.0)
    lower_part = grey_img <= 0.75
    upper_part = grey_img > 0.75

    first = np.clip(4*grey_img - 2, 0, 1.0) * visible
    rising = np.clip(4*grey_img * lower_part, 0, 1)
    falling = np.clip((-4*grey_img + 4) * upper_part, 0, 1)
    last = np.clip(-4*grey_img + 2, 0, 1.0) * visible

    rainbow_img = np.zeros((height, width, 3))
    for channel, values in enumerate((first, rising + falling, last)):
        rainbow_img[:, :, channel] = values
    return rainbow_img

def sigmoid_rainbow_bsb_window(img):
    """Blend three sigmoid windows of a DICOM dataset into one colour image.

    The dataset is windowed with (center, width) = (40, 80), (80, 200) and
    (600, 2000), named brain, subdural and bone here. The three results are
    mixed with weights 0.35 / 0.5 / 0.15, min-max normalised to [0, 1] and
    colour-mapped with ``map_to_gradient_sig``.

    If the blend is constant (or its range is not a positive number) it is
    normalised to all zeros rather than dividing 0 by 0 and producing NaNs.
    """
    brain_img = sigmoid_window(img, 40, 80)
    subdural_img = sigmoid_window(img, 80, 200)
    bone_img = sigmoid_window(img, 600, 2000)
    combo = brain_img*0.35 + subdural_img*0.5 + bone_img*0.15

    lowest = np.min(combo)
    value_range = np.max(combo) - lowest
    # `not (x > 0)` is also true for NaN, so both degenerate cases land here
    if not value_range > 0:
        combo_norm = np.zeros_like(combo)
    else:
        combo_norm = (combo - lowest) / value_range
    return map_to_gradient_sig(combo_norm)

In [None]:
def new_open_image(path, div=True, convert_mode=None, after_open=None):
    """Open a DICOM file and return it as a fastai ``Image``.

    The file is read with pydicom, turned into a 3-channel array by
    ``sigmoid_rainbow_bsb_window``, shifted so its minimum is 0 and, unless it
    is all zero, divided by its maximum.

    ``div``, ``convert_mode`` and ``after_open`` are accepted but not used
    (presumably to match the signature of the function this replaces --
    TODO confirm).
    """
    dataset = pydicom.dcmread(str(path))
    pixels = sigmoid_rainbow_bsb_window(dataset)

    pixels -= pixels.min()
    peak = pixels.max()
    if peak != 0:
        pixels = pixels / peak

    return Image(pil2tensor(pixels, dtype=np.float32))

# Replace the image opener in fastai's vision.data module with the DICOM-aware one.
vision.data.open_image = new_open_image

In [None]:
# Training table: images with an empty label string first, followed by the
# images that have at least one label.
unlabelled = new_df[new_df.labels == ""]
labelled = new_df[new_df.labels != ""]
df_train = pd.concat([unlabelled, labelled])

In [None]:
def se_resnext50_32x4d(pretrained=True):
    """Return the children of a ``pretrainedmodels`` Xception as an ``nn.Sequential``.

    ``pretrained=True`` requests the 'imagenet' weights; anything falsy
    requests none.

    NOTE(review): the function is named after SE-ResNeXt50 but builds
    ``pm.xception`` -- confirm which of the two is intended.
    """
    weights = 'imagenet' if pretrained else None
    backbone = pm.xception(pretrained=weights)
    return nn.Sequential(*backbone.children())

In [None]:
class AccumulateOptimWrapper(OptimWrapper):
    """OptimWrapper whose regular ``step`` and ``zero_grad`` do nothing.

    The parent implementations stay reachable as ``real_step`` and
    ``real_zero_grad``, so whoever drives training decides when the optimizer
    actually steps and when gradients are cleared.
    """

    def step(self):
        pass

    def zero_grad(self):
        pass

    def real_step(self):
        super().step()

    def real_zero_grad(self):
        super().zero_grad()
        
def acc_create_opt(self, lr:Floats, wd:Floats=0.):
    "Create optimizer with `lr` learning rate and `wd` weight decay."
    decay_options = dict(wd=wd, true_wd=self.true_wd, bn_wd=self.bn_wd)
    self.opt = AccumulateOptimWrapper.create(
        self.opt_func, lr, self.layer_groups, **decay_options
    )

# Patch Learner so that create_opt builds an AccumulateOptimWrapper. With this
# in place the optimizer only steps when something calls `real_step` on it.
Learner.create_opt = acc_create_opt

@dataclass
class AccumulateStep(LearnerCallback):
    """
    Accumulate gradients over `n_step` training batches, then take a single
    optimizer step on their average.

    Expects the learner's optimizer to expose `real_step` / `real_zero_grad`
    (as AccumulateOptimWrapper does).
    """
    def __init__(self, learn:Learner, n_step:int = 1):
        super().__init__(learn)
        self.n_step = n_step

    def on_epoch_begin(self, **kwargs):
        "Reset the number of accumulated batches at the start of every epoch."
        self.acc_batches = 0

    def on_batch_begin(self, last_input, last_target, train=True, **kwargs):
        "Count one more accumulated batch."
        # Only training batches produce gradients. Counting validation batches
        # would inflate the divisor and trigger a step on empty gradients at
        # epoch end.
        if train: self.acc_batches += 1

    def on_backward_end(self, **kwargs):
        "Step once `n_step` batches have been accumulated."
        # The counter is reset after every step, so it reaches n_step exactly
        # once per accumulation window. The previous `% n_step == n_step - 1`
        # test fired one batch early, and on every batch when n_step == 2.
        if self.acc_batches >= self.n_step:
            self._accumulated_step()

    def on_epoch_end(self, **kwargs):
        "Step on the gradients left over when the epoch ends mid-window."
        if self.acc_batches > 0:
            self._accumulated_step()

    def _accumulated_step(self):
        "Average the summed gradients over the accumulated batches, step, and reset."
        for p in self.learn.model.parameters():
            # a trainable parameter that took no part in the backward pass has no grad
            if p.requires_grad and p.grad is not None:
                p.grad.div_(self.acc_batches)
        self.learn.opt.real_step()
        self.learn.opt.real_zero_grad()
        self.acc_batches = 0
def set_BN_momentum(model,momentum=0.1*16/64):
    """Set ``momentum`` on every BatchNorm1d / BatchNorm2d layer inside ``model``.

    The model is modified in place (nested sub-modules included) and nothing
    is returned.
    """
    batchnorm_types = (nn.BatchNorm2d, nn.BatchNorm1d)
    for layer in model.modules():
        if isinstance(layer, batchnorm_types):
            layer.momentum = momentum

In [None]:
from fastai.callbacks import *
import cv2
# Training cell: builds the image lists, prepares 5 stratified folds and trains
# the module-level `model` for one epoch on a single fold.
bs = 32
im_list = ImageList.from_df(df_train, path=DATA/"stage_1_train_images")
# Test file names: unique second "_"-separated field of the sample submission's
# ID column, wrapped as "ID_<field>.dcm".
test_fnames = pd.DataFrame("ID_" + pd.read_csv(DATA/"stage_1_sample_submission.csv")["ID"].str.split("_", n=2, expand = True)[1].unique() + ".dcm")
test_im_list = ImageList.from_df(test_fnames, path=DATA/"stage_1_test_images")

tfms = get_transforms(do_flip=True, 
             flip_vert=False, 
             max_rotate=10)
# info collects the [train_indices, val_indices] pair of each of the 5 folds.
# Stratification uses the whole label string of each image as its class.
info=[]
skf=StratifiedKFold(n_splits=5,shuffle=True,random_state=2019)
for train_id,val_id in skf.split(df_train.index,df_train['labels']):
    info.append([train_id,val_id])
i=0
# NOTE(review): the loop variables train_id/val_id are not used below; the split
# is always info[3] (the fourth fold), and the unconditional `break` at the end
# of the body means only one iteration ever runs -- confirm this is intended.
for train_id,val_id in skf.split(df_train.index,df_train['labels']):
        data = (im_list.split_by_idxs(info[3][0],info[3][1])
               .label_from_df(label_delim=" ")
               .transform(tfms, size=332)
               .add_test(test_im_list)
               .databunch(bs=bs)
               .normalize(imagenet_stats))
        # f is only printed; the code that would write it is inside the string literal below
        f="sub"+str(i)+'.csv'
        print(f)
        learn = Learner(data, model,metrics=[accuracy_thresh])
        # n_acc is handed to AccumulateStep as its n_step
        n_acc=2
        learn.callbacks=[SaveModelCallback(learn, every='epoch', monitor='accuracy_thresh'),AccumulateStep(learn,n_acc)] 
        # presumably makes _conv_head the start of the last layer group so that
        # freeze() leaves only that group trainable -- TODO confirm against fastai
        learn.split( lambda m: (model._conv_head,) )
        learn.freeze()
        learn.model_dir="/kaggle/working/"
        learn.fit_one_cycle(1,2e-3,wd=.0001)
        learn.save('models')
        print('done')
        # The string literal below is a no-op expression statement holding the
        # disabled TTA-prediction / submission-writing code.
        """submission = pd.read_csv(DATA/"stage_1_sample_submission.csv")
        preds = learn.TTA(ds_type=DatasetType.Test)
        preds = np.array(preds[0])
        any_probs = 1 - np.prod(1 - preds, axis=1)
        submission.Label = np.hstack([preds, np.expand_dims(any_probs, -1)]).reshape(-1)
        submission.head()
        submission.to_csv(f, index=False)"""
        i=i+1
        print(i)
        # release the learner and cached GPU memory before a possible next fold
        learn.destroy()
        del learn
        torch.cuda.empty_cache()
        #d=int(input())
        print('enter 1 if you donot want to coninue')
        # (another no-op string literal: the author's warning about run time)
        """one epoch will take 70 minutes in total 6 hours for one fold CAREFULL OF STABLE INTERNEL IF YOU LOOSE CONNECTION
        THAN YOU ARE F****D WITH 6 HOURS OF GPU""" 
        #if d==1:
        # with the `if` above commented out, this break always runs
        break

In [None]:
#learn = Learner(data, model,metrics=[accuracy_thresh])

In [None]:
#learn.load("/tmp/0/bestmodel_0")

In [None]:
#filehandler = open('fold.pkl',"wb")
#pickle.dump(learn,filehandler)

In [None]:
"""from IPython.display import HTML
import pandas as pd
import numpy as np

def create_download_link(title = "Download pickle file", filename="fold.pkl"):  
    html = '<a href={filename}>{title}</a>'
    html = html.format(title=title,filename=filename)
    return HTML(html)
create_download_link()"""

In [None]:
from fastai.basic_train import Learner
from fastai.callbacks.one_cycle import OneCycleScheduler
from fastai.core import Floats,Any


class PartialOneCycleScheduler(OneCycleScheduler):
    """OneCycleScheduler that can start part-way through a longer schedule.

    `tot_epochs` is the length of the full schedule and `start_epoch` is the
    epoch to start from. When training begins the parent scheduler is set up
    for `tot_epochs` epochs and then advanced by `start_epoch` epochs' worth of
    batches. A negative `tot_epochs` means "the epochs about to be trained plus
    `start_epoch`".
    """
    def __init__(self, learn:Learner, lr_max:float,
                 moms:Floats=(0.95,0.85),
                 div_factor:float=25., pct_start:float=0.3,
                 tot_epochs:int=-1, start_epoch:int=0):
        super().__init__(learn, lr_max, moms, div_factor, pct_start)
        self.tot_epochs, self.start_epoch = tot_epochs, start_epoch

    def on_train_begin(self, n_epochs:int, **kwargs:Any)->None:
        # negative tot_epochs: derive the schedule length from this run
        if self.tot_epochs < 0:
            self.tot_epochs = self.start_epoch + n_epochs
        super().on_train_begin(self.tot_epochs, **kwargs)

        # Fast-forward the parent schedule over the batches of the skipped epochs.
        # (True is passed positionally, presumably as the parent's train flag -- TODO confirm)
        batches_per_epoch = len(self.learn.data.train_dl)
        self.start_iter = batches_per_epoch * self.start_epoch
        advance = super().on_batch_end
        for _ in range(self.start_iter):
            advance(True)