In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import torch
torch.cuda.is_available()

True

In [3]:
# misc
import os
from tqdm import tqdm, tnrange, tqdm_notebook
from pathlib import Path
import re
import numpy as np
import matplotlib.pyplot as plt
import cv2
import sys
import scipy.ndimage
import math
import pandas as pd
#import PIL
from PIL import Image

In [4]:
# dicom and nii
import pydicom
from pydicom.data import get_testdata_files
from pydicom.filereader import read_dicomdir
import pydicom.pixel_data_handlers.gdcm_handler as gdcm_handler
## ! gdcm must be installed with conda install (conda install -c conda-forge gdcm)
pydicom.config.image_handlers = ['gdcm_handler']
import nibabel as nib

In [5]:
# fast.ai lib
from fastai.vision import *
from fastai.metrics import *
from fastai.callbacks import *

In [6]:
# multithreading
from multiprocessing import Pool

## Define paths

In [7]:
path_str = '/home/ubuntu/sfr-challenge/lungs/dataset'
# path_str = '/Users/igorgarbuz/SoftDev/sfr-challenge/dataset'

In [8]:
path = Path(path_str)

In [9]:
path_seg = path/'seg_3d'

In [10]:
path_p = path/'Pathologiques'

In [11]:
path_n = path/'Normaux'

In [12]:
path_train = path/'train'
path_train_1 = path_train/'1'
path_train_0 = path_train/'0'
if not os.path.exists(path_train_1): os.makedirs(path_train_1)
if not os.path.exists(path_train_0): os.makedirs(path_train_0)

In [13]:
test_path = path_str + '/Pathologiques/N7Q0jai/N7Q0jai'

## Define fixed random seed

In [14]:
np.random.seed(42)

## Patients to exclude

In [15]:
exclude = {'1fmJ54h', '4PgdJEv', '93pFUjH', 'AoNf6az', 'H8cPI77', 'kalMNNP', 'mDb8hKS', 'SMwf6qh', 'V3Gli1y'}

In [16]:
exclude_nii3d = {'Seg_fhCY9du.nii.gz', 'Seg_cUXgmoh.nii.gz', 'Seg_G2YmH9u.nii.gz', 'Seg_gk033Ar.nii.gz', 'Seg_WI7IMWW.nii.gz', 'Seg_S2KbtNT.nii.gz', 'Seg_X5lwqWZ.nii.gz', 'Seg_cRJUBEy.nii.gz'}

## Test section ==>

## <== End of test section

## Define file loaders

In [17]:
# debug sorting https://pydicom.github.io/pydicom/dev/auto_examples/image_processing/reslice.html
# order reversed must be based on patients orientation / origin from the metadata
# ImagePositionPatient is used to sort slides
def load_dicom(path):
    slices = [pydicom.dcmread(path + '/' + s) for s in os.listdir(path) if ".dcm" in s.lower()]
    try:
        slices.sort(key = lambda x: x.ImagePositionPatient[2], reverse=False)
    except:
        slices.sort(key = lambda x: x.InstanceNumber, reverse=True)
        print("No ImagePositionPatient[2] at:", path)
    return slices

In [18]:
#     except ValueError: can be used to 
def get_pixels_hu(slices, path):
    arr = np.array([], dtype=np.int16)
    # convert list of slices into ndarray
    try:
        arr = np.stack([s.pixel_array for s in slices])
    except:
        print('cannot stack at: ', path)
    arr = arr.astype(np.int16)
    # set outside-of-scann pixels to zero (200 by default)
    # see housfield units for more details https://en.wikipedia.org/wiki/Hounsfield_scale
    arr[arr == -2000] = 0
    # read Intercept and slope convert to Housfield units (HU)
    try:
        intercept = slices[0].RescaleIntercept
    except ValueError:
        print('cannot get intercept at: ', path)
    try:
        slope = slices[0].RescaleSlope
    except:
        print('cannot get slope at: ', path)
    if slope != 1:
        arr = slope * arr.astype(np.float64)
        arr = arr.astype(np.int16)
    arr += np.int16(intercept)
    return np.array(arr, dtype=np.int16)

## Below section: create RGB image from 3D numpy array

In [19]:
def get_rgb_image(label, img_3d, stride=1):
    step = stride + 1
    slice_n_minus_1 = img_3d[label - step]
    slice_n = img_3d[label]
    slice_n_plus_1 = img_3d[label + step]
    img = np.array([slice_n_minus_1,\
                    slice_n,\
                    slice_n_plus_1])
    return np.moveaxis(img, 0, 2)

In [31]:
# normalize to [0; 65535]
# alternative normalization functions are in the "Test" section
# use second image normalize line for 8 bit images
MIN = -1000.0
MAX = 400.0
def normalize_cv2(image):
    image[image > MAX] = MAX
    image[image < MIN] = MIN
    #image = cv2.normalize(image, dst=None, alpha=0, beta=65535, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_16U)
    image = cv2.normalize(image, dst=None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8U)
    return image

In [21]:
def resize(img, slc, new_spacing=[0.75, 0.75]):
    init_spacing = [slc.PixelSpacing[0], slc.PixelSpacing[1]]
    spacing = map(float, init_spacing)
    spacing = np.array(list(spacing))
    
    resize_factor = spacing / new_spacing
    new_shape_float = img.shape[:2] * resize_factor
    new_shape = np.round(new_shape_float)
    
    resize_factor_real = new_shape / img.shape[:2]
    new_spacing = spacing / resize_factor_real
    img = cv2.resize(img, None, fx=resize_factor_real[0], fy=resize_factor_real[1])
    return img, resize_factor_real

In [22]:
def crop_center(img_nda, new_width=256, new_height=256):
    shape = img_nda.shape
    width = shape[0]
    height = shape[1]
    left = round((width - new_width) / 2)
    top = round((height - new_height) / 2)
    right = round((width + new_width) / 2)
    bottom = round((height + new_height) / 2)
    if (left > 0 and right > left and top > 0 and bottom > top): 
        crop_img = img_nda[top:bottom, left:right]
    else:
        crop_img = img_nda
    return (crop_img)

In [23]:
def split_quart(img, patient, qt_width=224, qt_height=224, pad_w=5, pad_h=5):
    shape = img.shape
    width = shape[0]
    height = shape[1]
    center_w = round(width / 2)
    center_h = round(height / 2)
    if center_h + pad_h - qt_height <= 0 or center_w + pad_w - qt_width <= 0:
        pad_w = qt_width - center_w
        pad_h = qt_height - center_h
        print('split_quart: image: ', patient, ' too small: ', shape)
    qt_upl = img[center_h + pad_h - qt_height : center_h + pad_h, center_w + pad_w - qt_width : center_w + pad_w]
    qt_upr = img[center_h + pad_h - qt_height : center_h + pad_h, center_w - pad_w : center_w - pad_w + qt_width]
    qt_dwl = img[center_h - pad_h : center_h - pad_h + qt_height, center_w + pad_w - qt_width : center_w + pad_w]
    qt_dwr = img[center_h - pad_h : center_h - pad_h + qt_height, center_w - pad_w : center_w - pad_w + qt_width]
    return qt_upl, qt_upr, qt_dwl, qt_dwr

In [24]:
def get_image_from_seg(label_slice):
    label_img = cv2.normalize(label_slice, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8U)
    # orient = _slice.ImageOrientationPatient - original patient orientation data
    label_img = np.rot90(label_img)
    label_img = np.flipud(label_img)
    return (label_img)

## Below section : functions to get slices from nii

In [25]:
# function count sequences of unique numbers and returns a list of sequences' length
# [0] corresponds to no sequences, [1] - sequence of length 1, [3,4] - two sequences of length 3 and 4
def get_unique_nodules(uniques_z):
    labels = []
    i = 1
    if len(uniques_z) == 0: return [0]
    elif len(uniques_z) == 1: return [i]
    prev = uniques_z[0]
    for s in uniques_z[1:]:
        if s == prev + 1:
            prev = s
            i += 1
        else:
            labels += [i]
            i = 1
            prev = s
        if s == uniques_z[-1]:
            labels += [i]
            return np.array(labels)

In [26]:
# target depth is a target thickness in mm of the rgb x3 layers stack;
# It is basically the distance between exterior layers R and B
def get_stride(_slice):
    s = _slice.SliceThickness
    #print('SliceThickness:', s)
    if s >= 2 and s <= 3: stride = 0
    elif s > 0.9 and s <= 2: stride = 1
    elif s > 0.6 and s <= 0.9: stride = 2
    elif s > 0.5 and s <= 0.6: stride = 3
    else:
        print('SliceThickness is: ', s, ' at: ', _slice.PatientName)
        stride = 3
        print('stride out of range')
    #print('stride: ', stride)
    return int(stride)

In [27]:
# returns an array with indexes of z slices containing labels
# step in mm [+- 0.25]; min_sz in mm [+- 1]
def get_z_slices(nii_path, stride=1):
    step = 1 + stride
    pad = 1
    min_sz = 3 + 2 * stride + 2 * pad
    # below try to replace by path_patholog = path_p
    path_patholog='/home/ubuntu/sfr-challenge/lungs/dataset/Pathologiques'
    slices = []
    raw_pixels = nib.load(nii_path).get_fdata() 
    labels_3d = raw_pixels.nonzero()
    uniques_z = np.unique(labels_3d[2])
    nodules = get_unique_nodules(uniques_z)
    for nod in nodules:
        if nod >= min_sz:
            slc = slice(step + pad, nod - pad - step, step)
            slices += list(uniques_z[slc])
        else:
            slices += [uniques_z[int(nod/2)]]
        uniques_z = uniques_z[nod:]
    return slices, raw_pixels

## Create list of patients from .nii segmentations

In [28]:
patients_segs = [str(path_seg) + '/' + p for p in os.listdir(path_seg) if p not in exclude_nii3d]

## Generator method 1 : Read .dcm, get slices from .nii, save .tif and label .jpg

In [30]:
def check_label(img):
    nonz = img.nonzero()
    unq = np.unique(nonz)
    return len(unq)

In [32]:
# generate .tif + .jpg for ORIGINAL patient's 3D
# it whold iterate over a list of patients' segmentations .nii.gz
def generate_training_set(patient_seg):
    # get patient name and directory
    pat = "(Seg\w*_)(\w+)"
    patient = re.search(pat, patient_seg)[2]
    patient_dir = path_p/patient/patient 
    
    #load patient 3D data
    slices = load_dicom(str(patient_dir))
    img_3d = get_pixels_hu(slices, patient_dir)
    stride = get_stride(slices[0])
    
    # get slices to extract
    slices_nii, raw_pxl_nii = get_z_slices(patient_seg, stride=stride)
    for s in slices_nii:
        img_rgb = normalize_cv2(get_rgb_image(s, img_3d, stride=stride))
        #img_rgb, scale = resize(img_rgb, slices[0], new_spacing=[0.76, 0.76])
        img_rgb, scale = resize(img_rgb, slices[0], new_spacing=[1.1, 1.1])
        img_seg = get_image_from_seg(raw_pxl_nii[:,:,s])
        img_seg = cv2.resize(img_seg, None, fx=scale[0], fy=scale[1])
        img_seg = crop_center(img_seg)
        # Generate whole-slide picture
        img_rgb = crop_center(img_rgb)
        cv2.imwrite(str(path_train) + '/1/' + patient + '_' + str(s) + '.tif', img_rgb)
        cv2.imwrite(str(path_train) + '/1/' + patient + '_' + str(s) + '.jpg', img_seg)
    return patient_dir    
    

In [None]:
for p in tqdm_notebook(patients_segs[:10]):
    generate_training_set(p)

In [33]:
prc = Pool(4)

start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_p2htVYq.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_LW80yRq.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Segmentation_05dzgcM.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_D2WoCVS.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Segmentation_05dzgcM.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_eziNTD1.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_p2htVYq.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_npe17Yu.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_D2WoCVS.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_FRgGtP3.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_LW80yRq.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_13mzME7.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_eziNTD1

finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_mLMHGGY.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_D18yBIZ.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_6zF8f9i.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_yM65KQs.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_D18yBIZ.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_X6X68KJ.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_j4xjocW.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_FbmXrAO.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_yM65KQs.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_PneRDnP.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_X6X68KJ.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_wScsmJq.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_FbmXrAO.nii.gz
star

start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_IimckK3.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_OH35cRj.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Segmentation_50YOICs.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Segmentation_3YbmwLf.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_9dKKuQw.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_IimckK3.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_m8OufDU.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_6VeF4wx.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_cjJK4yx.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_9dKKuQw.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_Ezv3nfz.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_m8OufDU.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_hKNP

start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_HBbQZWz.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_A6J8sUe.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_XiTTGqx.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_JCixCqV.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_Mscp3Tp.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_XiTTGqx.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Segmentation_8n6hNT0.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_mJTnBtp.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Segmentation_3JWbWXd.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_Mscp3Tp.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_ISRkHIE.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Segmentation_8n6hNT0.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d

start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Segmentation_5QtRXr7.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_iOFomH7.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_FDB58hy.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_0clDduI.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_m8jS4Kj.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_ozDdqkE.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_ztmIQaQ.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Segmentation_5QtRXr7.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_FOzMVBB.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_m8jS4Kj.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_fJcZ06P.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_FDB58hy.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_3pfs

start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_w0vrbn9.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_6pjUXBe.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_c85J9jh.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_hvd3c2p.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Segmentation_9H6JLVw.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_w0vrbn9.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_dk5Ob4j.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_vnn92kc.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_c85J9jh.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Segmentation_2k55L8P.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Segmentation_9H6JLVw.nii.gz
finished:  /home/ubuntu/sfr-challenge/lungs/dataset/seg_3d/Seg_vnn92kc.nii.gz
start:  /home/ubuntu/sfr-challenge/lungs/dataset/seg

In [34]:
%%time
# total time 12min 55sec for 278 patients
prc.map(generate_training_set, patients_segs)

CPU times: user 1.04 s, sys: 333 ms, total: 1.37 s
Wall time: 13min 42s


[PosixPath('/home/ubuntu/sfr-challenge/lungs/dataset/Pathologiques/p2htVYq/p2htVYq'),
 PosixPath('/home/ubuntu/sfr-challenge/lungs/dataset/Pathologiques/npe17Yu/npe17Yu'),
 PosixPath('/home/ubuntu/sfr-challenge/lungs/dataset/Pathologiques/t5s2zYO/t5s2zYO'),
 PosixPath('/home/ubuntu/sfr-challenge/lungs/dataset/Pathologiques/lzXtbK8/lzXtbK8'),
 PosixPath('/home/ubuntu/sfr-challenge/lungs/dataset/Pathologiques/aUgn9NV/aUgn9NV'),
 PosixPath('/home/ubuntu/sfr-challenge/lungs/dataset/Pathologiques/NQC48Ei/NQC48Ei'),
 PosixPath('/home/ubuntu/sfr-challenge/lungs/dataset/Pathologiques/Bekufkk/Bekufkk'),
 PosixPath('/home/ubuntu/sfr-challenge/lungs/dataset/Pathologiques/0qvjgFQ/0qvjgFQ'),
 PosixPath('/home/ubuntu/sfr-challenge/lungs/dataset/Pathologiques/YKdnl1h/YKdnl1h'),
 PosixPath('/home/ubuntu/sfr-challenge/lungs/dataset/Pathologiques/LVPaMXM/LVPaMXM'),
 PosixPath('/home/ubuntu/sfr-challenge/lungs/dataset/Pathologiques/bEcHl76/bEcHl76'),
 PosixPath('/home/ubuntu/sfr-challenge/lungs/dataset/P

### Analyze generated images

In [None]:
imgs_ds = os.listdir(path_train_1)
imgs_ds.sort()

In [None]:
plt.hist(res, bins=10)

## Loop through Normaux patients folders and create train dataset

In [35]:
def get_rand_slices_idx(img_3d, stride=1, pics_n=10):
    step = stride + 1
    pad = step * 5
    low_idx = step + pad + 1
    high_idx = img_3d.shape[0] - step - 1 - pad
    rand_idx = np.random.randint(low=low_idx, high=high_idx, size=pics_n)
    return(rand_idx)

In [38]:
def generate_training_set_from_norm(patient):
    print('started: ', patient)
    # get 3D image
    slices = load_dicom(str(path_n/patient))
    img_3d = get_pixels_hu(slices, path_p/patient)
    stride = get_stride(slices[0])
    # get slices to extract
    random_slices_idx = get_rand_slices_idx(img_3d, pics_n=17)
    for s in random_slices_idx:
        img_rgb = normalize_cv2(get_rgb_image(s, img_3d, stride=stride))
        img_rgb, scale = resize(img_rgb, slices[0], new_spacing=[1.1, 1.1])
        img_rgb = crop_center(img_rgb)
        cv2.imwrite(str(path_train) + '/0/' + patient + '_' + str(s) + '.tif', img_rgb)
    print('finished: ', patient)
    return patient

### Generate list of patients

In [39]:
pats_norm = os.listdir(path_n)
len(pats_norm)

345

In [None]:
for p in tqdm_notebook(pats_norm[:50]):
    generate_training_set_from_norm(p)

In [40]:
proc = Pool(4)

started:  jLKrkWO
started:  0L2mt7E
started:  bU8K5D1
started:  CHRE
finished:  CHRE
started:  k6GmW4k
finished:  bU8K5D1
started:  P14YqgJ
finished:  0L2mt7E
started:  dRIjnzq
finished:  k6GmW4k
started:  Byo4nG7
finished:  jLKrkWO
started:  tWT9t9c
finished:  P14YqgJ
started:  RVkgSTh
finished:  Byo4nG7
started:  yFnM4cV
finished:  dRIjnzq
started:  qa8bfmZ
finished:  RVkgSTh
started:  ZpIeYZj
finished:  yFnM4cV
started:  qRFr2ep
finished:  qa8bfmZ
started:  HA3GCPH
finished:  ZpIeYZj
started:  ypACSsB
finished:  ypACSsB
started:  gGj2s7u
finished:  HA3GCPH
started:  yrkau4M
finished:  tWT9t9c
started:  EP3d1dK
finished:  gGj2s7u
started:  T308psJ
finished:  yrkau4M
started:  lkq3IJ0
finished:  EP3d1dK
started:  VjN6duO
finished:  T308psJ
started:  rtsTEYp
finished:  lkq3IJ0
started:  iO8yoGR
finished:  VjN6duO
started:  PBx54GA
finished:  qRFr2ep
started:  dyPaCMp
finished:  iO8yoGR
started:  TFj38B4
finished:  rtsTEYp
started:  SMDlKGe
finished:  dyPaCMp
started:  r3rdKbn
finished:

started:  AnyK8ix
finished:  gQrRrnO
finished:  cLucmlX
started:  E0TGtvo
started:  T4OICET
finished:  MAS_
started:  RXydGpB
finished:  RXydGpB
started:  MILO_
finished:  T4OICET
started:  tptcZ3a
finished:  E0TGtvo
started:  sKtPMew
finished:  sKtPMew
started:  Vu54vFr
finished:  MILO_
started:  hUyX3g9
finished:  hUyX3g9
started:  lLSY6CY
finished:  AnyK8ix
started:  wdTvE5V
finished:  lLSY6CY
started:  WR5ac2q
finished:  wdTvE5V
started:  l6EAvDE
finished:  WR5ac2q
started:  oMgQXgj
finished:  tptcZ3a
started:  wJcgHMB
finished:  l6EAvDE
started:  JSvcCFR
finished:  Vu54vFr
started:  4BbsuxC
finished:  wJcgHMB
started:  dfJnj8P
finished:  JSvcCFR
started:  a8QmsRG
finished:  oMgQXgj
started:  MAH_
finished:  a8QmsRG
started:  nfnhn7k
finished:  dfJnj8P
started:  EhyAazM
finished:  4BbsuxC
started:  KEDD_
finished:  MAH_
started:  cpDGFVA
finished:  nfnhn7k
started:  97U6ngS
finished:  EhyAazM
started:  ipORNqu
finished:  KEDD_
started:  11YNVGp
finished:  97U6ngS
started:  GZ4yVA8


In [41]:
%%time
# total time 10 min for 345 patients
proc.map(generate_training_set_from_norm, pats_norm)

CPU times: user 1.11 s, sys: 775 ms, total: 1.88 s
Wall time: 8min 46s


['0L2mt7E',
 'dRIjnzq',
 'qa8bfmZ',
 'HA3GCPH',
 'yrkau4M',
 'lkq3IJ0',
 'iO8yoGR',
 'TFj38B4',
 'oOLyTG5',
 '5ufH2fy',
 'IBylZrm',
 'A8cfYAg',
 'ToeNaft',
 'BLX5RkJ',
 'KveJCZB',
 'siIhuK0',
 'NvPnrGS',
 '5boVDPi',
 'LUDtN51',
 'a7YzZVx',
 'AeIkyAp',
 'oDBGFXu',
 'jLKrkWO',
 'tWT9t9c',
 'EP3d1dK',
 'VjN6duO',
 'PBx54GA',
 'JBhGRIu',
 'akrQ1Ms',
 'cUU39yf',
 'EG7nSmW',
 '9m1A1Bd',
 'JN6KDJL',
 'IwYYQy8',
 'AlGRbCe',
 'a6nvfdp',
 'FQvqT3b',
 'gMOf2Ug',
 'JKKzVcG',
 'SOEMoQ9',
 'eoNleXS',
 'JrERPL5',
 'nnKcsM4',
 'Z6qQl1j',
 'bU8K5D1',
 'P14YqgJ',
 'RVkgSTh',
 'ZpIeYZj',
 'ypACSsB',
 'gGj2s7u',
 'T308psJ',
 'rtsTEYp',
 'SMDlKGe',
 'oWJfm6o',
 'PXIWWSr',
 '4mqxCcQ',
 'fBsljHW',
 'OPT_',
 'KlAW8oN',
 'Cvy1b15',
 'p64ICrp',
 'nmToeFp',
 'Dzi65my',
 'Sd7fZKN',
 'mAbDJAe',
 'u738iXG',
 'CHRE',
 'k6GmW4k',
 'Byo4nG7',
 'yFnM4cV',
 'qRFr2ep',
 'dyPaCMp',
 'r3rdKbn',
 'IlwzGS1',
 'XvDykCC',
 'DjFF2mV',
 '8zotgWC',
 'GsMFQFA',
 'mHyTL9J',
 'N338fqS',
 '84x7SrL',
 '2hxpoSO',
 'GAG',
 'lDfODZB',
 '