## Inception with uncertainty

In [1]:
import pandas as pd
# Read the space-separated split manifests (train first, then test) into
# DataFrames; later cells use their 'image_path' and 'class' columns.
train_class, test_class = (
    pd.read_csv(manifest, sep=' ')
    for manifest in ('data/train_split_v4.csv', 'data/test_split_v4.csv')
)

In [2]:
def load_pictures (data_frame,data_dir,channels, max_images=1500):
    """Load the images listed in a manifest into a single NumPy array.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        Must contain an 'image_path' column with file names relative to
        ``data_dir``.
    data_dir : str
        Prefix concatenated directly with each file name (so it needs its
        trailing separator, e.g. "data/train/").
    channels : int
        3 keeps the first three planes of the last axis of every image;
        any other value keeps only plane 0.
    max_images : int or None, optional
        Upper bound on how many rows of the manifest are read, counted from
        the top. Defaults to 1500 (the previously hard-coded cap); pass None
        to read every row.

    Returns
    -------
    numpy.ndarray
        The stacked per-image arrays, in manifest order.
        NOTE(review): a regular N-d result presumably requires all images to
        share one height/width and to have a channel axis — confirm on the data.
    """
    import numpy as np
    from PIL import Image

    paths = data_frame['image_path'].values
    if max_images is not None:
        paths = paths[:max_images]

    pics = []
    for img in paths:
        # Materialise the pixels while the file is open, then release the
        # handle immediately instead of leaving it to the garbage collector.
        with Image.open(data_dir + img) as handle:
            pixels = np.array(handle)
        if channels == 3:
            pics.append(pixels[:, :, :3])
        else:
            pics.append(pixels[:, :, 0])

    return np.array(pics)

In [3]:
# Number of colour planes to keep per image: 3 keeps the first three planes,
# any other value makes load_pictures keep only plane 0.
num_channels = 3
# Load the image arrays of both splits from their respective folders.
X_train = load_pictures(train_class,"data/train/",channels = num_channels)
X_test = load_pictures(test_class,"data/test/",channels = num_channels)
print("Number of images:", X_train.shape, X_test.shape)

Number of images: (1500, 200, 200, 3) (1500, 200, 200, 3)


In [4]:
from codvidutils.imageproc import map_categorical
import numpy as np

# Integer code assigned to each diagnosis label.
values_dict = {'COVID-19': 1,'pneumonia': 2,'normal': 0}
test_class['class_categorical'] = test_class['class'].apply(map_categorical, args=(values_dict,))
train_class['class_categorical'] = train_class['class'].apply(map_categorical, args=(values_dict,))

# Take exactly as many labels as images were loaded (both come from the top
# of the same manifest), so X and Y stay aligned even if the image cap used
# when loading is changed.
diseaseID_train = np.asarray(train_class["class_categorical"][:len(X_train)])
diseaseID_test = np.asarray(test_class["class_categorical"][:len(X_test)])
print('shape X: {} {},  disease_ID (Y): {} {}'.format(X_train.shape[0], X_test.shape[0], diseaseID_train.shape[0], diseaseID_test.shape[0] ))

shape X: 1500 1500,  disease_ID (Y): 1500 1500


In [5]:
from collections import Counter

# Tally how many training samples carry each class code.
counter = Counter(diseaseID_train)

# Per-class targets handed to the under-sampler: currently the raw tallies
# themselves (same object, not a copy). A capped alternative tried earlier
# was {2: 500, 0: 500, 1: counter[1]}.
dicto = counter

print(counter, dicto, sep="\n")

Counter({0: 1398, 1: 87, 2: 15})
Counter({0: 1398, 1: 87, 2: 15})


In [6]:
from imblearn.under_sampling import RandomUnderSampler

# The sampler works on 2-D (samples, features) input, so flatten every image
# and remember its original shape to restore it afterwards. Restoring from
# the recorded shape avoids hard-coding 200x200 and branching on the number
# of channels.
image_shape = X_train.shape[1:]
X_train = X_train.reshape(X_train.shape[0],-1)
print(X_train.shape)

# Fixed seed so the selection/order of samples is reproducible between runs.
under = RandomUnderSampler(sampling_strategy =dicto, random_state=0)
X_train, diseaseID_train = under.fit_resample(X_train, diseaseID_train)

# Back to image layout: (n_samples, *image_shape).
X_train = X_train.reshape((X_train.shape[0],) + image_shape)

# summarize class distribution
print(diseaseID_train.shape)
print(X_train.shape)

(1500, 120000)
(1500,)
(1500, 200, 200, 3)


In [7]:
# (caption, class code) pairs; codes follow values_dict: 0 normal,
# 2 pneumonia, 1 COVID-19. Each line prints the shape of the matching subset.
train_rows = (("Normal train: ", 0), ("Pneumonia train: ", 2), ("COVID train: ", 1))
test_rows = (("Normal test: ", 0), ("Pneumonia test: ", 2), ("COVID test: ", 1))

for caption, code in train_rows:
    print(caption, diseaseID_train[diseaseID_train == code].shape)
print("*******************************************************")
for caption, code in test_rows:
    print(caption, diseaseID_test[diseaseID_test == code].shape)

Normal train:  (1398,)
Pneumonia train:  (15,)
COVID train:  (87,)
*******************************************************
Normal test:  (885,)
Pneumonia test:  (591,)
COVID test:  (24,)


In [8]:
def adding_images(X, Y,strides= 5):
    """Append extra class-1 pictures to a data set and crop the originals.

    The samples whose label equals 1 are passed, together with ``strides``,
    to ``codvidutils.nwpic.new_pictures_arrays``; the pictures it returns are
    labelled 1 and appended after the originals. The originals are cropped to
    rows/columns 10..189 before concatenation.
    NOTE(review): the helper presumably returns pictures with the same
    180x180 footprint as the crop — confirm in codvidutils.

    Parameters
    ----------
    X : numpy.ndarray
        Image stack, samples on axis 0.
    Y : numpy.ndarray
        Label per sample.
    strides : int, optional
        Forwarded unchanged to ``new_pictures_arrays``.

    Returns
    -------
    tuple
        ``(X_out, Y_out)``: cropped originals followed by the new pictures,
        and the original labels followed by ones.
    """
    from codvidutils import nwpic as nw

    # Build the extra pictures from the un-cropped class-1 samples first.
    positives = X[Y == 1]
    extra_pics = nw.new_pictures_arrays(positives, strides)

    cropped = X[:, 10:190, 10:190]
    extra_labels = np.ones(extra_pics.shape[0])

    return (np.concatenate([cropped, extra_pics], axis=0),
            np.concatenate([Y, extra_labels], axis=0))

In [9]:
from codvidutils import nwpic as nw
"""
News images to train 
"""
# Extend both splits with additional class-1 pictures (stride 10) and crop
# the originals. Each line rebinds its inputs, so re-running this cell would
# apply adding_images to the already extended arrays again.
X_train, diseaseID_train = adding_images(X_train, diseaseID_train, strides=10 )
X_test, diseaseID_test = adding_images(X_test, diseaseID_test, strides=10 )

In [10]:
# Rescale pixel values by 1/255. Each line rebinds the name to the scaled
# array, so running this cell a second time would divide by 255 again.
X_test = X_test/255
X_train = X_train/255

In [11]:
# Build binary targets from the 3-class codes: code 2 is folded into 0,
# code 1 is left as is. Work on copies so the source arrays are not edited.
Y_train = diseaseID_train.copy()
del diseaseID_train  # the 3-class training labels are dropped from here on
Y_train[Y_train == 2] = 0

Y_test = diseaseID_test.copy()
Y_test[Y_test == 2] = 0

In [12]:
def inception_block_R(previous_layer, p_drop,p_l2,filters =[64,128,32,32]):
    """Build one four-branch inception block with always-on dropout.

    Branches (concatenated in this order):
      1. 1x1 convolution                      -> filters[0] maps
      2. 1x1 convolution, dropout, 3x3 conv   -> filters[1] maps
      3. 1x1 convolution, dropout, 5x5 conv   -> filters[2] maps
      4. 3x3 max-pool (stride 1), dropout, 1x1 conv -> filters[3] maps

    Branches 1-3 start from a dropped-out copy of the input; branch 4 pools
    the raw input. Every Dropout layer is called with ``training=True``.
    All convolutions use ReLU and L2 penalties of strength ``p_l2`` on both
    kernel and bias.

    Parameters
    ----------
    previous_layer : tensor
        Output of the preceding Keras layer.
    p_drop : float
        Dropout rate used by every Dropout layer of the block.
    p_l2 : float
        L2 regularisation factor.
    filters : sequence of 4 ints, optional
        Number of feature maps per branch (not modified here).

    Returns
    -------
    tensor
        Concatenation of the four branch outputs.
    """
    from tensorflow.keras.layers import concatenate, Conv2D, Dropout, MaxPooling2D
    from tensorflow.keras.regularizers import l2

    def mc_dropout(tensor):
        # Dropout forced on regardless of the Keras learning phase.
        return Dropout(rate=p_drop)(tensor, training=True)

    def conv(tensor, n_maps, size, **extra):
        # ReLU convolution with fresh L2 regularisers on bias and kernel.
        layer = Conv2D(filters=n_maps, kernel_size=(size, size), activation='relu',
                       bias_regularizer=l2(p_l2), kernel_regularizer=l2(p_l2), **extra)
        return layer(tensor)

    # Layers are created in the same order as before so automatic layer
    # naming is unaffected.
    shared_input = mc_dropout(previous_layer)

    branch_1x1 = conv(shared_input, filters[0], 1)

    branch_3x3 = conv(shared_input, filters[1], 1)
    branch_3x3 = mc_dropout(branch_3x3)
    branch_3x3 = conv(branch_3x3, filters[1], 3, padding="same")

    branch_5x5 = conv(shared_input, filters[2], 1)
    branch_5x5 = mc_dropout(branch_5x5)
    branch_5x5 = conv(branch_5x5, filters[2], 5, padding="same")

    branch_pool = MaxPooling2D(pool_size=(3,3), padding="same", strides=1)(previous_layer)
    branch_pool = mc_dropout(branch_pool)
    branch_pool = conv(branch_pool, filters[3], 1, padding="same")

    return concatenate([branch_1x1, branch_3x3, branch_5x5, branch_pool])

In [13]:
"""
Incep_6 architecture using the Keras Functional API
"""
def deep_inception_R(p_drop,p_l2, input_shape=(180,180,3)):
    """Build the six-block inception classifier with the Keras Functional API.

    Layout: six ``inception_block_R`` blocks, with a default 2x2 max-pool
    after each of the first five, then global max-pooling, dropout, two
    dense ReLU layers (256 and 64 units, each followed by dropout) and a
    single sigmoid output unit. Every Dropout layer is called with
    ``training=True``; every Dense layer carries L2 penalties on kernel and
    bias.

    Parameters
    ----------
    p_drop : float
        Dropout rate used throughout the network.
    p_l2 : float
        L2 regularisation factor used throughout the network.
    input_shape : tuple of int, optional
        Shape of one input image. Defaults to (180, 180, 3), the value that
        was previously hard-coded.

    Returns
    -------
    tensorflow.keras.models.Model
        The uncompiled model.
    """
    from tensorflow.keras.layers import MaxPooling2D, Dense, GlobalMaxPooling2D, Dropout, Input
    from tensorflow.keras.models import Model
    from tensorflow.keras.regularizers import l2

    n_blocks = 6
    input_layer = Input(shape=input_shape)

    features = input_layer
    for block in range(n_blocks):
        features = inception_block_R(features, p_drop, p_l2)
        # Halve the spatial size between blocks, but not after the last one.
        if block < n_blocks - 1:
            features = MaxPooling2D()(features)

    hidden = GlobalMaxPooling2D()(features)
    hidden = Dropout(rate=p_drop)(hidden, training=True)

    for units in (256, 64):
        hidden = Dense(units=units, activation="relu",
                       bias_regularizer=l2(p_l2), kernel_regularizer=l2(p_l2))(hidden)
        hidden = Dropout(rate=p_drop)(hidden, training=True)

    out_layer = Dense(units=1, activation="sigmoid",
                      bias_regularizer=l2(p_l2), kernel_regularizer=l2(p_l2))(hidden)

    return Model(inputs=input_layer, outputs=out_layer)


In [14]:
# Sanity check: report the shapes of the arrays that will be fed to the network.
print('Train shape X: {},  Y: {} \n Test shape X: {}, Y: {}'.format(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape))

Train shape X: (2283, 180, 180, 3),  Y: (2283,) 
 Test shape X: (1716, 180, 180, 3), Y: (1716,)


In [15]:
from codvidutils.utils import weigths2loss
# Derive a {class: weight} mapping from the binary training labels with the
# project helper (the weighting formula itself lives in codvidutils).
# NOTE(review): the fit call that is active in the training cell does not
# pass class_weight, so these weights are only printed — confirm intent.
dic_weights = weigths2loss(Y_train)
print(dic_weights)

{0: 0.38107752956636004, 1: 0.6189224704336399}


In [None]:
# Train T independent copies of the network and keep, for each one, its
# predictions on the test set (prob) and its Keras History (hist_for).
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.optimizers import Adam
#import tensorflow.keras
from time import time
#import keras.backend as K
import tensorflow as tf
prob = []     # one array of test-set predictions per trained model
hist_for =[]  # one History object per trained model
T = 4    # number of models trained
pdro = 0.05  # dropout rate passed to deep_inception_R
pl2 = 0.000050  # L2 factor passed to deep_inception_R

#***********************************************************
t0 = time()
print(pdro, pl2)
#strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0")
for i in range(T):
    # A fresh distribution strategy is created for every model.
    strategy = tf.distribute.MirroredStrategy()

    with strategy.scope():
    #for i in range(T):
        print("train number" , i, "****************************************************************************")
        # Checkpoint file for this run; the run index keeps the files apart.
        filepath="Unc_"+str(i)+"_trained.hdf5"
        print("opened strategy")
        #tf.keras.backend.clear_session()

        print("Time paralisis ", time()-t0)
        model = deep_inception_R(p_drop = pdro, p_l2 = pl2)

        print("opened model")
        # Save the weights only when validation accuracy improves.
        checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
        ASG = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, amsgrad=True)

        model.compile(optimizer=ASG, loss='binary_crossentropy', metrics=['acc',"mse"])
        print("compiled")
        print("Time paralisis ", time()-t0)

        # Earlier variant: 30 epochs with class_weight=dic_weights.
        #history = model.fit(X_train, Y_train, epochs = 30, batch_size=32, callbacks= [checkpoint],verbose=2, class_weight=dic_weights,validation_data=(X_test, Y_test))
        # The test split doubles as validation data here.
        history = model.fit(X_train, Y_train, epochs=5, batch_size=32, callbacks= [checkpoint],verbose=1,validation_data=(X_test, Y_test))
        prob += [model.predict(X_test)]
        hist_for += [history]
        # Restart the timer so the next run's timings are relative to its start.
        t0 = time()
        # Author's note (translated): "this is what happens when one gets angry".
        # The name is unbound at the end of every iteration, so after the loop
        # finishes `strategy` no longer exists; `model` still refers to the last
        # model trained.
        del strategy # <---- this is what happens when one gets angry
        
        

0.05 5e-05
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1')
train number 0 ****************************************************************************
opened strategy
Time paralisis  1.0696301460266113
opened model
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
compiled
Time paralisis  3.8815362453460693
Train on 2283 samples, validate on 1716 samples
Epoch 1/5
INFO:tensorflow:batch_

In [None]:
# Manual clean-up for an interrupted training run. The training loop already
# deletes `strategy` at the end of every iteration, so after a completed run
# the name is gone and an unconditional `del` would raise NameError.
if "strategy" in globals():
    del strategy

In [None]:
# Monte Carlo dropout on the last trained model: every Dropout layer in the
# network is called with training=True, so repeated predict() calls give
# different stochastic outputs.
l = 0.01  # NOTE(review): presumably the prior length-scale of the MC-dropout formula — confirm
probs = [model.predict(X_test) for _ in range(T)]

# Statistics of the passes collected above (previously these were computed
# from `prob`, leaving the freshly collected `probs` unused).
predictive_mean = np.mean(probs, axis=0)
predictive_variance = np.var(probs, axis=0)

# Model precision; its inverse is added to the sample variance.
# NOTE(review): the published formula divides by the number of training
# samples, whereas T (number of passes) is used here — confirm.
tau = l**2 * (1 - pdro) / (2 * T * pl2)
predictive_variance += tau**-1

In [None]:
# Precision term from the dropout rate and the L2 factor. `pd` is the pandas
# module in this notebook, so the dropout rate must be read from `pdro`.
# NOTE(review): the constant 5 differs from T = 4 used elsewhere — confirm.
tau = (1 - pdro) / (2 * 5 * pl2)
tau

In [None]:
# Stack the per-epoch metrics of every training run into one table (rows of
# run 0 first, then run 1, ...). pandas is imported as `pd` in this notebook,
# and the frames are concatenated once instead of growing `hdf` in a loop.
history_frames = [pd.DataFrame(H.history) for H in hist_for]
hdf = pd.concat(history_frames, axis=0) if history_frames else pd.DataFrame([])

In [None]:
# Persist the stacked training histories as a space-separated text file,
# without writing the index column.
hdf.to_csv("histories_drop.txt", index= False, sep = " ")#, ignore_index = True)

In [None]:
# Shape of each class subset after the labels were collapsed to 0/1.
train_rows = (("Normal train: ", 0), ("COVID train: ", 1))
test_rows = (("Normal test: ", 0), ("COVID test: ", 1))

for caption, code in train_rows:
    print(caption, Y_train[Y_train == code].shape)
print("*******************************************************")
for caption, code in test_rows:
    print(caption, Y_test[Y_test == code].shape)

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation curve of one metric, one pair of lines per run.
fig,ax = plt.subplots(nrows=1,ncols=1,figsize=(8,8))#,6
plt.style.use('seaborn-deep')
plt.grid(True)
name="acc"  # key in History.history; the validation curve is "val_" + name
for i, h in enumerate(hist_for, start=1):
    plt.plot(h.history[name] ,label="Itera "+str(i))
    plt.plot(h.history["val_"+name], color= "orange" ,label="Testing data set")

# Label the axis after the metric actually plotted (it used to say "Loss"
# while showing accuracy).
metric_labels = {"acc": "Accuracy", "loss": "Loss", "mse": "MSE"}
plt.ylabel(metric_labels.get(name, name))
plt.xlabel('Epochs')
plt.legend(loc='center')
plt.show()

## Uncertainty

In [None]:
# Dropout rate and L2 factor, restated for this section.
p_drop, p_l2 = 0.05, 0.000050

# Precision term and its inverse; the inverse is later added to the
# per-sample spread of the predictions.
tau = (1 - p_drop) / (2 * 3 * p_l2)
Dsis = 1 / tau

print(tau, Dsis, sep="\n")

In [None]:
"""
Means & standard deviations
"""
# Stack the per-model predictions; axis 0 then indexes the model.
prob = np.asarray(prob)

# Per-sample mean and standard deviation across the models.
Mean = prob.mean(axis=0)
Dsta = prob.std(axis=0)

# Total uncertainty used downstream: spread across models plus the constant
# Dsis term.
# NOTE(review): this adds a standard deviation to 1/tau — confirm the
# intended units.
unc = Dsta + Dsis


In [None]:
def integ(x,m,d):
    """Gaussian probability density with mean ``m`` and standard deviation
    ``d``, evaluated at ``x``."""
    from math import pi, sqrt, exp

    normalisation = 1 / (sqrt(2 * pi) * d)
    exponent = -(m - x) ** 2 / (2 * d ** 2)
    return normalisation * exp(exponent)

In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import normalize


def _confusion(scores):
    """Confusion matrix of the rounded scores against Y_test, plus the same
    matrix with every row L1-normalised."""
    counts = confusion_matrix(Y_test, scores.round())
    return counts, normalize(counts, norm='l1')


# One matrix pair per trained model ...
for P in prob:
    cm, cm_norm = _confusion(P)
    print(cm, "\n", cm_norm)

# ... and one for the model-averaged score.
print("******* MEAN values******")
cm, cm_norm = _confusion(Mean)
print(cm, "\n", cm_norm)

In [None]:
def integraciones(integ, intervalos,A, B):
    """Integrate a per-sample density over consecutive score intervals.

    For every sample ``j`` the function ``integ(x, A[j], B[j])`` is
    integrated with ``scipy.integrate.quad`` between each pair of
    neighbouring edges. Edges equal to 0 are treated as -inf and edges equal
    to 1 as +inf, so with edges spanning [0, 1] the outer bins absorb the
    tails.

    The caller's ``intervalos`` is left untouched: the edge substitution is
    done on a private copy (it used to overwrite the array in place, which
    leaked -inf/+inf into later uses of the same array).

    Parameters
    ----------
    integ : callable
        ``integ(x, a, b)`` returning the integrand value at ``x``.
    intervalos : array-like
        Interval edges, length ``n_bins + 1``.
    A, B : sequences
        Per-sample extra arguments passed to ``integ`` (same length).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(A), n_bins)`` holding the integral of each
        sample over each interval.
    """
    import numpy as np
    from scipy.integrate import quad

    # Private float copy so the substitutions below never reach the caller.
    edges = np.array(intervalos, dtype=float)
    edges[edges == 0] = -np.inf
    edges[edges == 1] = np.inf

    n_bins = max(len(edges) - 1, 0)
    # Pre-allocated result instead of growing an array row by row.
    proba = np.empty((len(A), n_bins))
    for j in range(len(A)):
        for it in range(n_bins):
            # quad returns (value, error estimate); only the value is kept.
            proba[j, it] = quad(integ, edges[it], edges[it + 1], args=(A[j], B[j]))[0]
    return proba

In [None]:
# Expected number of samples per score bin, and its spread, for each true
# class (0 and 1). Every sample contributes a density built from its mean
# score and uncertainty; integrating it over a bin gives the probability of
# the sample falling there.
dic_cuentas = {0: [], 1:[] }   # class -> expected counts per bin
dic_dest = {0: [], 1:[] }      # class -> standard deviation of those counts
intervalos = np.linspace(0,1, num = 38)

for label in (0, 1):
    A = Mean[Y_test==label]
    B = unc[Y_test==label]
    print(len(A),len(B))
    # A copy of the edges is passed so `intervalos` keeps its finite values,
    # and the result gets its own name so the model predictions stored in
    # `prob` are not overwritten.
    bin_prob = integraciones(integ,intervalos.copy(),A,B)
    dic_cuentas[label] = np.nansum(bin_prob,axis=0)
    # Sum of per-sample p * (1 - p) terms, NaNs ignored.
    dic_dest[label] = np.sqrt(np.nansum((1-bin_prob)*bin_prob,axis=0))

In [None]:
# Bare expression: shows the current value of `prob` as the cell output.
prob

In [None]:
import matplotlib.pyplot as plt

# Score distribution of the first entry of `prob`, split by true class.
pred = prob[0]
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 8))
plt.style.use('seaborn-deep')

# (selection mask, line colour, line width, legend caption) per class.
hist_specs = (
    (Y_test == 0, 'darkorange', 2, "No covid"),
    (Y_test == 1, 'blue', 1, "Covid"),
)
for mask, colour, width, caption in hist_specs:
    plt.hist(pred[mask], 38, histtype='step', color=colour, lw=width, label=caption)

plt.ylabel('Number of events')
plt.xlabel('Score')
plt.legend(loc="upper right")
plt.show()
#plt.savefig(filename)

In [None]:
import matplotlib.pyplot as plt

# Expected counts per score bin with their error bars, one curve per class.
fig,ax = plt.subplots(nrows=1,ncols=1,figsize=(8,8))# 6,6

# Bin centres are rebuilt from fresh, finite edges on [0, 1] instead of
# shifting `intervalos` in place: that made the cell non-idempotent and
# broke whenever the edge array already held -inf/+inf.
n_bins = len(dic_cuentas[0])
bin_edges = np.linspace(0, 1, num=n_bins + 1)
bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2

plt.style.use('seaborn-deep')
plt.errorbar(bin_centers, dic_cuentas[0] , yerr=dic_dest[0],lw=2, label ="No covid")
plt.errorbar(bin_centers, dic_cuentas[1] , yerr=dic_dest[1],lw=2, label ="Covid")
plt.ylabel('Number of counts')
plt.xlabel('Score')
plt.legend(loc="upper center")
plt.show()

In [None]:
from sklearn.metrics import roc_curve, auc

# ROC curve and area for the mean score ...
fpr, tpr, thresholds = roc_curve(Y_test, Mean)
roc_auc = auc(fpr, tpr)

# ... and for the score shifted up and down by its uncertainty.
fpr_U, tpr_U, thr = roc_curve(Y_test, Mean + unc)
roc_auc_U = auc(fpr_U, tpr_U)

fpr_L, tpr_L, thr = roc_curve(Y_test, Mean - unc)
roc_auc_L = auc(fpr_L, tpr_L)

In [None]:
# ROC curves: central estimate (thick orange) plus the curves obtained from
# the scores shifted up/down by their uncertainty (thin blue).
fig,ax = plt.subplots(nrows=1,ncols=1,figsize=(8,8))#,6
plt.style.use('seaborn-deep')
plt.grid(True)
plt.plot(fpr, tpr, color='orange',lw=3, label='ROC-curve (AUC = %0.3f)' % (roc_auc))
plt.plot(fpr_U, tpr_U, color='blue',lw=1, label='ROC-curve Upper (AUC = %0.3f)' % (roc_auc_U))
plt.plot(fpr_L, tpr_L, color='blue',lw=1, label='ROC-curve Lower (AUC = %0.3f)' % (roc_auc_L))

# Axis label typo fixed ("Positie" -> "Positive").
plt.ylabel('True Positive Rate (TPR)')
plt.xlabel('False Positive Rate (FPR)')

plt.legend(loc="lower left")
plt.show()