In [1]:
from fastai.vision.all import *
from fastai.vision.gan import *
import torch
# Select CUDA device index 3 for this process. The index is hard-coded, so adjust it for the host.
torch.cuda.set_device(3)

In [2]:
path = Path('datasetBenignas/')

In [3]:
# Folder scanned for the training images.
# NOTE: `pathI` is reassigned further down (test-set cells), and the `get_y` lambdas read this global when they run.
pathI = path/'images'

In [4]:
def get_dls(bs:int, size:int):
  """Build the generator `DataLoaders` over the folder the global `pathI` names at call time.

  `bs` is the batch size and `size` the value handed to `Resize`. Each item found by
  `get_image_files` is paired with the file of the same name in that folder as its target.
  Returns the `DataLoaders` with `c` set to 3.
  """
  # Snapshot the folder now. `pathI` is reassigned later in the notebook; a lambda
  # that read the global directly would resolve targets in whatever folder `pathI`
  # names at the moment the target is looked up, not the folder these loaders were built on.
  src = pathI
  dblock = DataBlock(blocks=(ImageBlock, ImageBlock),
                   get_items=get_image_files,
                   get_y = lambda x: src/x.name,
                   splitter=RandomSplitter(),
                   item_tfms=Resize(size),
                   batch_tfms=[*aug_transforms(max_zoom=2.),
                               Normalize.from_stats(*imagenet_stats)])
  dls = dblock.dataloaders(src, bs=bs, path=path)
  dls.c = 3 # For 3 channel image
  return dls

In [5]:
dls_gen = get_dls(64, 64)

In [6]:
bbone = resnet34

def create_gen_learner():
  "Build the generator: a `unet_learner` over `dls_gen` using `bbone`, configured from the globals `loss_gen` and `y_range`."
  gen_kwargs = dict(
    loss_func=loss_gen,
    blur=True,
    norm_type=NormType.Weight,
    self_attention=True,
    y_range=y_range,
  )
  return unet_learner(dls_gen, bbone, **gen_kwargs)

In [7]:
# Globals read by `create_gen_learner`: `y_range` and `loss_gen` are passed straight to `unet_learner`.
# NOTE(review): `wd` is assigned here but not passed to anything in the visible cells.
wd, y_range, loss_gen = 1e-3, (-3., 3.), MSELossFlat()
learn_gen = create_gen_learner()

In [8]:
# Folder inside `path` that is scanned below for generated images; created here if it does not exist yet.
name_gen = 'image_gen'
path_gen = path/name_gen
path_gen.mkdir(exist_ok=True)

In [9]:
# Critic inputs: every image file under the generated-images folder followed by every image file under `images`.
path_g = get_image_files(path/name_gen)
path_i = get_image_files(path/'images')
fnames = path_g + path_i

In [10]:
def get_crit_dls(fnames, bs:int, size:int):
  """Build the critic `DataLoaders` from a list of image files.

  `fnames` is split with `RandomSplitter(0.1)`; each file is opened with
  `PILImage.create` and labelled through `parent_label` + `Categorize`.
  `bs` is the batch size and `size` the value handed to `Resize`.
  """
  split_idxs = RandomSplitter(0.1)(fnames)
  x_pipe, y_pipe = [PILImage.create], [parent_label, Categorize]
  datasets = Datasets(fnames, tfms=[x_pipe, y_pipe], splits=split_idxs)
  # Per-item transforms, then the batch-level conversion and normalization.
  item_pipe = [ToTensor(), Resize(size)]
  batch_pipe = [IntToFloatTensor(), Normalize.from_stats(*imagenet_stats)]
  return datasets.dataloaders(bs=bs, after_item=item_pipe, after_batch=batch_pipe)

In [11]:
dls_crit = get_crit_dls(fnames, bs=64, size=64)

In [12]:
loss_crit = AdaptiveLoss(nn.BCEWithLogitsLoss())

In [13]:
def create_crit_learner(dls, metrics):
  "Wrap a freshly built `gan_critic()` model in a `Learner` over `dls` that trains with the global `loss_crit`."
  critic_model = gan_critic()
  return Learner(dls, critic_model, metrics=metrics, loss_func=loss_crit)

In [14]:
learn_crit = create_crit_learner(dls_crit, accuracy_thresh_expand)

In [15]:
# Rebuild the critic DataLoaders. This cell previously bound the result to `ls_crit`,
# a name nothing reads, so the rebuilt loaders were discarded and the learner created
# next kept using the earlier `dls_crit`.
dls_crit = get_crit_dls(fnames, bs=64, size=64)

In [16]:
learn_crit = create_crit_learner(dls_crit, metrics=None).load('critic-pre2')

In [17]:
learn_gen = create_gen_learner().load('gen-pre2')

In [18]:
class GANDiscriminativeLR(Callback):
    """`Callback` that handles multiplying the learning rate by `mult_lr` for the critic.

    While the GAN trainer is in critic mode and the learner is training, the optimizer's
    `lr` is multiplied by `mult_lr` before each batch and divided by it again afterwards.
    All reads and writes go through `self.learn`, so the callback works on whichever
    learner it is attached to, not on a module-level variable named `learn`.
    """
    def __init__(self, mult_lr=5.): self.mult_lr = mult_lr

    def _critic_training(self):
        "True when the critic (not the generator) is being trained on the current batch."
        return not self.learn.gan_trainer.gen_mode and self.training

    def before_batch(self):
        "Multiply the current lr if necessary."
        if self._critic_training():
            opt = self.learn.opt
            opt.set_hyper('lr', opt.hypers[0]['lr']*self.mult_lr)

    # Released fastai dispatches `before_batch`; pre-release fastai2 used `begin_batch`.
    # Keeping both names means the multiplication runs on either version; with only the
    # old name the LR would never be raised but would still be divided after every batch.
    begin_batch = before_batch

    def after_batch(self):
        "Put the LR back to its value if necessary."
        # Same guard as `before_batch`, so the LR is only divided when it was multiplied.
        if self._critic_training():
            opt = self.learn.opt
            opt.set_hyper('lr', opt.hypers[0]['lr']/self.mult_lr)

In [19]:
switcher = AdaptiveGANSwitcher(critic_thresh=.65)

In [27]:
# Assemble the GAN from the generator and critic learners loaded above, with the adaptive
# switcher and the critic LR callback. `learn` is the object every later scoring cell calls.
learn = GANLearner.from_learners(learn_gen, learn_crit, weights_gen=(1.,50.), show_img=False, switcher=switcher,
                                 opt_func=partial(Adam, mom=0.), cbs=GANDiscriminativeLR(mult_lr=5.))

In [28]:
learn.load('gan')

<fastai.vision.gan.GANLearner at 0x7fd9f25957b8>

In [29]:
learn.validate()

(#3) [0.6588068008422852,0.6588068008422852,0.0]

In [24]:
# Export the whole learner, using `dill` as the pickle module.
# NOTE(review): presumably `dill` is needed because the DataBlock uses lambdas — confirm.
# NOTE(review): `Learner.export` is believed to write relative to `learn.path`, while the
# `load_learner` call further down is given a bare file name — check both refer to the same file.
import dill
learn.export('anomalydetector.pkl',pickle_module=dill)

In [30]:
preds = learn.get_preds()

In [33]:
# Reload the exported learner. Unpickling executes code from the file, so only load exports you trust.
# NOTE(review): `learn1` is not used by any later cell shown here; the following cells keep calling `learn`.
learn1 = load_learner('anomalydetector.pkl',pickle_module=dill)

In [29]:
learn.predict('datasetMalignasTest/1005124.tif - Series 3_roi_0002-0049.tif')

  warn(f"You are shadowing an attribute ({name}) that exists in the learner. Use `self.learn.{name}` to avoid this")
  warn(f"You are shadowing an attribute ({name}) that exists in the learner. Use `self.learn.{name}` to avoid this")
  warn(f"You are shadowing an attribute ({name}) that exists in the learner. Use `self.learn.{name}` to avoid this")


(TensorImage([[[135, 132, 130,  ..., 186, 197, 196],
          [136, 133, 133,  ..., 186, 193, 195],
          [132, 131, 134,  ..., 188, 182, 187],
          ...,
          [201, 196, 193,  ..., 227, 231, 235],
          [204, 203, 197,  ..., 235, 231, 237],
          [202, 207, 198,  ..., 241, 239, 239]],
 
         [[107, 107, 109,  ..., 145, 150, 152],
          [106, 107, 111,  ..., 140, 143, 149],
          [111, 114, 106,  ..., 135, 144, 148],
          ...,
          [164, 160, 160,  ..., 236, 236, 237],
          [166, 162, 163,  ..., 237, 234, 236],
          [166, 165, 166,  ..., 244, 240, 241]],
 
         [[132, 135, 130,  ..., 143, 144, 144],
          [135, 138, 138,  ..., 145, 141, 142],
          [143, 144, 137,  ..., 132, 145, 147],
          ...,
          [176, 172, 172,  ..., 236, 239, 238],
          [178, 178, 171,  ..., 238, 237, 238],
          [177, 180, 169,  ..., 239, 241, 240]]]),
 TensorBase([[[ 0.2036,  0.1547,  0.1176,  ...,  1.0768,  1.2576,  1.2415],
 

In [None]:
preds[0].shape

In [25]:
np.mean(np.array((preds[0][0]-preds[1][0])**2))

0.0041958676

In [26]:
!ls datasetBenignas/images | wc -l

33689


In [35]:
# Scoring copies of the train/valid loaders: no shuffling, last partial batch kept, and a batch
# pipeline listing only `IntToFloatTensor` and ImageNet normalization (no `aug_transforms`).
dlTrain = dls_gen.train.new(shuffle=False, drop_last=False, 
                       after_batch=[IntToFloatTensor, Normalize.from_stats(*imagenet_stats)])
dlValid = dls_gen.valid.new(shuffle=False, drop_last=False, 
                       after_batch=[IntToFloatTensor, Normalize.from_stats(*imagenet_stats)])

In [23]:
len(dlValid.dataset.items)

6737

In [24]:
# Per-image reconstruction error for the data behind `dls_gen`:
# the train loader is scored first, then the valid loader, so `errors` follows that order.
errors = []
for dl in (dlTrain, dlValid):
    preds,real = learn.get_preds(dl=dl)
    # mean squared error between each ground-truth image and its reconstruction
    errors.extend(np.mean(np.array((image - recon) ** 2))
                  for (image, recon) in zip(real, preds))

  warn(f"You are shadowing an attribute ({name}) that exists in the learner. Use `self.learn.{name}` to avoid this")
  warn(f"You are shadowing an attribute ({name}) that exists in the learner. Use `self.learn.{name}` to avoid this")
  warn(f"You are shadowing an attribute ({name}) that exists in the learner. Use `self.learn.{name}` to avoid this")


In [25]:
# Anomaly threshold: the 0.999 quantile of the reconstruction errors collected above.
thresh = np.quantile(errors, 0.999)

In [26]:
thresh

0.04763923677802065

We compute the errors of benign images

In [27]:
# Benign test set. `get_dls` builds exactly the DataBlock that used to be pasted here
# (same blocks, splitter, transforms, `path`, and `c = 3`), so call it instead of
# keeping a second copy. It reads the global `pathI`, which must be set before the call.
pathI = Path('datasetBenignasTest/')
dls_gen = get_dls(128, 64)

In [28]:
# Scoring copies of the benign-test loaders: fixed order, last partial batch kept,
# batch pipeline limited to `IntToFloatTensor` and ImageNet normalization.
dlTrain = dls_gen.train.new(shuffle=False, drop_last=False, 
                       after_batch=[IntToFloatTensor, Normalize.from_stats(*imagenet_stats)])
dlValid = dls_gen.valid.new(shuffle=False, drop_last=False, 
                       after_batch=[IntToFloatTensor, Normalize.from_stats(*imagenet_stats)])

In [29]:
# Per-image reconstruction error for the benign test images:
# the train loader is scored first, then the valid loader, so `errorsBenign` follows that order.
errorsBenign = []
for dl in (dlTrain, dlValid):
    preds,real = learn.get_preds(dl=dl)
    # mean squared error between each ground-truth image and its reconstruction
    errorsBenign.extend(np.mean(np.array((image - recon) ** 2))
                        for (image, recon) in zip(real, preds))

In [30]:
idxs = np.where(np.array(errorsBenign) >= thresh)[0]

In [31]:
idxs

array([  633,   943,  1288,  1358,  1781,  2733,  5519,  5747,  7884,
        8090,  8652,  8994,  9305,  9594, 10189, 10233, 11542, 11805,
       12160])

In [32]:
# Items in the same order the errors were appended (train loader, then valid loader),
# so positions in `idxs` index both `benignas` and `errorsBenign`.
benignas = dlTrain.dataset.items + dlValid.dataset.items

In [33]:
# One [item, error] row per benign image whose error reached the threshold.
res = [[benignas[i], errorsBenign[i]] for i in idxs]

We compute the errors of malignant images

In [34]:
# Malignant test set. `get_dls` builds exactly the DataBlock that used to be pasted here
# (same blocks, splitter, transforms, `path`, and `c = 3`), so call it instead of
# keeping a third copy. It reads the global `pathI`, which must be set before the call.
pathI = Path('datasetMalignasTest/')
dls_gen = get_dls(128, 64)

In [35]:
# Scoring copies of the malignant-test loaders: fixed order, last partial batch kept,
# batch pipeline limited to `IntToFloatTensor` and ImageNet normalization.
dlTrain = dls_gen.train.new(shuffle=False, drop_last=False, 
                       after_batch=[IntToFloatTensor, Normalize.from_stats(*imagenet_stats)])
dlValid = dls_gen.valid.new(shuffle=False, drop_last=False, 
                       after_batch=[IntToFloatTensor, Normalize.from_stats(*imagenet_stats)])

In [36]:
# Per-image reconstruction error for the malignant test images:
# the train loader is scored first, then the valid loader, so `errorsMalign` follows that order.
errorsMalign = []
for dl in (dlTrain, dlValid):
    preds,real = learn.get_preds(dl=dl)
    # mean squared error between each ground-truth image and its reconstruction
    errorsMalign.extend(np.mean(np.array((image - recon) ** 2))
                        for (image, recon) in zip(real, preds))

In [37]:
idxs = np.where(np.array(errorsMalign) >= thresh)[0]

In [38]:
# Items in the same order the errors were appended (train loader, then valid loader),
# so positions in `idxs` index both `malignas` and `errorsMalign`.
malignas = dlTrain.dataset.items + dlValid.dataset.items

In [39]:
# Append one [item, error] row per malignant image whose error reached the threshold.
res.extend([malignas[i], errorsMalign[i]] for i in idxs)

In [40]:
df = pd.DataFrame(res,columns=['name','error'])

In [41]:
def extractName(string):
    """Return the slide name embedded in an ROI file path.

    The slide name is the part of the file name (the text after the last '/')
    that precedes the first ' - ' separator, e.g.
    'datasetMalignasTest/1005208.tif - Series 3_roi_3991-1154.tif' -> '1005208.tif'.
    If the file name has no ' - ' separator, the whole file name is returned.
    """
    # Use the last '/' so nested or absolute paths still yield the file name.
    file_name = string[string.rfind('/') + 1:]
    # `partition` returns the full text when the separator is absent, so nothing
    # is chopped off (slicing up to `find(...) == -1` would drop the last character).
    slide, _, _ = file_name.partition(' - ')
    return slide

In [42]:
# Derive the slide name for each row; `name` values are converted with `str` first because `extractName` works on strings.
df['imageName'] = df['name'].apply(lambda x: extractName(str(x)))

In [43]:
df.groupby('imageName').size()

imageName
1005124.tif     165
1005125.tif     549
1005126.tif     805
1005127.tif    1217
1005128.tif    3272
1005129.tif    2149
1005130.tif    1103
1005131.tif      13
1005132.tif     355
1005133.tif      86
1005134.tif      14
1005205.tif      32
1005206.tif     681
1005207.tif     151
1005208.tif       3
1005209.tif     220
1005210.tif     145
1005211.tif     128
1005212.tif      33
14I.tif           1
20I.tif           8
75I.tif          10
dtype: int64

In [17]:
df[df.imageName=='75I.tif']

Unnamed: 0,name,error,imageName
0,datasetBenignasTest/75I.tif - Series 3_roi_667...,0.07194,75I.tif
2,datasetBenignasTest/75I.tif - Series 3_roi_266...,0.066904,75I.tif
3,datasetBenignasTest/75I.tif - Series 3_roi_053...,0.048802,75I.tif
4,datasetBenignasTest/75I.tif - Series 3_roi_438...,0.084209,75I.tif
7,datasetBenignasTest/75I.tif - Series 3_roi_517...,0.051724,75I.tif
10,datasetBenignasTest/75I.tif - Series 3_roi_425...,0.056597,75I.tif
11,datasetBenignasTest/75I.tif - Series 3_roi_003...,0.062762,75I.tif
15,datasetBenignasTest/75I.tif - Series 3_roi_415...,0.058255,75I.tif
16,datasetBenignasTest/75I.tif - Series 3_roi_375...,0.053862,75I.tif
17,datasetBenignasTest/75I.tif - Series 3_roi_068...,0.047645,75I.tif


Other thresholds

In [45]:
# thresh = 0.047
# idxsBenign = np.where(np.array(errorsBenign) >= thresh)[0]
# idxsMalign = np.where(np.array(errorsMalign) >= thresh)[0]

# res = []
# for idx in idxsBenign:
#     res.append([benignas[idx],errorsBenign[idx]])
# for idx in idxsMalign:
#     res.append([malignas[idx],errorsMalign[idx]])
    
# df = pd.DataFrame(res,columns=['name','error'])
# df['imageName'] = df['name'].apply(lambda x: extractName(str(x)))
# df.groupby('imageName').size()

In [46]:
df.to_csv('results.csv',index=None)

In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('results.csv')

In [19]:
# Print the full path of every flagged ROI belonging to slide 1005208.tif, one per line.
for roi_path in df.loc[df.imageName=='1005208.tif', 'name']:
    print(roi_path)

datasetMalignasTest/1005208.tif - Series 3_roi_3991-1154.tif
datasetMalignasTest/1005208.tif - Series 3_roi_3885-1360.tif
datasetMalignasTest/1005208.tif - Series 3_roi_0852-1446.tif
