## Iterative Data Distillation



In this notebook we train models using data distillation.

In [0]:
# Mount Google Drive at /content/drive; the trained learners are exported
# under '/content/drive/My Drive/' at the end of the notebook.
from google.colab import drive
drive.mount('/content/drive')

In [0]:
# Open Colab's upload dialog. The unzip step below looks for a file named
# dataset.zip in the working directory (presumably the archive uploaded here).
from google.colab import files
uploaded = files.upload()
# IPython shell escape: extract the archive into ./dataset, the folder the
# rest of the notebook reads from.
!unzip dataset.zip -d dataset

In [0]:
import warnings
import os
import shutil
import glob
import random
import random
import cv2
from fastai.vision import *
from fastai.utils.mem import *

# Hide the UserWarnings raised from torch.nn.functional.
warnings.filterwarnings("ignore", category=UserWarning, module="torch.nn.functional")

# Expected layout of `dataset` before this cell runs:
#   dataset/<class name>/...   one sub-directory of labelled images per class
#   dataset/<file>             loose files, treated as unlabelled images
dataset="dataset"
classesPaths=sorted(glob.glob(dataset+'/*'))
classes=[os.path.basename(pt) for pt in classesPaths if os.path.isdir(pt)]
images=[pt for pt in classesPaths if not os.path.isdir(pt)]

# os.makedirs is deliberately called without exist_ok: running this cell a
# second time would otherwise treat 'train', 'valid' and 'images' as classes
# and split them again, so failing with FileExistsError is the safer outcome.
os.makedirs(dataset+'/train')
os.makedirs(dataset+'/valid')
os.makedirs(dataset+'/images')

# Gather the unlabelled images in dataset/images.
for im in images:
  shutil.move(im,dataset+'/images/')

# Random 75% / 25% train/valid split, done independently for every class.
for cl in classes:
  os.mkdir(dataset+'/train/'+cl)
  os.mkdir(dataset+'/valid/'+cl)
  # List the class folder once and draw the whole training subset in a single
  # call, instead of re-listing the folder after every moved file.
  class_images=sorted(glob.glob(dataset+'/'+cl+'/*'))
  train_images=set(random.sample(class_images,int(len(class_images)*0.75)))
  for path in class_images:
    split='/train/' if path in train_images else '/valid/'
    shutil.move(path,dataset+split+cl)

def learn_with_model(dataset,model):
  """Train a fastai CNN learner on the image folder `dataset`.

  The data bunch is built with `ImageDataBunch.from_folder` (default
  transforms, size 224, batch size 32) and normalised with `imagenet_stats`.
  Training is two one-cycle epochs, then `unfreeze()`, a learning-rate finder
  run, and eight more one-cycle epochs.

  Args:
    dataset: path of the folder handed to `ImageDataBunch.from_folder`.
    model: architecture passed to `cnn_learner` (e.g. `models.resnet50`).

  Returns:
    The tuple `(learner, data_bunch)`.
  """
  bunch=ImageDataBunch.from_folder(dataset,ds_tfms=get_transforms(),size=224,bs=32)
  bunch=bunch.normalize(imagenet_stats)
  learner=cnn_learner(bunch,model,metrics=accuracy)

  # First stage: two epochs before any layer is unfrozen.
  learner.fit_one_cycle(2)

  # Second stage: unfreeze, then take the learning rate at which the finder
  # recorded its lowest loss.
  learner.unfreeze()
  learner.lr_find()
  recorder=learner.recorder
  best_lr=recorder.lrs[np.argmin(recorder.losses)]
  # Rates below 1e-05 are replaced by 1e-03.
  max_lr=1e-03 if best_lr<1e-05 else best_lr
  learner.fit_one_cycle(8,max_lr=slice(max_lr/100,max_lr))
  return learner,bunch

def moda(lista):
  """Average the score vectors of several predictions and pick the top class.

  Despite the name ("mode"), this is a mean: the third element of every
  prediction is converted with `.numpy()`, the vectors are averaged
  element-wise, and the position of the largest average is selected.

  Args:
    lista: non-empty sequence of indexable predictions whose element `[2]`
      supports `len()` and `.numpy()`; all vectors must share one length.

  Returns:
    `(index, value)`: the arg-max of the averaged vector and the averaged
    score at that index.
  """
  n_scores=len(lista[0][2])
  # Start from a float64 zero vector and add the vectors in list order.
  mean_scores=sum((pred[2].numpy() for pred in lista),np.zeros(n_scores))/len(lista)
  best=mean_scores.argmax()
  return best, mean_scores[best]

def omniData(dataset,learn,th):
  """Pseudo-label the unlabelled images of `dataset` with test-time augmentation.

  Every file in `<dataset>/images` is read with OpenCV and predicted by
  `learn` seven times: unchanged, flipped with flip codes 0, 1 and -1, box
  blurred (5x5), passed through a gamma lookup table, and Gaussian blurred
  (5x5). The seven score vectors are averaged by `moda`. When the best
  averaged score is strictly greater than `th`, the file is copied to
  `<dataset>/train/<class>/<class>_<file name>` and removed from
  `<dataset>/images`.

  Args:
    dataset: root folder containing the `images` and `train/<class>` folders.
    learn: trained fastai learner; `learn.predict` must return a tuple whose
      third element is the per-class score tensor, and `learn.data.classes`
      must list the class names in the order of those scores.
    th: threshold the averaged top score has to exceed for a file to be moved.

  Returns:
    None. The work is done on the file system; every move is printed.
  """
  # invGamma is 1.0, so this table maps each value to itself (up to uint8
  # truncation). It does not depend on the image, so it is built once.
  invGamma=1.0
  table=np.array([((i/255.0)**invGamma)*255 for i in np.arange(0,256)]).astype('uint8')

  # One callable per augmented view, in the order the predictions are averaged.
  variants=(
    lambda img: img,
    lambda img: cv2.flip(img,0),
    lambda img: cv2.flip(img,1),
    lambda img: cv2.flip(img,-1),
    lambda img: cv2.blur(img,(5,5)),
    lambda img: cv2.LUT(img,table),
    # NOTE(review): the third positional argument of GaussianBlur is sigmaX,
    # so cv2.BORDER_DEFAULT is used here as the sigma value, not as a border
    # mode. Kept unchanged so the predictions stay the same; confirm intent.
    lambda img: cv2.GaussianBlur(img,(5,5),cv2.BORDER_DEFAULT),
  )

  # Take the class names from the learner that produces the predictions, not
  # from a module-level `data` variable that may belong to another learner.
  classes=learn.data.classes

  for image in sorted(glob.glob(dataset+"/images/*")):
    im=cv2.imread(image,1)
    if im is None:
      # cv2.imread returns None for files it cannot decode; leave them alone.
      print(image+" could not be read as an image, skipping")
      continue
    # OpenCV loads BGR; convert to RGB before building the fastai image.
    im=cv2.cvtColor(im,cv2.COLOR_BGR2RGB)

    lista=[learn.predict(Image(pil2tensor(variant(im),dtype=np.float32).div_(255)))
           for variant in variants]

    mod,predMax=moda(lista)
    if predMax>th:
      target=dataset+'/train/'+classes[mod]+'/'+classes[mod]+'_'+image.split('/')[-1]
      shutil.copyfile(image,target)
      os.remove(image)
      print(image+" --> "+target)

# The same sequence is run for each architecture:
#   1. train a base learner on the original `dataset` folder;
#   2. copy `dataset` to a per-architecture working folder;
#   3. run omniData with threshold 0.8 so confident unlabelled images are
#      moved into that copy's train folders;
#   4. retrain on the enlarged copy, run omniData once more, retrain again;
#   5. export the last learner to Google Drive.
# The module-level name `data` is rebound by every learn_with_model call.

# --- ResNet-50 ---
learner_resnet50,data=learn_with_model(dataset,models.resnet50)
shutil.copytree(dataset, 'dataset_resnet50')
omniData('dataset_resnet50',learner_resnet50,0.8)
learner1IDD_resnet50,data=learn_with_model('dataset_resnet50',models.resnet50)
omniData('dataset_resnet50',learner1IDD_resnet50,0.8)
learner2IDD_resnet50,data=learn_with_model('dataset_resnet50',models.resnet50)
learner2IDD_resnet50.export('/content/drive/My Drive/learnerIDD_resnet50.pkl')

# --- ResNet-34 ---
learner_resnet34,data=learn_with_model(dataset,models.resnet34)
shutil.copytree(dataset, 'dataset_resnet34')
omniData('dataset_resnet34',learner_resnet34,0.8)
learner1IDD_resnet34,data=learn_with_model('dataset_resnet34',models.resnet34)
omniData('dataset_resnet34',learner1IDD_resnet34,0.8)
learner2IDD_resnet34,data=learn_with_model('dataset_resnet34',models.resnet34)
learner2IDD_resnet34.export('/content/drive/My Drive/learnerIDD_resnet34.pkl')

# --- ResNet-101 ---
learner_resnet101,data=learn_with_model(dataset,models.resnet101)
shutil.copytree(dataset, 'dataset_resnet101')
omniData('dataset_resnet101',learner_resnet101,0.8)
learner1IDD_resnet101,data=learn_with_model('dataset_resnet101',models.resnet101)
omniData('dataset_resnet101',learner1IDD_resnet101,0.8)
learner2IDD_resnet101,data=learn_with_model('dataset_resnet101',models.resnet101)
learner2IDD_resnet101.export('/content/drive/My Drive/learnerIDD_resnet101.pkl')

