In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
cd /content/drive/MyDrive/lungCancer/local/

/content/drive/MyDrive/lungCancer/local


In [3]:
!ls -la

total 11
drwx------ 2 root root 4096 Aug 31 17:27 coreLib
-rw------- 1 root root 2258 Jan 16  2022 main.py
drwx------ 2 root root 4096 Aug 31 17:27 models


# Imports

In [4]:
import os
from glob import glob
from tqdm.auto import tqdm
from coreLib.utils import *
import numpy as np
import imgaug as ia
import imgaug.augmenters as iaa
import cv2
import itertools
import random
import matplotlib.pyplot as plt
%matplotlib inline


# Globals


In [5]:
data_dir="/content/drive/MyDrive/lungCancer/raw/"
save_path="/content/drive/MyDrive/lungCancer/"
save_path=create_dir(save_path,'data')

train_save=create_dir(save_path,'train')
test_save=create_dir(save_path,'test')
eval_save=create_dir(save_path,'eval')



# Test Train  Split

In [6]:
labels= os.listdir(data_dir)
for label in labels:
    create_dir(train_save,label)
    create_dir(test_save,label)
    create_dir(eval_save,label)
    

# Raw Images

In [7]:
img_dim=256
count=0
for label in labels:
 img_paths=[img_path for img_path in tqdm(glob(os.path.join(data_dir,label,"*.*")))]
 nb_train=int(len(img_paths)*0.7)
 train_paths=img_paths[:nb_train]
 test_paths=img_paths[nb_train:]
 for img_path in tqdm(train_paths):
       img=cv2.imread(img_path)
       img=cv2.resize(img,(img_dim,img_dim))
       cv2.imwrite(os.path.join(train_save,label,f"{count}.jpeg"),img)
       count+=1
 for img_path in tqdm(test_paths):
     img=cv2.imread(img_path)
     img=cv2.resize(img,(img_dim,img_dim))
     cv2.imwrite(os.path.join(test_save,label,f"{count}.jpeg"),img)
     cv2.imwrite(os.path.join(eval_save,label,f"{count}.jpeg "),img)
     count+=1

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

# Center Crop

In [8]:

 for label in labels:
     train_paths=[img_path for img_path in tqdm(glob(os.path.join(train_save,label,"*.*")))]
     for img_path in tqdm(train_paths):
         # center crop
         img=cv2.imread(img_path)
         # center crop
         img=img[64:192,64:192]
         img=cv2.resize(img,(img_dim,img_dim))
         cv2.imwrite(os.path.join(train_save,label,f"cen_{count}.jpeg"),img)
         count+=1
        
    
     eval_paths=[img_path for img_path in tqdm(glob(os.path.join(test_save,label,"*.*")))]
     for img_path in tqdm(eval_paths):
         img=cv2.imread(img_path)
         # center crop
         img=img[64:192,64:192]
         img=cv2.resize(img,(img_dim,img_dim))
         cv2.imwrite(os.path.join(eval_save,label,f"cen_{count}.jpeg"),img)
         count+=1

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

# comb Aug

In [9]:

def save_combinations(_paths,_save,data_dim=256,max_count=200):
     '''
        for eval and train images:
         * creates collages for 4 combs
         args:
             _paths   : the raw image paths
             _save    : the directory to save the images
             data_dim : the dimension of the data to save
             max_count: number of maximum combination images
     '''
     iden="comb"
     count=0
     cen_paths=[]
     back_paths=[]
     for _path in _paths:
         if "cen" in _path:
             cen_paths.append(_path)
         else:
             back_paths.append(_path)
    
     for i in range(len(cen_paths)*len(back_paths)//2):
        
         back=cv2.imread(random.choice(back_paths))
         cen =cv2.imread(random.choice(cen_paths))
         cen =cv2.resize(cen,(data_dim//2,data_dim//2))
         back[64:192,64:192]=cen
         # save
         cv2.imwrite(os.path.join(_save,f"cenComb_{count}.jpeg"),back)
         count+=1
     count=0
     for comb in tqdm(itertools.combinations(_paths, 4)):
         if count==max_count:
             break
         _img0=cv2.imread(comb[0])
         _img1=cv2.imread(comb[1])
         _img2=cv2.imread(comb[2])
         _img3=cv2.imread(comb[3])
         # width
         _img01=np.concatenate([_img0,_img1],axis=1)
         _img23=np.concatenate([_img2,_img3],axis=1)
         # height
         _img=np.concatenate([_img01,_img23],axis=0)
         _img=cv2.resize(_img,(data_dim,data_dim))
         # save
         cv2.imwrite(os.path.join(_save,f"comb_{count}.jpeg"),_img)
         count+=1
    
        
        
        
    
    

for label in tqdm(labels):
     train_paths=[img_path for img_path in glob(os.path.join(train_save,label,"*.*"))]
     eval_paths=[img_path for img_path in glob(os.path.join(eval_save,label,"*.*"))]
     # train
     save_combinations(train_paths,os.path.join(train_save,label))
     # eval
     save_combinations(eval_paths,os.path.join(eval_save,label),max_count=50)
    
    

  0%|          | 0/3 [00:00<?, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

# ImageAug

In [10]:

# image aug
ia.seed(1)
seq = iaa.Sequential([
     iaa.Fliplr(0.5), # horizontal flips
     # Small gaussian blur with random sigma between 0 and 0.5.
     # But we only blur about 50% of all images.
     iaa.Sometimes(
         0.5,
         iaa.GaussianBlur(sigma=(0, 0.5))
     ),
     # Strengthen or weaken the contrast in each image.
     iaa.LinearContrast((0.75, 1.5)),
     # Add gaussian noise.
     # For 50% of all images, we sample the noise once per pixel.
     # For the other 50% of all images, we sample the noise per pixel AND
     # channel. This can change the color (not only brightness) of the
     # pixels.
     iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.5),
     # Make some images brighter and some darker.
     # In 20% of all cases, we sample the multiplier once per channel,
     # which can end up changing the color of the images.
     iaa.Multiply((0.8, 1.2), per_channel=0.2),
     # Apply affine transformations to each image.
     # Scale/zoom them, translate/move them, rotate them and shear them.
     #     iaa.Affine(
     #         scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
     #         translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
     #         rotate=(-25, 25),
     #         shear=(-8, 8)
     #     )
 ], random_order=True) # apply augmenters in random order


In [11]:
def save_augs(_paths,_save):
     '''
         for eval and train images:
         args:
             _paths  : the raw image paths
             _save   : the directory to save the images
             data_dim: the dimension of the data to save
     '''
     count=0
     for img_path in tqdm(_paths):
         img=cv2.imread(img_path)
         # aug
         images=np.array([img,img,img,img,img])
         images=seq(images=images)
         for img in images:
             # save
             cv2.imwrite(os.path.join(_save,f"aug_{count}.jpeg"),img)
             count+=1

for label in tqdm(labels):
     train_paths=[img_path for img_path in glob(os.path.join(train_save,label,"*.*"))]
     eval_paths=[img_path for img_path in glob(os.path.join(eval_save,label,"*.*"))]
    
     # train
     save_augs(train_paths,os.path.join(train_save,label))
     # eval
     save_augs(eval_paths,os.path.join(eval_save,label))
    

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/648 [00:00<?, ?it/s]

  0%|          | 0/146 [00:00<?, ?it/s]

  0%|          | 0/648 [00:00<?, ?it/s]

  0%|          | 0/146 [00:00<?, ?it/s]

  0%|          | 0/648 [00:00<?, ?it/s]

  0%|          | 0/146 [00:00<?, ?it/s]

# Tfrecords

In [12]:
from coreLib.data import Processor

In [13]:
data_path   =   "/content/drive/MyDrive/lungCancer/data/"
save_path   =   "/content/drive/MyDrive/lungCancer/data/"
fmt         =   "jpeg"
data_dim    =   256
image_type  =   "rgb"
data_size   =   1024
label_den   =   'train'
processor_obj=Processor(data_path,save_path,fmt,data_dim,image_type,data_size,label_den)
processor_obj.process()


100%|██████████| 11664/11664 [00:00<00:00, 1870406.86it/s]
100%|██████████| 2592/2592 [00:00<00:00, 1690767.65it/s]
100%|██████████| 36/36 [00:00<00:00, 380339.91it/s]


[32m#LOG     :[0m[34m/content/drive/MyDrive/lungCancer/data/tfrecords/train/0.tfrecord[0m


100%|██████████| 1024/1024 [00:09<00:00, 107.53it/s]


[32m#LOG     :[0m[34m/content/drive/MyDrive/lungCancer/data/tfrecords/train/1.tfrecord[0m


100%|██████████| 1024/1024 [00:08<00:00, 127.62it/s]


[32m#LOG     :[0m[34m/content/drive/MyDrive/lungCancer/data/tfrecords/train/2.tfrecord[0m


100%|██████████| 1024/1024 [00:07<00:00, 128.46it/s]


[32m#LOG     :[0m[34m/content/drive/MyDrive/lungCancer/data/tfrecords/train/3.tfrecord[0m


100%|██████████| 1024/1024 [00:07<00:00, 128.41it/s]


[32m#LOG     :[0m[34m/content/drive/MyDrive/lungCancer/data/tfrecords/train/4.tfrecord[0m


100%|██████████| 1024/1024 [00:07<00:00, 129.36it/s]


[32m#LOG     :[0m[34m/content/drive/MyDrive/lungCancer/data/tfrecords/train/5.tfrecord[0m


100%|██████████| 1024/1024 [00:07<00:00, 128.87it/s]


[32m#LOG     :[0m[34m/content/drive/MyDrive/lungCancer/data/tfrecords/train/6.tfrecord[0m


100%|██████████| 1024/1024 [00:07<00:00, 128.42it/s]


[32m#LOG     :[0m[34m/content/drive/MyDrive/lungCancer/data/tfrecords/train/7.tfrecord[0m


100%|██████████| 1024/1024 [00:08<00:00, 120.97it/s]


[32m#LOG     :[0m[34m/content/drive/MyDrive/lungCancer/data/tfrecords/train/8.tfrecord[0m


100%|██████████| 1024/1024 [00:08<00:00, 124.45it/s]


[32m#LOG     :[0m[34m/content/drive/MyDrive/lungCancer/data/tfrecords/train/9.tfrecord[0m


100%|██████████| 1024/1024 [00:08<00:00, 127.73it/s]


[32m#LOG     :[0m[34m/content/drive/MyDrive/lungCancer/data/tfrecords/train/10.tfrecord[0m


100%|██████████| 1024/1024 [00:07<00:00, 128.83it/s]


[32m#LOG     :[0m[34m/content/drive/MyDrive/lungCancer/data/tfrecords/train/11.tfrecord[0m


100%|██████████| 400/400 [00:03<00:00, 129.09it/s]


[32m#LOG     :[0m[34m/content/drive/MyDrive/lungCancer/data/tfrecords/eval/0.tfrecord[0m


100%|██████████| 1024/1024 [00:07<00:00, 130.30it/s]


[32m#LOG     :[0m[34m/content/drive/MyDrive/lungCancer/data/tfrecords/eval/1.tfrecord[0m


100%|██████████| 1024/1024 [00:07<00:00, 128.73it/s]


[32m#LOG     :[0m[34m/content/drive/MyDrive/lungCancer/data/tfrecords/eval/2.tfrecord[0m


100%|██████████| 544/544 [00:04<00:00, 129.43it/s]


[32m#LOG     :[0m[34m/content/drive/MyDrive/lungCancer/data/tfrecords/test/0.tfrecord[0m


100%|██████████| 36/36 [00:00<00:00, 119.39it/s]


In [14]:
!zip -r /content/drive/MyDrive/lungCancer/data/records.zip /content/drive/MyDrive/lungCancer/data/tfrecords

  adding: content/drive/MyDrive/lungCancer/data/tfrecords/ (stored 0%)
  adding: content/drive/MyDrive/lungCancer/data/tfrecords/train/ (stored 0%)
  adding: content/drive/MyDrive/lungCancer/data/tfrecords/train/0.tfrecord (deflated 0%)
  adding: content/drive/MyDrive/lungCancer/data/tfrecords/train/1.tfrecord (deflated 0%)
  adding: content/drive/MyDrive/lungCancer/data/tfrecords/train/2.tfrecord (deflated 0%)
  adding: content/drive/MyDrive/lungCancer/data/tfrecords/train/3.tfrecord (deflated 0%)
  adding: content/drive/MyDrive/lungCancer/data/tfrecords/train/4.tfrecord (deflated 0%)
  adding: content/drive/MyDrive/lungCancer/data/tfrecords/train/5.tfrecord (deflated 0%)
  adding: content/drive/MyDrive/lungCancer/data/tfrecords/train/6.tfrecord (deflated 0%)
  adding: content/drive/MyDrive/lungCancer/data/tfrecords/train/7.tfrecord (deflated 0%)
  adding: content/drive/MyDrive/lungCancer/data/tfrecords/train/8.tfrecord (deflated 0%)
  adding: content/drive/MyDrive/lungCancer/data/tfr

In [None]:
from google.colab import files
files.download("/content/drive/MyDrive/lungCancer/data/records.zip")