In [1]:
import sys
sys.path.append("..")

In [2]:
import numpy as np

from fastai.vision.all import *
from pathlib import Path
from tqdm.auto import tqdm
from shutil import copyfile, rmtree, move

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from urllib.request import urlopen
from io import BytesIO
from zipfile import ZipFile

def download_and_unzip(url, extract_to='.'):
    http_response = urlopen(url)
    zipfile = ZipFile(BytesIO(http_response.read()))
    zipfile.extractall(path=extract_to)

In [4]:
PROJ_PATH = Path.cwd().parent
DATA_PATH = PROJ_PATH/'data'
print(PROJ_PATH)
print(DATA_PATH)

/home/ubuntu/magnetic-tiles-defect
/home/ubuntu/magnetic-tiles-defect/data


In [5]:
download_and_unzip(url='https://github.com/abin24/Magnetic-tile-defect-datasets./archive/refs/heads/master.zip',
                   extract_to=DATA_PATH)

extract_dir = DATA_PATH/'Magnetic-tile-defect-datasets.-master'

In [6]:
dataset_path = DATA_PATH/'MAGNETIC_TILE_SURFACE_DEFECTS'
if dataset_path.is_dir():
    rmtree(dataset_path)
if extract_dir.is_dir():
    move(extract_dir, dataset_path)

In [7]:
dataset_path.ls()

(#9) [Path('/home/ubuntu/magnetic-tiles-defect/data/MAGNETIC_TILE_SURFACE_DEFECTS/MT_Uneven'),Path('/home/ubuntu/magnetic-tiles-defect/data/MAGNETIC_TILE_SURFACE_DEFECTS/MT_Break'),Path('/home/ubuntu/magnetic-tiles-defect/data/MAGNETIC_TILE_SURFACE_DEFECTS/MT_Fray'),Path('/home/ubuntu/magnetic-tiles-defect/data/MAGNETIC_TILE_SURFACE_DEFECTS/MT_Crack'),Path('/home/ubuntu/magnetic-tiles-defect/data/MAGNETIC_TILE_SURFACE_DEFECTS/MT_Free'),Path('/home/ubuntu/magnetic-tiles-defect/data/MAGNETIC_TILE_SURFACE_DEFECTS/MT_Blowhole'),Path('/home/ubuntu/magnetic-tiles-defect/data/MAGNETIC_TILE_SURFACE_DEFECTS/dataset.png'),Path('/home/ubuntu/magnetic-tiles-defect/data/MAGNETIC_TILE_SURFACE_DEFECTS/dataset.jpg'),Path('/home/ubuntu/magnetic-tiles-defect/data/MAGNETIC_TILE_SURFACE_DEFECTS/README.md')]

In [8]:
if (dataset_path/'MT_Free').exists():
    rmtree(dataset_path/'MT_Free')

In [9]:
classes = []
for ii in (dataset_path).ls():
    if ii.is_dir() and ii.stem.startswith('MT_'):
        classes.append(ii.stem)
classes

['MT_Uneven', 'MT_Break', 'MT_Fray', 'MT_Crack', 'MT_Blowhole']

In [10]:
classes_dict = {c:i+1 for i, c in enumerate(classes)}
classes_dict

{'MT_Uneven': 1, 'MT_Break': 2, 'MT_Fray': 3, 'MT_Crack': 4, 'MT_Blowhole': 5}

In [11]:
img_paths = [get_files(dataset_path/c, extensions='.jpg') for c in classes]
img_paths = [ii for sublist in img_paths for ii in sublist]
len(img_paths)

392

In [12]:
msk_paths = [get_files(dataset_path/c, extensions='.png') for c in classes]
msk_paths = [ii for sublist in msk_paths for ii in sublist]
len(msk_paths)

392

In [13]:
msk_paths

[Path('/home/ubuntu/magnetic-tiles-defect/data/MAGNETIC_TILE_SURFACE_DEFECTS/MT_Uneven/Imgs/exp1_num_274094.png'),
 Path('/home/ubuntu/magnetic-tiles-defect/data/MAGNETIC_TILE_SURFACE_DEFECTS/MT_Uneven/Imgs/exp5_num_24876.png'),
 Path('/home/ubuntu/magnetic-tiles-defect/data/MAGNETIC_TILE_SURFACE_DEFECTS/MT_Uneven/Imgs/exp3_num_24829.png'),
 Path('/home/ubuntu/magnetic-tiles-defect/data/MAGNETIC_TILE_SURFACE_DEFECTS/MT_Uneven/Imgs/exp5_num_311887.png'),
 Path('/home/ubuntu/magnetic-tiles-defect/data/MAGNETIC_TILE_SURFACE_DEFECTS/MT_Uneven/Imgs/exp6_num_124787.png'),
 Path('/home/ubuntu/magnetic-tiles-defect/data/MAGNETIC_TILE_SURFACE_DEFECTS/MT_Uneven/Imgs/exp4_num_573.png'),
 Path('/home/ubuntu/magnetic-tiles-defect/data/MAGNETIC_TILE_SURFACE_DEFECTS/MT_Uneven/Imgs/exp3_num_274138.png'),
 Path('/home/ubuntu/magnetic-tiles-defect/data/MAGNETIC_TILE_SURFACE_DEFECTS/MT_Uneven/Imgs/exp6_num_187011.png'),
 Path('/home/ubuntu/magnetic-tiles-defect/data/MAGNETIC_TILE_SURFACE_DEFECTS/MT_Uneve

In [14]:
assert len(img_paths) == len(msk_paths)

In [15]:
img_dir_path = dataset_path/'images'
mask_dir_path = dataset_path/'masks'

for p in [img_dir_path, mask_dir_path]:
    p.mkdir(exist_ok=True)

In [16]:
np.random.seed(42)
for img_path, msk_path in tqdm(zip(img_paths, msk_paths), total=len(img_paths)):
    c = msk_path.parent.parent.stem
    msk = np.array(Image.open(msk_path))
    msk[msk>0] = 1 # binary segmenation: defect/defect-free

    new_img_path = img_dir_path/img_path.name
    new_mask_path = mask_dir_path/msk_path.name
    copyfile(img_path, new_img_path)
    Image.fromarray(msk).save(new_mask_path)

100%|██████████| 392/392 [00:01<00:00, 308.99it/s]


In [17]:
assert len(get_files(img_dir_path, extensions='.jpg')) == len(get_files(mask_dir_path, extensions='.png'))

In [18]:
test_pct = 0.2

train_img_dir_path = dataset_path/'train_images'
train_mask_dir_path = dataset_path/'train_masks'
test_img_dir_path = dataset_path/'test_images'
test_mask_dir_path = dataset_path/'test_masks'

img_fpaths = get_files(img_dir_path, extensions='.jpg')
test_img_fpaths = random.sample(img_fpaths, int(test_pct*len(img_fpaths)))
train_img_fpaths = [
    fpath for fpath in img_fpaths if fpath not in test_img_fpaths]

for dir in [train_img_dir_path,
            train_mask_dir_path,
            test_img_dir_path,
            test_mask_dir_path]:
    dir.mkdir(exist_ok=True)

for img_fpath in test_img_fpaths:
    mask_fpath = mask_dir_path/f'{img_fpath.stem}.png'
    shutil.copy(img_fpath, test_img_dir_path)
    shutil.copy(mask_fpath, test_mask_dir_path)

for img_fpath in train_img_fpaths:
    mask_fpath = mask_dir_path/f'{img_fpath.stem}.png'
    shutil.copy(img_fpath, train_img_dir_path)
    shutil.copy(mask_fpath, train_mask_dir_path)