In [None]:
import sys
# Add the parent directory to the module search path so imports can resolve against it.
sys.path.append("..")

In [None]:
import numpy as np

from fastai.vision.all import *
from pathlib import Path
from tqdm.auto import tqdm
from shutil import copyfile, rmtree, move

In [None]:
from urllib.request import urlopen
from io import BytesIO
from zipfile import ZipFile

def download_and_unzip(url, extract_to='.'):
    """Download a zip archive and extract all of its members.

    The archive is read completely into memory before extraction, so this is
    meant for archives that comfortably fit in RAM.

    Args:
        url: Location of the zip archive; anything `urlopen` accepts.
        extract_to: Directory the archive members are extracted into
            (defaults to the current directory).

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
        zipfile.BadZipFile: If the downloaded payload is not a valid zip file.
    """
    # Close the connection as soon as the payload has been read.
    with urlopen(url) as http_response:
        payload = BytesIO(http_response.read())
    # Close the archive deterministically once extraction is done.
    with ZipFile(payload) as archive:
        archive.extractall(path=extract_to)

In [None]:
# Project locations are derived from the current working directory
# (assumes the notebook runs one level below the project root — TODO confirm).
PROJ_PATH = Path.cwd().parent
DATA_PATH = PROJ_PATH.joinpath('data')
print(PROJ_PATH, DATA_PATH, sep='\n')

In [None]:
# Fetch the dataset archive from GitHub and unpack it into the data folder.
download_and_unzip(
    'https://github.com/abin24/Magnetic-tile-defect-datasets./archive/refs/heads/master.zip',
    DATA_PATH,
)

# Folder the archive is expected to unpack into.
extract_dir = DATA_PATH.joinpath('Magnetic-tile-defect-datasets.-master')

In [None]:
# Final location of the dataset inside the data folder.
dataset_path = DATA_PATH/'MAGNETIC_TILE_SURFACE_DEFECTS'

# Swap in the freshly extracted archive. An existing copy is only deleted when
# a replacement is actually available, so re-running this cell after the move
# (when `extract_dir` no longer exists) leaves the dataset untouched instead of
# wiping it.
if extract_dir.is_dir():
    if dataset_path.is_dir():
        rmtree(dataset_path)
    move(extract_dir, dataset_path)

In [None]:
# Show the contents of the dataset folder as a quick sanity check.
dataset_path.ls()

In [None]:
# Remove the 'MT_Free' folder so it is not picked up as a class below
# (presumably the defect-free samples — TODO confirm).
if dataset_path.joinpath('MT_Free').exists():
    rmtree(dataset_path.joinpath('MT_Free'))

In [None]:
# The classes are the "MT_*" sub-folders of the dataset. Directory listings
# come back in arbitrary, platform-dependent order, so the names are sorted to
# keep the class order (and anything derived from it) reproducible.
classes = sorted(
    entry.stem
    for entry in dataset_path.ls()
    if entry.is_dir() and entry.stem.startswith('MT_')
)
classes

In [None]:
# Map each class name to a 1-based integer index
# (presumably 0 is left for the background — TODO confirm).
classes_dict = {name: idx for idx, name in enumerate(classes, start=1)}
classes_dict

In [None]:
# Gather every .jpg file found under each class folder into one flat list.
img_paths = [
    jpg_file
    for class_name in classes
    for jpg_file in get_files(dataset_path/class_name, extensions='.jpg')
]
len(img_paths)

In [None]:
# Gather every .png file (the masks) found under each class folder into one flat list.
msk_paths = [
    png_file
    for class_name in classes
    for png_file in get_files(dataset_path/class_name, extensions='.png')
]
len(msk_paths)

In [None]:
# The number of images must equal the number of masks; stop early if the counts differ.
assert len(img_paths) == len(msk_paths)

In [None]:
# Flat output folders that will hold all images and all masks respectively.
path_images = dataset_path.joinpath('images')
path_masks = dataset_path.joinpath('masks')

# Create both folders; an already existing folder is not an error.
path_images.mkdir(exist_ok=True)
path_masks.mkdir(exist_ok=True)

In [None]:
np.random.seed(42)  # nothing in this cell draws random numbers; kept in case later cells rely on the seed

# Pair each image with its mask by (class folder, file stem) rather than by
# list position: the two lists are collected independently, so zipping them
# positionally can silently mismatch pairs if their orders ever differ.
masks_by_key = {
    (m.relative_to(dataset_path).parts[0], m.stem): m
    for m in msk_paths
}

for img_path in tqdm(img_paths, total=len(img_paths)):
    pair_key = (img_path.relative_to(dataset_path).parts[0], img_path.stem)
    if pair_key not in masks_by_key:
        raise FileNotFoundError(f'No mask found for image {img_path}')
    msk_path = masks_by_key[pair_key]

    # Read the mask into an array and release the file handle right away.
    with Image.open(msk_path) as mask_img:
        msk = np.array(mask_img)
    msk[msk>0] = 1 # binary segmentation: defect/defect-free

    # Images are copied unchanged; masks are re-saved in their binarized form.
    new_img_path = path_images/img_path.name
    new_mask_path = path_masks/msk_path.name
    copyfile(img_path, new_img_path)
    Image.fromarray(msk).save(new_mask_path)

In [None]:
# Sanity check: the output folders must hold as many .jpg images as .png masks.
assert len(get_files(path_images, extensions='.jpg')) == len(get_files(path_masks, extensions='.png'))