In [1]:
import sys
# Put the parent of the working directory on the import path so modules that
# live one level above this notebook can be imported.
sys.path.extend([".."])

In [2]:
import numpy as np

from fastai.vision.all import *
from pathlib import Path
from tqdm.auto import tqdm
from shutil import copyfile, rmtree, move

In [3]:
from urllib.request import urlopen
from io import BytesIO
from zipfile import ZipFile

def download_and_unzip(url, extract_to='.'):
    """Download a zip archive from ``url`` and extract it into ``extract_to``.

    The complete archive is read into memory before it is unpacked, so this
    helper is meant for archives that comfortably fit in RAM.

    Args:
        url: Location of the zip archive, passed straight to
            ``urllib.request.urlopen``.
        extract_to: Directory (``str`` or path-like) that receives the
            archive contents. Defaults to the current working directory.

    Returns:
        None. The archive members are written below ``extract_to``.

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
        zipfile.BadZipFile: If the downloaded payload is not a zip archive.
    """
    # Read the payload inside a `with` block so the connection is released
    # even if the read fails part-way through.
    with urlopen(url) as http_response:
        payload = BytesIO(http_response.read())

    # Likewise close the archive handle once extraction finishes or fails.
    with ZipFile(payload) as archive:
        archive.extractall(path=extract_to)

In [4]:
# Project layout is resolved relative to the directory the notebook runs in:
# the project root is its parent, and the data folder sits directly below it.
PROJ_PATH = Path.cwd().parent
DATA_PATH = PROJ_PATH.joinpath('data')
print(PROJ_PATH, DATA_PATH, sep='\n')

/mnt/c/Users/akim/gitlab/other/magnetic-tiles-defect
/mnt/c/Users/akim/gitlab/other/magnetic-tiles-defect/data


In [5]:
# Download the dataset archive (master branch of the upstream GitHub repo)
# and unpack it under DATA_PATH.
download_and_unzip(
    url='https://github.com/abin24/Magnetic-tile-defect-datasets./archive/refs/heads/master.zip',
    extract_to=DATA_PATH,
)

# Folder the archive is expected to unpack into (presumably GitHub's
# "<repo>-<branch>" naming — verify if the upstream repo is renamed).
extract_dir = DATA_PATH.joinpath('Magnetic-tile-defect-datasets.-master')

In [6]:
# Give the extracted archive a stable, descriptive folder name.
dataset_path = DATA_PATH/'MAGNETIC_TILE_SURFACE_DEFECTS'

# Replace a previous copy of the dataset only when a freshly extracted archive
# is actually available. Deleting unconditionally would wipe the dataset
# whenever this cell is re-run after the move has already happened (the
# extract folder is gone by then), leaving nothing for the cells below.
if extract_dir.is_dir():
    if dataset_path.is_dir():
        rmtree(dataset_path)
    move(extract_dir, dataset_path)

In [7]:
# Show the top-level contents of the dataset folder as a sanity check.
# `ls` is not a stdlib `pathlib.Path` method; presumably it is added by the
# fastai star-import above.
dataset_path.ls()

(#9) [Path('/mnt/c/Users/akim/gitlab/other/magnetic-tiles-defect/data/MAGNETIC_TILE_SURFACE_DEFECTS/dataset.jpg'),Path('/mnt/c/Users/akim/gitlab/other/magnetic-tiles-defect/data/MAGNETIC_TILE_SURFACE_DEFECTS/dataset.png'),Path('/mnt/c/Users/akim/gitlab/other/magnetic-tiles-defect/data/MAGNETIC_TILE_SURFACE_DEFECTS/MT_Blowhole'),Path('/mnt/c/Users/akim/gitlab/other/magnetic-tiles-defect/data/MAGNETIC_TILE_SURFACE_DEFECTS/MT_Break'),Path('/mnt/c/Users/akim/gitlab/other/magnetic-tiles-defect/data/MAGNETIC_TILE_SURFACE_DEFECTS/MT_Crack'),Path('/mnt/c/Users/akim/gitlab/other/magnetic-tiles-defect/data/MAGNETIC_TILE_SURFACE_DEFECTS/MT_Fray'),Path('/mnt/c/Users/akim/gitlab/other/magnetic-tiles-defect/data/MAGNETIC_TILE_SURFACE_DEFECTS/MT_Free'),Path('/mnt/c/Users/akim/gitlab/other/magnetic-tiles-defect/data/MAGNETIC_TILE_SURFACE_DEFECTS/MT_Uneven'),Path('/mnt/c/Users/akim/gitlab/other/magnetic-tiles-defect/data/MAGNETIC_TILE_SURFACE_DEFECTS/README.md')]

In [8]:
# Drop the 'MT_Free' folder when present, so it is not picked up as a class
# below (presumably it holds the defect-free samples — confirm against the
# dataset README).
mt_free_dir = dataset_path/'MT_Free'
if mt_free_dir.exists():
    rmtree(mt_free_dir)

In [9]:
# Collect the class names from the `MT_*` sub-directories of the dataset.
#
# Directory listings are returned in arbitrary, platform-dependent order, so
# the names are sorted to keep `classes` (and any ids derived from its order)
# identical between runs and machines.
# `.name` is used instead of `.stem` so that a folder name containing a dot is
# not truncated and `dataset_path/c` still points at the real folder.
classes = sorted(
    entry.name
    for entry in dataset_path.iterdir()
    if entry.is_dir() and entry.name.startswith('MT_')
)
classes

['MT_Blowhole', 'MT_Break', 'MT_Crack', 'MT_Fray', 'MT_Uneven']

In [10]:
# Map every class name to a 1-based integer id, following the order of
# `classes` (id 0 is left unassigned here).
classes_dict = dict(zip(classes, range(1, len(classes) + 1)))
classes_dict

{'MT_Blowhole': 1, 'MT_Break': 2, 'MT_Crack': 3, 'MT_Fray': 4, 'MT_Uneven': 5}

In [11]:
# Flat list of every .jpg that `get_files` finds for each class folder,
# in class order.
img_paths = [
    img_file
    for class_name in classes
    for img_file in get_files(dataset_path/class_name, extensions='.jpg')
]
len(img_paths)

392

In [12]:
# Flat list of every .png mask that `get_files` finds for each class folder,
# in class order.
msk_paths = [
    msk_file
    for class_name in classes
    for msk_file in get_files(dataset_path/class_name, extensions='.png')
]
len(msk_paths)

392

In [13]:
# Cheap sanity check: there must be exactly as many masks as images.
assert len(msk_paths) == len(img_paths)

In [14]:
# Flat output folders: one for the copied images, one for the binary masks.
path_images = dataset_path.joinpath('images')
path_masks = dataset_path.joinpath('masks')

# `exist_ok=True` keeps the cell re-runnable when the folders already exist.
path_images.mkdir(exist_ok=True)
path_masks.mkdir(exist_ok=True)

In [15]:
np.random.seed(42)
for src_img, src_msk in tqdm(zip(img_paths, msk_paths), total=len(img_paths)):
    # Name of the mask's grandparent directory (presumably the MT_* class
    # folder); it is not used further inside this loop.
    c = src_msk.parent.parent.stem

    # Binary segmentation (defect / defect-free): every non-zero mask value
    # is collapsed to 1, zeros are left untouched.
    msk = np.array(Image.open(src_msk))
    np.putmask(msk, msk > 0, 1)

    # Images are copied unchanged; masks are written in their binarised form.
    # Both keep their original file names inside the flat output folders.
    copyfile(src_img, path_images/src_img.name)
    Image.fromarray(msk).save(path_masks/src_msk.name)

  0%|          | 0/392 [00:00<?, ?it/s]

In [16]:
# After the copy step, the flat folders must hold the same number of images
# and masks.
n_images = len(get_files(path_images, extensions='.jpg'))
n_masks = len(get_files(path_masks, extensions='.png'))
assert n_images == n_masks