### This Jupyter notebook allows you to extract patches of different sizes from the GAPS dataset


<a href="https://www.tu-ilmenau.de/en/neurob/data-sets-code/gaps/">Link to GAPS dataset</a>


<pre>
@inproceedings{eisenbach2017how,
  title={How to Get Pavement Distress Detection Ready for Deep Learning? A Systematic Approach.},
  author={Eisenbach, Markus and Stricker, Ronny and Seichter, Daniel and Amende, Karl and Debes, Klaus
          and Sesselmann, Maximilian and Ebersbach, Dirk and Stoeckert, Ulrike
          and Gross, Horst-Michael},
  booktitle={International Joint Conference on Neural Networks (IJCNN)},
  pages={2039--2047},
  year={2017}
} </pre>

Please install all the required packages (numpy, OpenCV for the `cv2` module, pandas and `gaps_dataset`) before running the first cell.

In [None]:
import numpy as np
import cv2
import pandas as pd
import os
from gaps_dataset import gaps

In [None]:
# Folder that contains the patch_references_<subset>.npy files and the 'images' folder;
# the 'masks' and 'Subsets<ps>' output folders are created inside it.
# Taken from the GAPS_DATADIR environment variable when it is set.
datadir = os.environ.get('GAPS_DATADIR', '') #or replace the '' default with your datadirectory
# Login credentials are read from the GAPS_LOGIN environment variable so that they
# do not have to be typed into (and saved with) the notebook.
login = os.environ.get('GAPS_LOGIN', '') #or replace the '' default with your login credentials

In [None]:
# Create all the required folders where the patches and masks will be written.
# Resulting layout:
#   <datadir>/Subsets<ps>/<train|test|valid>/<Crack|Nocrack>
#   <datadir>/masks
# os.makedirs(..., exist_ok=True) also creates missing parent folders and does not
# fail when a folder already exists, so this cell can be re-run safely.

ps = 256 #define the patch size

subsetlist = ['train','test','valid']
categorylist = ['Crack','Nocrack']
subsetroot = os.path.join(datadir,'Subsets{0}'.format(ps))
for s in subsetlist:
    for c in categorylist:
        os.makedirs(os.path.join(subsetroot,s,c), exist_ok=True)
os.makedirs(os.path.join(datadir,'masks'), exist_ok=True)

In [None]:
# Build one boolean mask per image from the patch references of every subset and
# save it as <datadir>/masks/<subset>_<image index, 4 digits>.npy.
# Each reference (row, col, binary_label) writes binary_label into the 64 x 64
# region whose upper-left corner is (row, col).
mpath = os.path.join(datadir,'masks')
maskshape = (1080,1920)  # fixed mask size (rows, cols); presumably the GAPS image resolution - TODO confirm
refsize = 64             # edge length in pixels of the square region written per reference
columns = ['image_index', 'row', 'col', 'mirror_state', 'binary_label', 'class_label']
datatype = ['uint16', 'uint16', 'uint16','bool','bool',"uint8"]
for subset in ['train','test','valid']:
    patch_ref = np.load(os.path.join(datadir,'patch_references_'+subset+'.npy')).astype(int)
    datadf = pd.DataFrame(data= {columns[i]:patch_ref[:,i].astype(datatype[i]) for i in range(len(columns))})
    datadf.class_label = datadf.class_label.astype('category')
    # Group by a scalar key: grouping by a one-element list makes newer pandas
    # versions yield 1-tuples as group keys, which cannot be formatted with ':04d'.
    groupeddf = datadf.groupby(by='image_index')
    print(subset+' Start')
    for group,data in groupeddf:
        masksavename = subset+'_{:04d}.npy'.format(int(group))
        # Only the patch references are needed to build the mask, so the image
        # file itself is not read here.
        maskI = np.zeros(maskshape,dtype='bool')
        rows = data.row.to_numpy().astype(int)
        cols = data.col.to_numpy().astype(int)
        labels = data.binary_label.to_numpy()
        # NOTE(review): references are applied in order, so where regions overlap
        # the later reference overwrites the earlier one (a False label can clear
        # pixels set by a previous True label) - confirm this is intended.
        for row,col,binary_label in zip(rows,cols,labels):
            maskI[row:row+refsize, col:col+refsize] = binary_label
        np.save(os.path.join(mpath,masksavename),maskI)
    print(subset+' Done')

In [None]:
# Cut every image into non-overlapping ps x ps tiles and sort the tiles into the
# Crack / Nocrack folder of their subset, based on the mask saved for that image.
# A tile is written to 'Crack' when at least one mask pixel inside it is set,
# otherwise to 'Nocrack'. One row per tile (file name, subset, number of set mask
# pixels, boolean label) is collected and written to <datadir>/labels.csv.
masklist = sorted(os.listdir(os.path.join(datadir,'masks')))
# Use the configured patch size for the folder name so it matches the
# 'Subsets<ps>' folders created earlier, also when ps is not 256.
subsetroot = os.path.join(datadir,'Subsets{0}'.format(ps))
records = []
print('Start')
for mask in masklist:
    if not mask.endswith('.npy'):
        # skip anything in the masks folder that is not a saved mask
        continue
    subset = mask.split('_')[0]
    imgnum = int(mask.split('_')[1].split('.')[0])
    openpath = os.path.join(datadir,'images',mask.split('.')[0]+'.jpg')
    maskpath = os.path.join(datadir,'masks',mask)
    I = cv2.imread(openpath)
    if I is None:
        # cv2.imread signals an unreadable/missing file by returning None
        raise FileNotFoundError('Could not read image: {0}'.format(openpath))
    maskI = np.load(maskpath)
    for row in range(1080//ps):
        for col in range(1920//ps):
            rowslice = slice(ps*row, ps*(row+1))
            colslice = slice(ps*col, ps*(col+1))
            score = int(np.sum(maskI[rowslice, colslice]))
            category = 'Crack' if score>0 else 'Nocrack'
            # os.path.join puts the file inside the category folder; plain string
            # concatenation would glue the folder name onto the file name instead.
            savename = os.path.join(subsetroot,subset,category,
                                    '{0:04d}_{1}_{2}.jpg'.format(imgnum,row,col))
            cv2.imwrite(savename,I[rowslice, colslice])
            records.append([savename,subset,score,bool(score)])
# Build the table once from the collected rows instead of growing it row by row.
labelmanager = pd.DataFrame(records, columns=['img_name','category','score','label'])
labelmanager.to_csv(os.path.join(datadir,'labels.csv'))
print('Done')