## Currently datasets
- NG-20201217 (Validation)
- OK-20201217 (Training)
- NG-20210108 (Validation)
- OK-20210108 (Training)
- NG-20210115_0121 (Test)
- OK-20210115_0121 (Test)

In [6]:
import os
import glob
import shutil
import pandas as pd

In [15]:
RAW_DATA_PATH = os.path.join(".", "dataset","jet_raw")
OUT_PATH = os.path.join(".", "dataset","Jet")
TRAIN_DIR = ["OK-20201217"]
VALID_DIR = ["OK-20210108"]
VAL_TRAIN_DEFECT = ["NG-20210108", "NG-20201217"]
TEST_DIR = ["NG-20210115_0121", "OK-20210115_0121"]
CLASSES_DICT = {
    "R": r"R[_0-9]*",
    "C": r"C[_0-9]*"
# "F": r"F[0-9]*|F_[0-9]*"
}

In [16]:
for subset in ["train_all", "val", "val_defect", "test"]:
    for classes in CLASSES_DICT.keys():
        dir_ = os.path.join(OUT_PATH, subset, classes)
        if not os.path.exists(dir_):
            os.makedirs(dir_)

# Processing Training images 

In [20]:
TXT_LIST = {}
# Copy images
for OK_NG_dir_ in os.listdir(RAW_DATA_PATH):
    print(OK_NG_dir_)
    if OK_NG_dir_ in TRAIN_DIR:
        subset_name = "train_all"
    elif OK_NG_dir_ in VALID_DIR:
        subset_name = "val"
    elif OK_NG_dir_ in VAL_TRAIN_DEFECT:
        subset_name = "val_defect"
    elif OK_NG_dir_ in TEST_DIR:
        subset_name = "test"
    
    dir_path = os.path.join(RAW_DATA_PATH, OK_NG_dir_)
    for classes, regexp in CLASSES_DICT.items():
        res = [f for f in glob.glob(os.path.join(dir_path, regexp))]                
        for components in res:
            for img_name in glob.glob(os.path.join(components, "*.JPG")):
                ok_dir, component, filename = img_name.split("/")[-3:]
                rename_image = ok_dir + "_" + component + "_" + filename
                oripath = img_name.split("/")[:-1] + [rename_image]
                dst = "/".join([OUT_PATH, subset_name, classes, rename_image])
                shutil.copy2(img_name, dst)
                print(f"copy 2 dst ={dst}")
                TXT_LIST[subset_name] = TXT_LIST.get(subset_name, []) + [dst]
# Create csvfile
for subset_name, value in TXT_LIST.items():
    df = pd.DataFrame(columns=["filename"], data=TXT_LIST[subset_name])
    df.to_csv(f'{OUT_PATH}/{subset_name}.csv', header=False, index=False)

OK-20210115_0121
copy 2 dst =./dataset/Jet/test/R/OK-20210115_0121_R_1206_1_2_PR506-G01A002000351.JPG
copy 2 dst =./dataset/Jet/test/R/OK-20210115_0121_R_1206_1_1_PR210-JET-20210121085357.JPG
copy 2 dst =./dataset/Jet/test/R/OK-20210115_0121_R_1206_1_1_PR506-JET-20210121085357.JPG
copy 2 dst =./dataset/Jet/test/R/OK-20210115_0121_R_0603_1_2_R1077-G01A002000121.JPG
copy 2 dst =./dataset/Jet/test/R/OK-20210115_0121_R_0603_1_2_R1076-G01A002000403.JPG
copy 2 dst =./dataset/Jet/test/R/OK-20210115_0121_R_0603_1_1_R1078-JET-20210121085357.JPG
copy 2 dst =./dataset/Jet/test/R/OK-20210115_0121_R_0603_1_2_R1075-G01A002000005.JPG
copy 2 dst =./dataset/Jet/test/R/OK-20210115_0121_R_0603_1_2_R1076-G01A002000428.JPG
copy 2 dst =./dataset/Jet/test/R/OK-20210115_0121_R_0603_1_2_R1076-G01A002000397.JPG
copy 2 dst =./dataset/Jet/test/R/OK-20210115_0121_R_0603_1_2_R1078-G01A002500243.JPG
copy 2 dst =./dataset/Jet/test/R/OK-20210115_0121_R_0603_1_1_R1043-G01A002000493.JPG
copy 2 dst =./dataset/Jet/test/R/

copy 2 dst =./dataset/Jet/test/C/OK-20210115_0121_C_0603_1_1_PC10-G01A002000294.JPG
copy 2 dst =./dataset/Jet/test/C/OK-20210115_0121_C_0603_1_2_C193-G01A002000449.JPG
copy 2 dst =./dataset/Jet/test/C/OK-20210115_0121_C_0603_1_1_PC10-G01A002000143.JPG
copy 2 dst =./dataset/Jet/test/C/OK-20210115_0121_C_0603_1_2_PC15-G01A002000168.JPG
copy 2 dst =./dataset/Jet/test/C/OK-20210115_0121_C_0603_1_2_C404-G01A002500334.JPG
copy 2 dst =./dataset/Jet/test/C/OK-20210115_0121_C_0603_1_1_C390-G01A002500447.JPG
copy 2 dst =./dataset/Jet/test/C/OK-20210115_0121_C_0603_1_2_C390-G01A002000001.JPG
copy 2 dst =./dataset/Jet/test/C/OK-20210115_0121_C_0603_1_2_C390-G01A002000040.JPG
copy 2 dst =./dataset/Jet/test/C/OK-20210115_0121_C_0603_1_2_C390-G01A002000498.JPG
copy 2 dst =./dataset/Jet/test/C/OK-20210115_0121_C_0603_1_2_PC5-G01A002000029.JPG
copy 2 dst =./dataset/Jet/test/C/OK-20210115_0121_C_0603_1_2_C196-G01A002000354.JPG
copy 2 dst =./dataset/Jet/test/C/OK-20210115_0121_C_0603_1_2_C320-G01A0020000

copy 2 dst =./dataset/Jet/val_defect/C/NG-20201217_C528_F11A003100004_C528_Missing.JPG
copy 2 dst =./dataset/Jet/val_defect/C/NG-20201217_C72_F11A003100060_C72_Poor Solder.JPG
copy 2 dst =./dataset/Jet/val_defect/C/NG-20201217_C63_F11A003100234_C63_Wrong Part.JPG
copy 2 dst =./dataset/Jet/val_defect/C/NG-20201217_C312_F11A003100339_C312_Wrong Part.JPG
NG-20210115_0121
copy 2 dst =./dataset/Jet/test/R/NG-20210115_0121_R_0402_1_2_R852-G01A002000239.JPG
copy 2 dst =./dataset/Jet/test/R/NG-20210115_0121_R_0402_1_1_R259-G01A002500105.JPG
copy 2 dst =./dataset/Jet/test/R/NG-20210115_0121_R_0402_1_2_R12-G01A002500493.JPG
copy 2 dst =./dataset/Jet/test/R/NG-20210115_0121_R_0402_1_1_R7-1200017201160.JPG
copy 2 dst =./dataset/Jet/test/R/NG-20210115_0121_R_0402_1_2_R1025-G01A002000250.JPG
copy 2 dst =./dataset/Jet/test/R/NG-20210115_0121_R_0402_1_2_R1024-G01A002000250.JPG
copy 2 dst =./dataset/Jet/test/R/NG-20210115_0121_R_0402_1_2_R1169-G01A002000250.JPG
copy 2 dst =./dataset/Jet/test/R/NG-20210

copy 2 dst =./dataset/Jet/train_all/R/OK-20201217_R2-1_1082563201579_R2-1_Wrong Part.JPG
copy 2 dst =./dataset/Jet/train_all/R/OK-20201217_R2-1_1082563201271_R2-1_Wrong Part.JPG
copy 2 dst =./dataset/Jet/train_all/R/OK-20201217_R2-1_1082562802553_R2-1_Wrong Part.JPG
copy 2 dst =./dataset/Jet/train_all/R/OK-20201217_R2-1_1082563200207_R2-1_Wrong Part.JPG
copy 2 dst =./dataset/Jet/train_all/R/OK-20201217_R2-1_1082562802575_R2-1_Wrong Part.JPG
copy 2 dst =./dataset/Jet/train_all/R/OK-20201217_R2-1_1082563201920_R2-1_Wrong Part.JPG
copy 2 dst =./dataset/Jet/train_all/R/OK-20201217_R2-1_1082562800396_R2-1_Wrong Part.JPG
copy 2 dst =./dataset/Jet/train_all/R/OK-20201217_R2-1_1082563200429_R2-1_Wrong Part.JPG
copy 2 dst =./dataset/Jet/train_all/R/OK-20201217_R2-1_1082563202498_R2-1_Wrong Part.JPG
copy 2 dst =./dataset/Jet/train_all/R/OK-20201217_R2-1_1082563201037_R2-1_Wrong Part.JPG
copy 2 dst =./dataset/Jet/train_all/R/OK-20201217_R2-1_1082563202373_R2-1_Wrong Part.JPG
copy 2 dst =./dataset

copy 2 dst =./dataset/Jet/train_all/R/OK-20201217_R3-1_1082563202795_R3-1_Wrong Part.JPG
copy 2 dst =./dataset/Jet/train_all/R/OK-20201217_R3-1_1082563201529_R3-1_Wrong Part.JPG
copy 2 dst =./dataset/Jet/train_all/R/OK-20201217_R3-1_1082563202641_R3-1_Wrong Part.JPG
copy 2 dst =./dataset/Jet/train_all/R/OK-20201217_R3-1_1082563200061_R3-1_Wrong Part.JPG
copy 2 dst =./dataset/Jet/train_all/R/OK-20201217_R3-1_1082563201487_R3-1_Wrong Part.JPG
copy 2 dst =./dataset/Jet/train_all/R/OK-20201217_R3-1_1082562800465_R3-1_Wrong Part.JPG
copy 2 dst =./dataset/Jet/train_all/R/OK-20201217_R3-1_1082563201269_R3-1_Wrong Part.JPG
copy 2 dst =./dataset/Jet/train_all/R/OK-20201217_R3-1_1082563201301_R3-1_Wrong Part.JPG
copy 2 dst =./dataset/Jet/train_all/R/OK-20201217_R3-1_1082563200833_R3-1_Wrong Part.JPG
copy 2 dst =./dataset/Jet/train_all/R/OK-20201217_R3-1_1082563201726_R3-1_Wrong Part.JPG
copy 2 dst =./dataset/Jet/train_all/R/OK-20201217_R3-1_1082563201938_R3-1_Wrong Part.JPG
copy 2 dst =./dataset

copy 2 dst =./dataset/Jet/train_all/R/OK-20201217_R1067_F11A003100333_R1067_Bridge.JPG
copy 2 dst =./dataset/Jet/train_all/R/OK-20201217_R1067_F11A003100108_R1067_Bridge.JPG
copy 2 dst =./dataset/Jet/train_all/R/OK-20201217_R1067_F11A003100138_R1067_Bridge.JPG
copy 2 dst =./dataset/Jet/train_all/R/OK-20201217_R1067_F11A003100094_R1067_Missing.JPG
copy 2 dst =./dataset/Jet/train_all/R/OK-20201217_R1067_F11MM03600001_R1067_Bridge.JPG
copy 2 dst =./dataset/Jet/train_all/R/OK-20201217_R1067_F11A003100343_R1067_Poor Solder.JPG
copy 2 dst =./dataset/Jet/train_all/R/OK-20201217_R1067_F11MM03600039_R1067_Poor Solder.JPG
copy 2 dst =./dataset/Jet/train_all/R/OK-20201217_R1067_F11A003100090_R1067_Missing.JPG
copy 2 dst =./dataset/Jet/train_all/R/OK-20201217_R1067_F11A003100232_R1067_Bridge.JPG
copy 2 dst =./dataset/Jet/train_all/R/OK-20201217_R1067_F11A003100345_R1067_Missing.JPG
copy 2 dst =./dataset/Jet/train_all/R/OK-20201217_R1067_F11A003100291_R1067_Bridge.JPG
copy 2 dst =./dataset/Jet/trai

copy 2 dst =./dataset/Jet/val/R/OK-20210108_R104_F12T002300003_R104_Skew.JPG
copy 2 dst =./dataset/Jet/val/R/OK-20210108_R405_1083028500123_R405_Skew.JPG
copy 2 dst =./dataset/Jet/val/R/OK-20210108_R179_1083054400137_R179_Missing.JPG
copy 2 dst =./dataset/Jet/val/R/OK-20210108_R179_1083054400088_R179_Missing.JPG
copy 2 dst =./dataset/Jet/val/R/OK-20210108_R179_1083054400035_R179_Missing.JPG
copy 2 dst =./dataset/Jet/val/R/OK-20210108_R179_1083054400049_R179_Missing.JPG
copy 2 dst =./dataset/Jet/val/R/OK-20210108_R179_1083054400140_R179_Missing.JPG
copy 2 dst =./dataset/Jet/val/R/OK-20210108_R179_1083054400143_R179_Missing.JPG
copy 2 dst =./dataset/Jet/val/R/OK-20210108_R179_1083054400155_R179_Missing.JPG
copy 2 dst =./dataset/Jet/val/R/OK-20210108_R179_1083054400142_R179_Missing.JPG
copy 2 dst =./dataset/Jet/val/R/OK-20210108_R179_1083054400103_R179_Missing.JPG
copy 2 dst =./dataset/Jet/val/R/OK-20210108_R179_1083054400006_R179_Missing.JPG
copy 2 dst =./dataset/Jet/val/R/OK-20210108_R1

copy 2 dst =./dataset/Jet/val/C/OK-20210108_C34_1083054400076_C34_Bridge.JPG
copy 2 dst =./dataset/Jet/val/C/OK-20210108_C34_1083054400113_C34_Poor Solder.JPG
copy 2 dst =./dataset/Jet/val/C/OK-20210108_C34_1083109500094_C34_Skew.JPG
copy 2 dst =./dataset/Jet/val/C/OK-20210108_C34_1083054400155_C34_Poor Solder.JPG
copy 2 dst =./dataset/Jet/val/C/OK-20210108_C34_1083054400163_C34_Poor Solder.JPG
copy 2 dst =./dataset/Jet/val/C/OK-20210108_C34_1083054400165_C34_Poor Solder.JPG
copy 2 dst =./dataset/Jet/val/C/OK-20210108_C103_1083054400059_C103_Wrong Part.JPG
copy 2 dst =./dataset/Jet/val/C/OK-20210108_C103_G01A000200031_C103_Skew.JPG
copy 2 dst =./dataset/Jet/val/C/OK-20210108_C103_G01A000200210_C103_Skew.JPG
copy 2 dst =./dataset/Jet/val/C/OK-20210108_C103_G01A000200035_C103_Skew.JPG
copy 2 dst =./dataset/Jet/val/C/OK-20210108_C103_1083054400121_C103_Poor Solder.JPG
copy 2 dst =./dataset/Jet/val/C/OK-20210108_C103_G01A000200161_C103_Skew.JPG
copy 2 dst =./dataset/Jet/val/C/OK-20210108_C

copy 2 dst =./dataset/Jet/val/C/OK-20210108_C347_1083054400143_C347_Skew.JPG
copy 2 dst =./dataset/Jet/val/C/OK-20210108_C926_1100954900011_C926_Poor Solder.JPG
copy 2 dst =./dataset/Jet/val/C/OK-20210108_C91_1083054600311_C91_Skew.JPG
copy 2 dst =./dataset/Jet/val/C/OK-20210108_C206_1083109600194_C206_Skew.JPG
copy 2 dst =./dataset/Jet/val/C/OK-20210108_C206_1083109500099_C206_Skew.JPG
copy 2 dst =./dataset/Jet/val/C/OK-20210108_C206_1083109600055_C206_Skew.JPG
copy 2 dst =./dataset/Jet/val/C/OK-20210108_C206_1083054400041_C206_Poor Solder.JPG
copy 2 dst =./dataset/Jet/val/C/OK-20210108_C206_1083054400059_C206_Poor Solder.JPG
copy 2 dst =./dataset/Jet/val/C/OK-20210108_C206_1083109600163_C206_Skew.JPG
copy 2 dst =./dataset/Jet/val/C/OK-20210108_C206_1083109600173_C206_Skew.JPG
copy 2 dst =./dataset/Jet/val/C/OK-20210108_C206_1083109600202_C206_Skew.JPG
copy 2 dst =./dataset/Jet/val/C/OK-20210108_C143_1100954900016_C143_Poor Solder.JPG
copy 2 dst =./dataset/Jet/val/C/OK-20210108_C143_1

In [None]:
# hide
# for dirPath, dirNames, fileNames in os.walk(root_path):
#     print(dirNames)
#     for f in fileNames:
#         print os.path.join(dirPath, f)