In [None]:
import csv
import glob
import random
from collections import namedtuple
from math import floor
from pathlib import Path

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.utils import Sequence

## File paths

In [None]:
# Path related configuration
DATASET_BASE_DIR = Path("/home/italolanza/workspace/TG/dataset")


def list_csv_files(*relative_dirs, base_dir=DATASET_BASE_DIR):
    """Collect the ``*.csv`` files located directly inside the given folders.

    Args:
        relative_dirs: Folder paths relative to ``base_dir``; the results are
            concatenated in the order the folders are given.
        base_dir: Root folder of the dataset.

    Returns:
        A list of file paths (strings). Within each folder the paths are
        sorted, because ``glob.glob`` yields them in arbitrary order and the
        generated datasets should not depend on the filesystem.
    """
    csv_files = []
    for relative_dir in relative_dirs:
        csv_files.extend(sorted(glob.glob(f"{base_dir}/{relative_dir}/*.csv")))
    return csv_files


NORMAL_FILES = list_csv_files("normal")

# Horizontal misalignment, grouped by severity
HOR_MISALIGNMENT_LOW_FILES = list_csv_files("horizontal/0.5mm")
HOR_MISALIGNMENT_MEDIUM_FILES = list_csv_files("horizontal/1.0mm", "horizontal/1.5mm")
HOR_MISALIGNMENT_HIGH_FILES = list_csv_files("horizontal/2.0mm")

# Vertical misalignment, grouped by severity
VER_MISALIGNMENT_LOW_FILES = list_csv_files("vertical/0.51mm", "vertical/0.63mm")
VER_MISALIGNMENT_MEDIUM_FILES = list_csv_files("vertical/1.27mm", "vertical/1.40mm")
VER_MISALIGNMENT_HIGH_FILES = list_csv_files("vertical/1.78mm", "vertical/1.90mm")

# Imbalance, grouped by severity
IMBALANCE_LOW_FILES = list_csv_files("imbalance/6g", "imbalance/10g")
IMBALANCE_MEDIUM_FILES = list_csv_files("imbalance/15g", "imbalance/20g", "imbalance/25g")
IMBALANCE_HIGH_FILES = list_csv_files("imbalance/30g", "imbalance/35g")

# Folder that receives the generated training/validation CSV files
OUTPUT_DATA_DIR = DATASET_BASE_DIR.joinpath("output")

## Creating dataset

In [None]:
# Normal data
def process_normal_data(suffle_data=True, test_size=0.3, seed=None):
    """Split the normal-condition CSV files into training and validation sets.

    Every row read from the files in ``NORMAL_FILES`` gets the two target
    values ``[0, 0.0]`` appended. For each source file, the first
    ``floor(rows * test_size)`` rows (after the optional shuffle) are appended
    to ``normal_data_validacao.csv`` and the remaining rows to
    ``normal_data_treinamento.csv``, both inside ``OUTPUT_DATA_DIR``.

    Nothing is done when either output file already exists; delete the files
    to regenerate them.

    Args:
        suffle_data: When true, the rows of each source file are randomly
            shuffled before the split (parameter name kept for compatibility).
        test_size: Fraction of each source file sent to the validation set.
        seed: Optional seed for the shuffle, to make the split reproducible.
    """
    training_path = OUTPUT_DATA_DIR.joinpath("normal_data_treinamento.csv")
    validation_path = OUTPUT_DATA_DIR.joinpath("normal_data_validacao.csv")

    # The outputs are written in append mode, so processing again on top of
    # existing files would duplicate every row.
    if training_path.exists() or validation_path.exists():
        return

    OUTPUT_DATA_DIR.mkdir(parents=True, exist_ok=True)
    # Y values appended to every row
    normal_data_output = [0, 0.0]
    rng = random.Random(seed)

    total_lines = 0
    test_lines = 0

    for file_name in NORMAL_FILES:

        with open(file_name, 'r', newline='') as data_file:
            rows = [row + normal_data_output for row in csv.reader(data_file, delimiter=",")]

        if suffle_data:
            rng.shuffle(rows)

        test_index = floor(len(rows) * test_size)
        total_lines += len(rows)
        test_lines += test_index

        # newline='' lets the csv module control the line endings itself
        with open(training_path, 'a', newline='') as training_file, \
             open(validation_path, 'a', newline='') as test_file:
            csv.writer(test_file).writerows(rows[:test_index])
            csv.writer(training_file).writerows(rows[test_index:])

    print(f"Dataset size: {total_lines} lines")
    print(f"Test dataset size {test_lines} lines")
    print(f"Training dataset size {total_lines - test_lines} lines")

In [None]:
# Imbalance data
def process_imbalance_data(suffle_data=True, test_size=0.3, seed=None):
    """Split the imbalance CSV files into training and validation sets.

    The three severity groups are handled one after the other:
    low (6g, 10g), medium (15g, 20g, 25g) and high (30g, 35g). Every row gets
    two target values appended: ``1`` followed by ``1.0``, ``2.0`` or ``3.0``
    for low, medium and high respectively. For each source file, the first
    ``floor(rows * test_size)`` rows (after the optional shuffle) are appended
    to ``imbalance_<level>_data_validacao.csv`` and the remaining rows to
    ``imbalance_<level>_data_treinamento.csv`` inside ``OUTPUT_DATA_DIR``.

    A severity group is skipped when either of its output files already
    exists; its printed statistics are then zero.

    Args:
        suffle_data: When true, the rows of each source file are randomly
            shuffled before the split (parameter name kept for compatibility).
        test_size: Fraction of each source file sent to the validation set.
        seed: Optional seed for the shuffle, to make the split reproducible.
    """
    OUTPUT_DATA_DIR.mkdir(parents=True, exist_ok=True)

    # (printed title, output file stem, source files, Y values appended to every row)
    severity_groups = (
        ("Low", "imbalance_low_data", IMBALANCE_LOW_FILES, [1, 1.0]),
        ("Medium", "imbalance_medium_data", IMBALANCE_MEDIUM_FILES, [1, 2.0]),
        ("High", "imbalance_high_data", IMBALANCE_HIGH_FILES, [1, 3.0]),
    )
    rng = random.Random(seed)

    for title, stem, source_files, target_values in severity_groups:
        training_path = OUTPUT_DATA_DIR.joinpath(f"{stem}_treinamento.csv")
        validation_path = OUTPUT_DATA_DIR.joinpath(f"{stem}_validacao.csv")

        total_lines = 0
        test_lines = 0

        # The outputs are written in append mode, so processing again on top
        # of existing files would duplicate every row.
        if not (training_path.exists() or validation_path.exists()):
            for file_name in source_files:

                with open(file_name, 'r', newline='') as data_file:
                    rows = [row + target_values for row in csv.reader(data_file, delimiter=",")]

                if suffle_data:
                    rng.shuffle(rows)

                test_index = floor(len(rows) * test_size)
                total_lines += len(rows)
                test_lines += test_index

                # newline='' lets the csv module control the line endings itself
                with open(training_path, 'a', newline='') as training_file, \
                     open(validation_path, 'a', newline='') as test_file:
                    csv.writer(test_file).writerows(rows[:test_index])
                    csv.writer(training_file).writerows(rows[test_index:])

        print(f"{title} data")
        print("######################################")
        print(f"Dataset size: {total_lines} lines")
        print(f"Test dataset size {test_lines} lines")
        print(f"Training dataset size {total_lines - test_lines} lines\n")

In [None]:
# Horizontal misalignment
def process_hor_misalignment_data(suffle_data=True, test_size=0.3, seed=None):
    """Split the horizontal-misalignment CSV files into training and validation sets.

    The three severity groups are handled one after the other:
    low (0.5mm), medium (1.0mm, 1.5mm) and high (2.0mm). Every row gets two
    target values appended: ``2`` followed by ``1.0``, ``2.0`` or ``3.0`` for
    low, medium and high respectively. For each source file, the first
    ``floor(rows * test_size)`` rows (after the optional shuffle) are appended
    to ``hor_misalignment_<level>_data_validacao.csv`` and the remaining rows
    to ``hor_misalignment_<level>_data_treinamento.csv`` inside
    ``OUTPUT_DATA_DIR``.

    A severity group is skipped when either of its output files already
    exists; its printed statistics are then zero.

    Args:
        suffle_data: When true, the rows of each source file are randomly
            shuffled before the split (parameter name kept for compatibility).
        test_size: Fraction of each source file sent to the validation set.
        seed: Optional seed for the shuffle, to make the split reproducible.
    """
    OUTPUT_DATA_DIR.mkdir(parents=True, exist_ok=True)

    # (printed title, output file stem, source files, Y values appended to every row)
    severity_groups = (
        ("Low", "hor_misalignment_low_data", HOR_MISALIGNMENT_LOW_FILES, [2, 1.0]),
        ("Medium", "hor_misalignment_medium_data", HOR_MISALIGNMENT_MEDIUM_FILES, [2, 2.0]),
        ("High", "hor_misalignment_high_data", HOR_MISALIGNMENT_HIGH_FILES, [2, 3.0]),
    )
    rng = random.Random(seed)

    for title, stem, source_files, target_values in severity_groups:
        training_path = OUTPUT_DATA_DIR.joinpath(f"{stem}_treinamento.csv")
        validation_path = OUTPUT_DATA_DIR.joinpath(f"{stem}_validacao.csv")

        total_lines = 0
        test_lines = 0

        # The outputs are written in append mode, so processing again on top
        # of existing files would duplicate every row.
        if not (training_path.exists() or validation_path.exists()):
            for file_name in source_files:

                with open(file_name, 'r', newline='') as data_file:
                    rows = [row + target_values for row in csv.reader(data_file, delimiter=",")]

                if suffle_data:
                    rng.shuffle(rows)

                test_index = floor(len(rows) * test_size)
                total_lines += len(rows)
                test_lines += test_index

                # newline='' lets the csv module control the line endings itself
                with open(training_path, 'a', newline='') as training_file, \
                     open(validation_path, 'a', newline='') as test_file:
                    csv.writer(test_file).writerows(rows[:test_index])
                    csv.writer(training_file).writerows(rows[test_index:])

        print(f"{title} data")
        print("######################################")
        print(f"Dataset size: {total_lines} lines")
        print(f"Test dataset size {test_lines} lines")
        print(f"Training dataset size {total_lines - test_lines} lines\n")

In [None]:
# Vertical misalignment
def process_ver_misalignment_data(suffle_data=True, test_size=0.3, seed=None):
    """Split the vertical-misalignment CSV files into training and validation sets.

    The three severity groups are handled one after the other:
    low (0.51mm, 0.63mm), medium (1.27mm, 1.40mm) and high (1.78mm, 1.90mm).
    Every row gets two target values appended: ``3`` followed by ``1.0``,
    ``2.0`` or ``3.0`` for low, medium and high respectively. For each source
    file, the first ``floor(rows * test_size)`` rows (after the optional
    shuffle) are appended to ``ver_misalignment_<level>_data_validacao.csv``
    and the remaining rows to ``ver_misalignment_<level>_data_treinamento.csv``
    inside ``OUTPUT_DATA_DIR``.

    A severity group is skipped when either of its output files already
    exists; its printed statistics are then zero.

    Args:
        suffle_data: When true, the rows of each source file are randomly
            shuffled before the split (parameter name kept for compatibility).
        test_size: Fraction of each source file sent to the validation set.
        seed: Optional seed for the shuffle, to make the split reproducible.
    """
    OUTPUT_DATA_DIR.mkdir(parents=True, exist_ok=True)

    # (printed title, output file stem, source files, Y values appended to every row)
    severity_groups = (
        ("Low", "ver_misalignment_low_data", VER_MISALIGNMENT_LOW_FILES, [3, 1.0]),
        ("Medium", "ver_misalignment_medium_data", VER_MISALIGNMENT_MEDIUM_FILES, [3, 2.0]),
        ("High", "ver_misalignment_high_data", VER_MISALIGNMENT_HIGH_FILES, [3, 3.0]),
    )
    rng = random.Random(seed)

    for title, stem, source_files, target_values in severity_groups:
        training_path = OUTPUT_DATA_DIR.joinpath(f"{stem}_treinamento.csv")
        validation_path = OUTPUT_DATA_DIR.joinpath(f"{stem}_validacao.csv")

        total_lines = 0
        test_lines = 0

        # The outputs are written in append mode, so processing again on top
        # of existing files would duplicate every row.
        if not (training_path.exists() or validation_path.exists()):
            for file_name in source_files:

                with open(file_name, 'r', newline='') as data_file:
                    rows = [row + target_values for row in csv.reader(data_file, delimiter=",")]

                if suffle_data:
                    rng.shuffle(rows)

                test_index = floor(len(rows) * test_size)
                total_lines += len(rows)
                test_lines += test_index

                # newline='' lets the csv module control the line endings itself
                with open(training_path, 'a', newline='') as training_file, \
                     open(validation_path, 'a', newline='') as test_file:
                    csv.writer(test_file).writerows(rows[:test_index])
                    csv.writer(training_file).writerows(rows[test_index:])

        print(f"{title} data")
        print("######################################")
        print(f"Dataset size: {total_lines} lines")
        print(f"Test dataset size {test_lines} lines")
        print(f"Training dataset size {total_lines - test_lines} lines\n")

In [None]:
# process normal data
#process_normal_data()

In [None]:
#process imbalance data
#process_imbalance_data()

In [None]:
#process horizontal misalignment data
#process_hor_misalignment_data()

In [None]:
#process vertical  misalignment data
#process_ver_misalignment_data()

## Loading and processing data

In [None]:
# Func to get the normal data
def get_normal_data(chunk_size):
    """Open ``normal_data.csv`` from ``OUTPUT_DATA_DIR`` for chunked reading.

    Args:
        chunk_size: Number of rows per chunk, forwarded to ``pandas.read_csv``
            as ``chunksize``.

    Returns:
        The chunk iterator produced by ``pandas.read_csv``, or ``None`` when
        the file does not exist.
    """
    normal_csv = OUTPUT_DATA_DIR.joinpath("normal_data.csv")

    # Guard clause: nothing to read yet.
    if not normal_csv.exists():
        return None

    return pd.read_csv(normal_csv, chunksize=chunk_size)


# Func to get the imbalance data
def get_imbalance_data(chunk_size):
    """Open the three imbalance CSV files from ``OUTPUT_DATA_DIR`` for chunked reading.

    Args:
        chunk_size: Number of rows per chunk, forwarded to ``pandas.read_csv``
            as ``chunksize``.

    Returns:
        An ``ImbalanceData`` named tuple with the fields ``low_imb_data``,
        ``med_imb_data`` and ``high_imb_data``, each holding the chunk iterator
        of the matching file.

    Raises:
        FileNotFoundError: If any of the three files is missing.
    """
    ImbalanceData = namedtuple("ImbalanceData", "low_imb_data med_imb_data high_imb_data")

    csv_paths = [
        OUTPUT_DATA_DIR.joinpath(file_name)
        for file_name in ("imbalance_low_data.csv", "imbalance_medium_data.csv", "imbalance_high_data.csv")
    ]

    # Fail with an explicit message instead of an IndexError on an empty list.
    missing = [str(csv_path) for csv_path in csv_paths if not csv_path.exists()]
    if missing:
        raise FileNotFoundError("Missing imbalance data file(s): " + ", ".join(missing))

    # NOTE(review): read_csv treats the first line as a header here — confirm
    # these files actually have one.
    return ImbalanceData(*[pd.read_csv(csv_path, chunksize=chunk_size) for csv_path in csv_paths])


# Func to get the horizontal misalignment data
def get_horizontal_misalignment_data(chunk_size):
    """Open the three horizontal-misalignment CSV files from ``OUTPUT_DATA_DIR`` for chunked reading.

    Args:
        chunk_size: Number of rows per chunk, forwarded to ``pandas.read_csv``
            as ``chunksize``.

    Returns:
        A ``HorMisalignmentData`` named tuple with the fields
        ``low_hor_mis_data``, ``med_hor_mis_data`` and ``high_hor_mis_data``,
        each holding the chunk iterator of the matching file.

    Raises:
        FileNotFoundError: If any of the three files is missing.
    """
    HorMisalignmentData = namedtuple("HorMisalignmentData", ["low_hor_mis_data", "med_hor_mis_data", "high_hor_mis_data"])

    csv_paths = [
        OUTPUT_DATA_DIR.joinpath(file_name)
        for file_name in ("hor_misalignment_low_data.csv", "hor_misalignment_medium_data.csv", "hor_misalignment_high_data.csv")
    ]

    # Fail with an explicit message instead of an IndexError on an empty list.
    missing = [str(csv_path) for csv_path in csv_paths if not csv_path.exists()]
    if missing:
        raise FileNotFoundError("Missing horizontal misalignment data file(s): " + ", ".join(missing))

    # NOTE(review): read_csv treats the first line as a header here — confirm
    # these files actually have one.
    return HorMisalignmentData(*[pd.read_csv(csv_path, chunksize=chunk_size) for csv_path in csv_paths])


# Func to get the vertical misalignment data
def get_vertical_misalignment_data(chunk_size):
    """Open the three vertical-misalignment CSV files from ``OUTPUT_DATA_DIR`` for chunked reading.

    Args:
        chunk_size: Number of rows per chunk, forwarded to ``pandas.read_csv``
            as ``chunksize``.

    Returns:
        A ``VerMisalignmentData`` named tuple with the fields
        ``low_ver_mis_data``, ``med_ver_mis_data`` and ``high_ver_mis_data``,
        each holding the chunk iterator of the matching file.

    Raises:
        FileNotFoundError: If any of the three files is missing.
    """
    VerMisalignmentData = namedtuple("VerMisalignmentData", ["low_ver_mis_data", "med_ver_mis_data", "high_ver_mis_data"])

    csv_paths = [
        OUTPUT_DATA_DIR.joinpath(file_name)
        for file_name in ("ver_misalignment_low_data.csv", "ver_misalignment_medium_data.csv", "ver_misalignment_high_data.csv")
    ]

    # Fail with an explicit message instead of an IndexError on an empty list.
    missing = [str(csv_path) for csv_path in csv_paths if not csv_path.exists()]
    if missing:
        raise FileNotFoundError("Missing vertical misalignment data file(s): " + ", ".join(missing))

    # NOTE(review): read_csv treats the first line as a header here — confirm
    # these files actually have one.
    return VerMisalignmentData(*[pd.read_csv(csv_path, chunksize=chunk_size) for csv_path in csv_paths])

In [None]:
class DataGenerator(Sequence):
    """Keras ``Sequence`` that assembles batches from the per-class CSV files.

    Each batch is built by reading up to ``batch_size`` rows, starting at row
    ``index * batch_size``, from every one of the ten training (or validation)
    CSV files in ``OUTPUT_DATA_DIR`` and stacking them, so a batch holds up to
    ``10 * batch_size`` rows. Columns 0-7 of each row are returned as inputs
    and columns 8-9 as targets.

    Args:
        batch_size: Number of rows taken from each CSV file per batch.
        is_validation: Read the ``*_validacao.csv`` files when true, otherwise
            the ``*_treinamento.csv`` files.
    """

    MAX_DATASET_SIZE_TRAINING = 8575000 # The number of lines of the training set of the Normal data (the smaller)
    MAX_DATASET_SIZE_VALIDATION = 3675000 # The number of lines of the validation set of the Normal data (the smaller)

    # File-name stems of the datasets, in the order they are stacked in a batch
    _DATASET_STEMS = (
        "normal_data",
        "imbalance_low_data",
        "imbalance_medium_data",
        "imbalance_high_data",
        "hor_misalignment_low_data",
        "hor_misalignment_medium_data",
        "hor_misalignment_high_data",
        "ver_misalignment_low_data",
        "ver_misalignment_medium_data",
        "ver_misalignment_high_data",
    )

    def __init__(self, batch_size=8575, is_validation=False):
        # Newer Keras releases expect subclasses to initialise the base class;
        # on older ones this call is a no-op.
        super().__init__()
        self.batch_size = batch_size
        self.is_validation = is_validation


    def __len__(self):
        """Return the number of batches in the sequence."""
        if self.is_validation:
            dataset_size = DataGenerator.MAX_DATASET_SIZE_VALIDATION
        else:
            dataset_size = DataGenerator.MAX_DATASET_SIZE_TRAINING

        return int(dataset_size // self.batch_size)


    def __getitem__(self, index):
        """Return batch number ``index`` as an ``(inputs, targets)`` tuple of arrays."""
        name_modifier = "_validacao" if self.is_validation else "_treinamento"
        first_row = index * self.batch_size

        # skiprows/nrows reads exactly the rows of this batch and closes the
        # file right away, instead of leaving ten chunk readers open per batch.
        frames = [
            pd.read_csv(
                OUTPUT_DATA_DIR.joinpath(f"{stem}{name_modifier}.csv"),
                header=None,
                skiprows=first_row,
                nrows=self.batch_size,
            )
            for stem in self._DATASET_STEMS
        ]

        # DataFrame.append no longer exists in pandas 2.x; concat stacks all
        # the frames in a single operation.
        data = pd.concat(frames, ignore_index=True)

        data_x = data.iloc[:,0:8].to_numpy()
        data_y = data.iloc[:,8:10].to_numpy()

        return (data_x, data_y)

In [None]:
# Batch generator over the training files, using the default batch size.
generator = DataGenerator(is_validation=False)