In [1]:
from pathlib import Path
import glob
import pandas as pd
import numpy as np
import csv

## File paths

In [2]:
# Path related configuration
# NOTE: absolute, machine-specific location of the dataset; change it to match
# where the dataset lives on the machine running this notebook.
DATASET_BASE_DIR = Path("/home/italolanza/workspace/TG/dataset")


def find_csv_files(*relative_dirs, base_dir=DATASET_BASE_DIR):
    """Return the ``*.csv`` paths found directly inside the given sub-directories.

    Args:
        *relative_dirs: Directory names relative to ``base_dir``
            (e.g. ``"horizontal/0.5mm"``).
        base_dir: Directory the relative names are resolved against; defaults
            to ``DATASET_BASE_DIR``.

    Returns:
        A list of path strings. Directories are visited in the order given and
        the files of each directory are sorted by name, because ``glob.glob``
        alone returns them in a filesystem-dependent order, which would make
        the row order of the generated CSV files differ between machines.
        A missing directory simply contributes no files.
    """
    csv_files = []
    for relative_dir in relative_dirs:
        pattern = str(Path(base_dir) / relative_dir / "*.csv")
        csv_files.extend(sorted(glob.glob(pattern)))
    return csv_files


NORMAL_FILES = find_csv_files("normal")

# Horizontal misalignment, grouped by criticality
HOR_MISALIGNMENT_LOW_FILES = find_csv_files("horizontal/0.5mm")
HOR_MISALIGNMENT_MEDIUM_FILES = find_csv_files("horizontal/1.0mm", "horizontal/1.5mm")
HOR_MISALIGNMENT_HIGH_FILES = find_csv_files("horizontal/2.0mm")

# Vertical misalignment, grouped by criticality
VER_MISALIGNMENT_LOW_FILES = find_csv_files("vertical/0.51mm", "vertical/0.63mm")
VER_MISALIGNMENT_MEDIUM_FILES = find_csv_files("vertical/1.27mm", "vertical/1.40mm")
VER_MISALIGNMENT_HIGH_FILES = find_csv_files("vertical/1.78mm", "vertical/1.90mm")

# Imbalance, grouped by criticality
IMBALANCE_LOW_FILES = find_csv_files("imbalance/6g", "imbalance/10g")
IMBALANCE_MEDIUM_FILES = find_csv_files("imbalance/15g", "imbalance/20g", "imbalance/25g")
IMBALANCE_HIGH_FILES = find_csv_files("imbalance/30g", "imbalance/35g")

# Directory that receives the generated, labelled CSV files
OUTPUT_DATA_DIR = DATASET_BASE_DIR.joinpath("output")

## Loading data

In [3]:
# Normal data
def get_normal_data(chunk_size: int) -> pd.DataFrame:
    """Build (once) and open the labelled CSV of the normal-condition recordings.

    If ``OUTPUT_DATA_DIR / "normal_data.csv"`` does not exist yet, every file
    in ``NORMAL_FILES`` is copied into it row by row with two extra trailing
    columns, ``0`` and ``0.0``. Later calls reuse the existing file; delete it
    to force regeneration.

    Args:
        chunk_size: Forwarded to ``pandas.read_csv`` as ``chunksize``.

    Returns:
        The result of ``pandas.read_csv(..., chunksize=chunk_size, header=None)``.
        With an integer ``chunk_size`` this is an iterator yielding DataFrame
        chunks rather than a single DataFrame (the annotation is kept as-is
        for compatibility). Columns are unnamed in both the freshly-built and
        the cached case.

    Raises:
        FileNotFoundError: If the output file does not exist and
            ``NORMAL_FILES`` is empty, so there is nothing to build it from.
    """
    output_path = OUTPUT_DATA_DIR.joinpath("normal_data.csv")

    if not output_path.exists():
        if not NORMAL_FILES:
            raise FileNotFoundError(
                f"{output_path} does not exist and NORMAL_FILES is empty"
            )

        OUTPUT_DATA_DIR.mkdir(parents=True, exist_ok=True)

        # Write to a temporary sibling and rename at the end, so an interrupted
        # run can never leave a truncated file that later calls treat as complete.
        partial_path = output_path.with_name(output_path.name + ".part")
        with open(partial_path, "w", newline="") as output_file:
            writer = csv.writer(output_file)
            for file_name in NORMAL_FILES:
                # newline="" is what the csv module expects for its file objects.
                with open(file_name, "r", newline="") as data_file:
                    for row in csv.reader(data_file, delimiter=","):
                        if row:  # a blank line would otherwise become a label-only row
                            writer.writerow(row + [0, 0.0])
        partial_path.replace(output_path)

    # header=None on every path: the file has no header row, so omitting it
    # would silently consume the first sample as column names.
    return pd.read_csv(output_path, chunksize=chunk_size, header=None)

In [4]:
# Imbalance data
def process_imbalance_data():
    """Write the labelled imbalance CSVs, one file per criticality level.

    Each input row is copied with two extra trailing columns: the class code
    ``1`` and the criticality level (``1.0`` low, ``2.0`` medium, ``3.0`` high).
    The outputs are created in ``OUTPUT_DATA_DIR``:

    * ``imbalance_low_data.csv``    from ``IMBALANCE_LOW_FILES``    (6g, 10g)
    * ``imbalance_medium_data.csv`` from ``IMBALANCE_MEDIUM_FILES`` (15g, 20g, 25g)
    * ``imbalance_high_data.csv``   from ``IMBALANCE_HIGH_FILES``   (30g, 35g)

    An output that already exists is left untouched (delete it to force
    regeneration), and no output is created for an empty input list.
    Nothing is returned; the files are only written to disk.
    """
    class_code = 1
    # (output file name, input recordings, criticality level)
    groups = (
        ("imbalance_low_data.csv", IMBALANCE_LOW_FILES, 1.0),
        ("imbalance_medium_data.csv", IMBALANCE_MEDIUM_FILES, 2.0),
        ("imbalance_high_data.csv", IMBALANCE_HIGH_FILES, 3.0),
    )

    OUTPUT_DATA_DIR.mkdir(parents=True, exist_ok=True)

    for output_name, input_files, criticality in groups:
        output_path = OUTPUT_DATA_DIR.joinpath(output_name)
        if output_path.exists() or not input_files:
            continue

        # Write to a temporary sibling and rename at the end, so an interrupted
        # run can never leave a truncated file that later runs treat as complete.
        partial_path = output_path.with_name(output_name + ".part")
        with open(partial_path, "w", newline="") as output_file:
            writer = csv.writer(output_file)
            for file_name in input_files:
                # newline="" is what the csv module expects for its file objects.
                with open(file_name, "r", newline="") as data_file:
                    for row in csv.reader(data_file, delimiter=","):
                        if row:  # a blank line would otherwise become a label-only row
                            writer.writerow(row + [class_code, criticality])
        partial_path.replace(output_path)


    # # Join imbalance files in one file
    # for file_name in IMBALANCE_OUTPUT_FILES:

    #         data_list = list()

    #         with open(OUTPUT_DATA_DIR.joinpath(file_name), 'r') as data_file:
    #             data_iter = csv.reader(data_file, delimiter=",")
    #             data_list.extend( [data for data in data_iter] )
            
    #         with open(OUTPUT_DATA_DIR.joinpath("imbalance_data.csv"), 'a') as output_file:
    #             writer = csv.writer(output_file)
    #             writer.writerows(data_list)


    # return {
    #     "low": pd.read_csv(OUTPUT_DATA_DIR.joinpath("normal_data.csv"), chunksize=chunk_size, header=None),
    #     "medium": pd.read_csv(OUTPUT_DATA_DIR.joinpath("normal_data.csv"), chunksize=chunk_size, header=None),
    #     "high": pd.read_csv(OUTPUT_DATA_DIR.joinpath("normal_data.csv"), chunksize=chunk_size, header=None)
    # }

In [7]:
# Horizontal misalignment
def process_hor_misalignment_data():
    """Write the labelled horizontal-misalignment CSVs, one file per criticality level.

    Each input row is copied with two extra trailing columns: the class code
    ``2`` and the criticality level (``1.0`` low, ``2.0`` medium, ``3.0`` high).
    The outputs are created in ``OUTPUT_DATA_DIR``:

    * ``hor_misalignment_low_data.csv``    from ``HOR_MISALIGNMENT_LOW_FILES``    (0.5mm)
    * ``hor_misalignment_medium_data.csv`` from ``HOR_MISALIGNMENT_MEDIUM_FILES`` (1.0mm, 1.5mm)
    * ``hor_misalignment_high_data.csv``   from ``HOR_MISALIGNMENT_HIGH_FILES``   (2.0mm)

    An output that already exists is left untouched, so a file produced by an
    earlier run must be deleted before it can be regenerated. No output is
    created for an empty input list. Nothing is returned.
    """
    class_code = 2
    # (output file name, input recordings, criticality level)
    # The inputs are the horizontal-misalignment recordings, not the
    # IMBALANCE_* lists.
    groups = (
        ("hor_misalignment_low_data.csv", HOR_MISALIGNMENT_LOW_FILES, 1.0),
        ("hor_misalignment_medium_data.csv", HOR_MISALIGNMENT_MEDIUM_FILES, 2.0),
        ("hor_misalignment_high_data.csv", HOR_MISALIGNMENT_HIGH_FILES, 3.0),
    )

    OUTPUT_DATA_DIR.mkdir(parents=True, exist_ok=True)

    for output_name, input_files, criticality in groups:
        output_path = OUTPUT_DATA_DIR.joinpath(output_name)
        if output_path.exists() or not input_files:
            continue

        # Write to a temporary sibling and rename at the end, so an interrupted
        # run can never leave a truncated file that later runs treat as complete.
        partial_path = output_path.with_name(output_name + ".part")
        with open(partial_path, "w", newline="") as output_file:
            writer = csv.writer(output_file)
            for file_name in input_files:
                # newline="" is what the csv module expects for its file objects.
                with open(file_name, "r", newline="") as data_file:
                    for row in csv.reader(data_file, delimiter=","):
                        if row:  # a blank line would otherwise become a label-only row
                            writer.writerow(row + [class_code, criticality])
        partial_path.replace(output_path)


#    # Join hor_misalignment files in one file
#     for file_name in HOR_MISLAGNMENT_OUTPUT_FILES:

#             data_list = list()

#             with open(OUTPUT_DATA_DIR.joinpath(file_name), 'r') as data_file:
#                 data_iter = csv.reader(data_file, delimiter=",")
#                 data_list.extend( [data for data in data_iter] )
            
#             with open(OUTPUT_DATA_DIR.joinpath("hor_misalignment_data.csv"), 'a') as output_file:
#                 writer = csv.writer(output_file)
#                 writer.writerows(data_list)


#     return pd.read_csv(OUTPUT_DATA_DIR.joinpath("hor_misalignment_data.csv"), chunksize=chunk_size, header=None)


In [6]:
# Vertical misalignment
def process_ver_misalignment_data():
    """Write the labelled vertical-misalignment CSVs, one file per criticality level.

    Each input row is copied with two extra trailing columns: the class code
    ``3`` and the criticality level (``1.0`` low, ``2.0`` medium, ``3.0`` high).
    The outputs are created in ``OUTPUT_DATA_DIR``:

    * ``ver_misalignment_low_data.csv``    from ``VER_MISALIGNMENT_LOW_FILES``    (0.51mm, 0.63mm)
    * ``ver_misalignment_medium_data.csv`` from ``VER_MISALIGNMENT_MEDIUM_FILES`` (1.27mm, 1.40mm)
    * ``ver_misalignment_high_data.csv``   from ``VER_MISALIGNMENT_HIGH_FILES``   (1.78mm, 1.90mm)

    An output that already exists is left untouched, so a file produced by an
    earlier run must be deleted before it can be regenerated. No output is
    created for an empty input list. Nothing is returned.
    """
    class_code = 3
    # (output file name, input recordings, criticality level)
    # The inputs are the vertical-misalignment recordings, not the
    # IMBALANCE_* lists.
    groups = (
        ("ver_misalignment_low_data.csv", VER_MISALIGNMENT_LOW_FILES, 1.0),
        ("ver_misalignment_medium_data.csv", VER_MISALIGNMENT_MEDIUM_FILES, 2.0),
        ("ver_misalignment_high_data.csv", VER_MISALIGNMENT_HIGH_FILES, 3.0),
    )

    OUTPUT_DATA_DIR.mkdir(parents=True, exist_ok=True)

    for output_name, input_files, criticality in groups:
        output_path = OUTPUT_DATA_DIR.joinpath(output_name)
        if output_path.exists() or not input_files:
            continue

        # Write to a temporary sibling and rename at the end, so an interrupted
        # run can never leave a truncated file that later runs treat as complete.
        partial_path = output_path.with_name(output_name + ".part")
        with open(partial_path, "w", newline="") as output_file:
            writer = csv.writer(output_file)
            for file_name in input_files:
                # newline="" is what the csv module expects for its file objects.
                with open(file_name, "r", newline="") as data_file:
                    for row in csv.reader(data_file, delimiter=","):
                        if row:  # a blank line would otherwise become a label-only row
                            writer.writerow(row + [class_code, criticality])
        partial_path.replace(output_path)


#    # Join hor_misalignment files in one file
#     for file_name in HOR_MISLAGNMENT_OUTPUT_FILES:

#             data_list = list()

#             with open(OUTPUT_DATA_DIR.joinpath(file_name), 'r') as data_file:
#                 data_iter = csv.reader(data_file, delimiter=",")
#                 data_list.extend( [data for data in data_iter] )
            
#             with open(OUTPUT_DATA_DIR.joinpath("hor_misalignment_data.csv"), 'a') as output_file:
#                 writer = csv.writer(output_file)
#                 writer.writerows(data_list)


#     return pd.read_csv(OUTPUT_DATA_DIR.joinpath("hor_misalignment_data.csv"), chunksize=chunk_size, header=None)



In [8]:
# Generate the labelled vertical-misalignment CSV files under OUTPUT_DATA_DIR
# (outputs that already exist are skipped by the function).
process_ver_misalignment_data()