In [None]:
'''
Import dependent packages & setup environments
'''
import pandas as pd
import numpy as np


# Console display settings for DataFrames: limit each printed cell to 50
# characters and allow output lines up to 1000 characters wide.
# The registered option key is `display.max_colwidth` (with an underscore);
# spelling it exactly avoids depending on pandas' regex matching of keys.
pd.set_option('display.max_colwidth', 50)
pd.set_option('display.width', 1000)

# ---------------------------------------------------------------------------
# Load data
# ---------------------------------------------------------------------------
# Materials to use
materials = ["PC", "PA66", "ABS", "PCSMOG", "TPU"]

# Load raw dataset (first spreadsheet row is the header; no index column).
raw_dataset = pd.read_excel('../../data/iae/injection_mold_5000_v1.xlsx', header=0, index_col=False)
print("> Raw Dataset Size : {}".format(len(raw_dataset)))

# Featureset: the columns kept from the raw spreadsheet.
feature_set = [
    "weight",
    "failure",
    "material",
    "set_nozzle_temperature",
    "set_front_temperature",
    "set_intermediate_temperature",
    "set_rear_temperature",
    "set_mold_velocity_1",
    "set_mold_velocity_2",
    "set_mold_velocity_3",
    "set_mold_velocity_4",
    "set_mold_velocity_5",
    "set_mold_pressure_1",
    "set_mold_pressure_2",
    "set_mold_pressure_3",
    "set_mold_pressure_4",
    "set_mold_pressure_5",
    "set_mold_position_1",
    "set_mold_position_2",
    "set_mold_position_3",
    "set_mold_position_4",
    "set_mold_position_5",
    "set_hold_velocity_1",
    "set_hold_velocity_2",
    "set_hold_velocity_3",
    "set_hold_pressure_1",
    "set_hold_pressure_2",
    "set_hold_pressure_3",
]
print("> Selected Featureset Size : {}".format(len(feature_set)))

# Select the columns through `feature_set` itself so the printed size and the
# actual selection can never drift apart.
raw_dataset = raw_dataset[feature_set]

# One-hot Encoding
# With no `columns` argument, get_dummies encodes the object/category-typed
# columns (presumably only "material" here -- verify against the data).
raw_dataset_onehot = pd.get_dummies(raw_dataset)
# Note: the reported "size" is the number of columns after encoding.
print("> Encoded Dataset Size : {}".format(len(raw_dataset_onehot.columns)))

''' 
Data separation for each material
'''
# for PC
# Keep the rows whose material is "PC", then drop any row that still has a
# missing value. A boolean mask is used instead of `where(...).dropna()` so
# non-matching rows are never filled with NaN (which would upcast integer
# columns to float).
pc_dataset = raw_dataset[raw_dataset["material"] == "PC"].dropna()
# Split on the failure code: 0 -> "positive" subset, 1/2/3 -> "negative" subset.
# `.copy()` makes each subset an independent frame.
pc_dataset_positive = pc_dataset[pc_dataset["failure"] == 0].copy()
pc_dataset_negative = pc_dataset[pc_dataset["failure"].isin([1, 2, 3])].copy()

# for PA66
# Keep the rows whose material is "PA66", then drop any row that still has a
# missing value. A boolean mask is used instead of `where(...).dropna()` so
# non-matching rows are never filled with NaN (which would upcast integer
# columns to float).
pa66_dataset = raw_dataset[raw_dataset["material"] == "PA66"].dropna()
# Split on the failure code: 0 -> "positive" subset, 1/2/3 -> "negative" subset.
# `.copy()` makes each subset an independent frame.
pa66_dataset_positive = pa66_dataset[pa66_dataset["failure"] == 0].copy()
pa66_dataset_negative = pa66_dataset[pa66_dataset["failure"].isin([1, 2, 3])].copy()

# for ABS
# Keep the rows whose material is "ABS", then drop any row that still has a
# missing value. A boolean mask is used instead of `where(...).dropna()` so
# non-matching rows are never filled with NaN (which would upcast integer
# columns to float).
abs_dataset = raw_dataset[raw_dataset["material"] == "ABS"].dropna()
# Split on the failure code: 0 -> "positive" subset, 1/2/3 -> "negative" subset.
# `.copy()` makes each subset an independent frame.
abs_dataset_positive = abs_dataset[abs_dataset["failure"] == 0].copy()
abs_dataset_negative = abs_dataset[abs_dataset["failure"].isin([1, 2, 3])].copy()

# for PCSMOG
# Keep the rows whose material is "PCSMOG", then drop any row that still has a
# missing value. A boolean mask is used instead of `where(...).dropna()` so
# non-matching rows are never filled with NaN (which would upcast integer
# columns to float).
pcsmog_dataset = raw_dataset[raw_dataset["material"] == "PCSMOG"].dropna()
# Split on the failure code: 0 -> "positive" subset, 1/2/3 -> "negative" subset.
# `.copy()` makes each subset an independent frame.
pcsmog_dataset_positive = pcsmog_dataset[pcsmog_dataset["failure"] == 0].copy()
pcsmog_dataset_negative = pcsmog_dataset[pcsmog_dataset["failure"].isin([1, 2, 3])].copy()

# for TPU
# Keep the rows whose material is "TPU", then drop any row that still has a
# missing value. A boolean mask is used instead of `where(...).dropna()` so
# non-matching rows are never filled with NaN (which would upcast integer
# columns to float).
tpu_dataset = raw_dataset[raw_dataset["material"] == "TPU"].dropna()
# Split on the failure code: 0 -> "positive" subset, 1/2/3 -> "negative" subset.
# `.copy()` makes each subset an independent frame.
tpu_dataset_positive = tpu_dataset[tpu_dataset["failure"] == 0].copy()
tpu_dataset_negative = tpu_dataset[tpu_dataset["failure"].isin([1, 2, 3])].copy()