# Imports

In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

# Data Loading

In [2]:
# Load the raw labeled dataset; the path is relative to the notebook's working directory.
label_data = pd.read_csv("data/raw/labeled_data.csv")

In [3]:
# Print a summary of the frame: row count, column names, non-null counts and dtypes.
label_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7996 entries, 0 to 7995
Data columns (total 45 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   _id                       7996 non-null   object 
 1   TimeStamp                 7996 non-null   object 
 2   PART_FACT_PLAN_DATE       7996 non-null   object 
 3   PART_FACT_SERIAL          7996 non-null   int64  
 4   PART_NAME                 7996 non-null   object 
 5   EQUIP_CD                  7996 non-null   object 
 6   EQUIP_NAME                7996 non-null   object 
 7   PassOrFail                7996 non-null   object 
 8   Reason                    7996 non-null   object 
 9   Injection_Time            7996 non-null   float64
 10  Filling_Time              7996 non-null   float64
 11  Plasticizing_Time         7996 non-null   float64
 12  Cycle_Time                7996 non-null   float64
 13  Clamp_Close_Time          7996 non-null   float64
 14  Cushion_

# Data Preprocessing

In [4]:
# Check which machines (EQUIP_NAME) appear in the data and how many rows each has

label_data["EQUIP_NAME"].value_counts()

650톤-우진2호기    7992
1800TON-우진       2
650톤-우진          2
Name: EQUIP_NAME, dtype: int64

In [5]:
# Check which parts (PART_NAME) appear in the data and how many rows each has

label_data["PART_NAME"].value_counts()

CN7 W/S SIDE MLD'G RH        3371
CN7 W/S SIDE MLD'G LH        3365
RG3 MOLD'G W/SHLD, LH         628
RG3 MOLD'G W/SHLD, RH         628
SP2 CVR ROOF RACK CTR, RH       2
JX1 W/S SIDE MLD'G RH           2
Name: PART_NAME, dtype: int64

In [6]:
def make_input(data, machine_name, product_name):
    """Select one machine/part combination and strip the non-feature columns.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame that contains ``EQUIP_NAME``, ``PART_NAME`` and every column
        listed for removal below.
    machine_name : str
        Rows are kept only where ``EQUIP_NAME`` equals this value.
    product_name : str
        Rows are kept only where ``PART_NAME`` equals this value.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the matching rows (original index labels are
        preserved) without the removed columns. ``data`` itself is not
        modified.

    Raises
    ------
    KeyError
        If ``EQUIP_NAME``, ``PART_NAME`` or any column to be removed is
        missing from ``data``.
    """
    # Identifier / bookkeeping columns that are not needed downstream.
    unneeded_cols = [
        "_id", "TimeStamp", "PART_FACT_PLAN_DATE", "Reason",
        "PART_FACT_SERIAL", "PART_NAME", "EQUIP_CD", "EQUIP_NAME",
    ]
    # Variables whose values are all 0 (per the original author's note).
    all_zero_cols = [
        "Mold_Temperature_1", "Mold_Temperature_2", "Mold_Temperature_5",
        "Mold_Temperature_6", "Mold_Temperature_7", "Mold_Temperature_8",
        "Mold_Temperature_9", "Mold_Temperature_10", "Mold_Temperature_11",
        "Mold_Temperature_12",
    ]

    # Keep a row only when both the machine and the part match.
    wanted_rows = (data["EQUIP_NAME"] == machine_name) & (data["PART_NAME"] == product_name)
    selected = data.loc[wanted_rows]

    return selected.drop(columns=unneeded_cols + all_zero_cols)

In [7]:
# Target machine: the EQUIP_NAME that accounts for nearly all rows in this dataset.
machine_name = "650톤-우진2호기"

# Parts to extract, ordered CN7 LH, CN7 RH, RG3 LH, RG3 RH.
# Later cells rely on this order, so do not rearrange the entries.
product_name = [
    "CN7 W/S SIDE MLD'G LH",
    "CN7 W/S SIDE MLD'G RH",
    "RG3 MOLD'G W/SHLD, LH",
    "RG3 MOLD'G W/SHLD, RH",
]

In [8]:
# Build one filtered frame per part. Positions 0..3 of product_name map to
# CN7 LH, CN7 RH, RG3 LH and RG3 RH respectively.
cn7lh, cn7rh, rg3lh, rg3rh = [
    make_input(label_data, machine_name, product_name[slot]) for slot in range(4)
]

In [9]:
# Stack the LH and RH frames of each product into one frame with a fresh 0..n-1 index.
cn7 = pd.concat(objs=[cn7lh, cn7rh], axis=0, ignore_index=True)
rg3 = pd.concat(objs=[rg3lh, rg3rh], axis=0, ignore_index=True)

In [10]:
# Count the distinct values in each column of cn7; a count of 1 marks a constant column.
cn7.nunique()

PassOrFail                   2
Injection_Time              30
Filling_Time                29
Plasticizing_Time           81
Cycle_Time                  34
Clamp_Close_Time             8
Cushion_Position            11
Switch_Over_Position         1
Plasticizing_Position       36
Clamp_Open_Position          2
Max_Injection_Speed         38
Max_Screw_RPM               10
Average_Screw_RPM           10
Max_Injection_Pressure      20
Max_Switch_Over_Pressure    34
Max_Back_Pressure           38
Average_Back_Pressure       26
Barrel_Temperature_1        41
Barrel_Temperature_2        36
Barrel_Temperature_3        26
Barrel_Temperature_4        48
Barrel_Temperature_5        33
Barrel_Temperature_6        23
Barrel_Temperature_7         1
Hopper_Temperature          80
Mold_Temperature_3          54
Mold_Temperature_4          51
dtype: int64

In [11]:
# Drop the single-valued (constant) columns "Switch_Over_Position" and "Barrel_Temperature_7".
# Reassigning the result keeps the same name bound to the reduced frame without inplace mutation.
cn7 = cn7.drop(columns=["Switch_Over_Position", "Barrel_Temperature_7"])

In [12]:
# Count the distinct values in each column of rg3; a count of 1 marks a constant column.
rg3.nunique()

PassOrFail                   2
Injection_Time               4
Filling_Time                 4
Plasticizing_Time           38
Cycle_Time                  12
Clamp_Close_Time             3
Cushion_Position             9
Switch_Over_Position         2
Plasticizing_Position       11
Clamp_Open_Position          1
Max_Injection_Speed         15
Max_Screw_RPM                5
Average_Screw_RPM            5
Max_Injection_Pressure      17
Max_Switch_Over_Pressure    33
Max_Back_Pressure           28
Average_Back_Pressure       32
Barrel_Temperature_1        25
Barrel_Temperature_2        22
Barrel_Temperature_3        17
Barrel_Temperature_4        30
Barrel_Temperature_5        21
Barrel_Temperature_6        12
Barrel_Temperature_7         1
Hopper_Temperature          53
Mold_Temperature_3          26
Mold_Temperature_4          32
dtype: int64

In [13]:
# Drop the single-valued (constant) columns "Clamp_Open_Position" and "Barrel_Temperature_7".
# Reassigning the result keeps the same name bound to the reduced frame without inplace mutation.
rg3 = rg3.drop(columns=["Clamp_Open_Position", "Barrel_Temperature_7"])

In [14]:
# Save the first-pass processed dataframes as CSV.
# DataFrame.to_csv does not create missing parent directories, so make sure the
# output folder exists first; otherwise this cell fails on a fresh checkout.
Path("data/processed").mkdir(parents=True, exist_ok=True)

cn7.to_csv("data/processed/labeled_data_KAMP_cn7.csv", index=False)
rg3.to_csv("data/processed/labeled_data_KAMP_rg3.csv", index=False)