In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm

import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt 

In [73]:
# Load the CSV files.
# The data directory is spelled out once so the notebook can be pointed at a
# different checkout by editing a single line instead of three absolute paths.
DATA_DIR = 'C:/Users/KimDongyoung/Desktop/git_LGaimers5/Lg_aimers5/data'

train_data = pd.read_csv(f'{DATA_DIR}/trim_train_data.csv')
test_data = pd.read_csv(f'{DATA_DIR}/trim_test_data.csv')
submission = pd.read_csv(f'{DATA_DIR}/submission.csv')

In [74]:
# Keep a handle on the target column of each frame before pruning.
target_train = train_data['target']
target_test = test_data['target']

# Drop every column whose values are all NaN, then put 'target' back.
# Re-attaching guarantees 'target' is present afterwards even if dropna
# removed it because all of its values were NaN.
train_data = train_data.dropna(axis=1, how='all').assign(target=target_train)
test_data = test_data.dropna(axis=1, how='all').assign(target=target_test)

In [80]:
# Collect the column names that contain a literal '?' in each frame.
train_Process_Desc_col = train_data.filter(like='?').columns
test_Process_Desc_col = test_data.filter(like='?').columns

# Print the affected column names, one per line, under a heading per frame.
print("<? column in train_data>", *train_Process_Desc_col, sep='\n')
print("<? column in test_data>", *test_Process_Desc_col, sep='\n')

# Build old-name -> new-name mappings in which every '?' becomes 'Θ'.
train_new_columns = {
    old_name: old_name.replace('?', 'Θ') for old_name in train_Process_Desc_col
}
test_new_columns = {
    old_name: old_name.replace('?', 'Θ') for old_name in test_Process_Desc_col
}

# Apply the renames (an empty mapping leaves the frame's columns unchanged).
train_data = train_data.rename(columns=train_new_columns)
test_data = test_data.rename(columns=test_new_columns)

# Re-select by 'Θ' to confirm both frames now use the same spelling.
train_Process_Desc_col = train_data.filter(like='Θ').columns
test_Process_Desc_col = test_data.filter(like='Θ').columns

# Print the 'Θ' column names of both frames.
print("<Θ in train_data>")
print("train_data:", *train_Process_Desc_col, sep='\n')
print("test_data:", *test_Process_Desc_col, sep='\n')

<? column in train_data>
CURE END POSITION ? Collect Result_Dam
CURE STANDBY POSITION ? Collect Result_Dam
CURE START POSITION ? Collect Result_Dam
CURE END POSITION ? Collect Result_Fill2
CURE STANDBY POSITION ? Collect Result_Fill2
CURE START POSITION ? Collect Result_Fill2
<? column in test_data>
<Θ in train_data>
train_data:
CURE END POSITION Θ Collect Result_Dam
CURE STANDBY POSITION Θ Collect Result_Dam
CURE START POSITION Θ Collect Result_Dam
CURE END POSITION Θ Collect Result_Fill2
CURE STANDBY POSITION Θ Collect Result_Fill2
CURE START POSITION Θ Collect Result_Fill2
test_data:
CURE END POSITION Θ Collect Result_Dam
CURE STANDBY POSITION Θ Collect Result_Dam
CURE START POSITION Θ Collect Result_Dam
CURE END POSITION Θ Collect Result_Fill2
CURE STANDBY POSITION Θ Collect Result_Fill2
CURE START POSITION Θ Collect Result_Fill2


In [82]:
# Remove the columns whose name contains 'Wip Line'.
# The names are looked up in train_data and the same list is dropped from both
# frames, so test_data must contain every one of them.
wip_line_columns = train_data.filter(like='Wip Line').columns

train_data = train_data.drop(columns=wip_line_columns)
test_data = test_data.drop(columns=wip_line_columns)

In [83]:
# Remove the columns whose name contains 'Process Desc'.
# The names are looked up in train_data and the same list is dropped from both
# frames, so test_data must contain every one of them.
Process_Desc_col = train_data.filter(like='Process Desc').columns

train_data = train_data.drop(columns=Process_Desc_col)
test_data = test_data.drop(columns=Process_Desc_col)

In [84]:
# Select the columns whose name contains 'Equipment'
# (filter(like=...) is a substring match, not a prefix match).
Equipment_col = train_data.filter(like='Equipment').columns
Equipment_col2 = test_data.filter(like='Equipment').columns

# Equipment-only views of each frame, used to derive Equipment_same_num.
new_train = train_data.loc[:, Equipment_col]
new_test = test_data.loc[:, Equipment_col2]

In [85]:
new_train

Unnamed: 0,Equipment_Dam,Equipment_AutoClave,Equipment_Fill1,Equipment_Fill2
0,Dam dispenser #1,Auto Clave Out,Fill1 dispenser #1,Fill2 dispenser #1
1,Dam dispenser #1,Auto Clave Out,Fill1 dispenser #1,Fill2 dispenser #1
2,Dam dispenser #2,Auto Clave Out,Fill1 dispenser #2,Fill2 dispenser #2
3,Dam dispenser #2,Auto Clave Out,Fill1 dispenser #2,Fill2 dispenser #2
4,Dam dispenser #1,Auto Clave Out,Fill1 dispenser #1,Fill2 dispenser #1
...,...,...,...,...
40501,Dam dispenser #1,Auto Clave Out,Fill1 dispenser #1,Fill2 dispenser #1
40502,Dam dispenser #2,Auto Clave Out,Fill1 dispenser #2,Fill2 dispenser #2
40503,Dam dispenser #1,Auto Clave Out,Fill1 dispenser #1,Fill2 dispenser #1
40504,Dam dispenser #2,Auto Clave Out,Fill1 dispenser #2,Fill2 dispenser #2


In [86]:
# Derived feature Equipment_same_num: the two equipment line-ups that count
# as "same number" (all dispensers #1, or all dispensers #2).
_SAME_NUMBER_LINEUPS = (
    (
        ('Equipment_Dam', 'Dam dispenser #1'),
        ('Equipment_AutoClave', 'Auto Clave Out'),
        ('Equipment_Fill1', 'Fill1 dispenser #1'),
        ('Equipment_Fill2', 'Fill2 dispenser #1'),
    ),
    (
        ('Equipment_Dam', 'Dam dispenser #2'),
        ('Equipment_AutoClave', 'Auto Clave Out'),
        ('Equipment_Fill1', 'Fill1 dispenser #2'),
        ('Equipment_Fill2', 'Fill2 dispenser #2'),
    ),
)


def determine_equipment_same_num(row):
    """Return 1 when ``row`` matches one of the same-number line-ups, else 0.

    Args:
        row: mapping-like object (e.g. a DataFrame row) indexed by the four
            'Equipment_*' column names.

    Returns:
        1 if Dam, Fill1 and Fill2 all use dispenser #1 or all use dispenser #2
        while the AutoClave value is 'Auto Clave Out'; otherwise 0.
    """
    for lineup in _SAME_NUMBER_LINEUPS:
        # all() over a generator stops at the first mismatch, so fields are
        # read lazily and in the listed order.
        if all(row[column] == expected for column, expected in lineup):
            return 1
    return 0

# The four raw equipment columns are replaced by the single derived flag.
equipment_source_columns = ['Equipment_Dam', 'Equipment_AutoClave', 'Equipment_Fill1', 'Equipment_Fill2']

train_data = (
    train_data
    .assign(Equipment_same_num=new_train.apply(determine_equipment_same_num, axis=1))
    .drop(columns=equipment_source_columns)
)
test_data = (
    test_data
    .assign(Equipment_same_num=new_test.apply(determine_equipment_same_num, axis=1))
    .drop(columns=equipment_source_columns)
)

In [87]:
train_data

Unnamed: 0,Model.Suffix_Dam,Workorder_Dam,Insp. Seq No._Dam,Insp Judge Code_Dam,CURE END POSITION X Collect Result_Dam,CURE END POSITION Z Collect Result_Dam,CURE END POSITION Θ Collect Result_Dam,CURE SPEED Collect Result_Dam,CURE STANDBY POSITION X Collect Result_Dam,CURE STANDBY POSITION Z Collect Result_Dam,...,Head Purge Position X Collect Result_Fill2,Head Purge Position Y Collect Result_Fill2,Head Purge Position Z Collect Result_Fill2,Machine Tact time Collect Result_Fill2,PalletID Collect Result_Fill2,Production Qty Collect Result_Fill2,Receip No Collect Result_Fill2,WorkMode Collect Result_Fill2,target,Equipment_same_num
0,AJX75334505,4F1XA938-1,1,OK,240,2.5,-90,100,1150,33.5,...,270,50,114.612,19.9,7,127,1,,Normal,1
1,AJX75334505,3KPM0016-2,1,OK,240,2.5,-90,70,1150,33.5,...,270,50,85.000,19.6,7,185,1,0.0,Normal,1
2,AJX75334501,4E1X9167-1,1,OK,1000,12.5,90,85,1150,33.5,...,270,50,114.612,19.8,10,73,1,,Normal,1
3,AJX75334501,3K1X0057-1,1,OK,1000,12.5,90,70,1150,33.5,...,270,50,85.000,19.9,12,268,1,0.0,Normal,1
4,AJX75334501,3HPM0007-1,1,OK,240,2.5,-90,70,1150,33.5,...,270,50,85.000,19.7,8,121,1,0.0,Normal,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40501,AJX75334501,3J1XF434-2,1,OK,240,2.5,-90,70,1150,33.5,...,270,50,85.000,19.2,1,318,1,0.0,Normal,1
40502,AJX75334501,4E1XC796-1,1,OK,1000,12.5,90,100,1150,33.5,...,270,50,114.612,20.5,14,197,1,,Normal,1
40503,AJX75334501,4C1XD438-1,1,OK,240,2.5,-90,100,1150,33.5,...,270,50,85.000,19.7,1,27,1,,Normal,1
40504,AJX75334501,3I1XA258-1,1,OK,1000,12.5,90,70,1150,33.5,...,270,50,85.000,20.1,13,117,1,0.0,Normal,1


In [88]:
# Keep the Dam copy of Model.Suffix under the plain name 'Model.Suffix' and
# drop the AutoClave / Fill1 / Fill2 copies.
train_data = (
    train_data
    .rename(columns={'Model.Suffix_Dam': 'Model.Suffix'})
    .drop(columns=['Model.Suffix_AutoClave', 'Model.Suffix_Fill1', 'Model.Suffix_Fill2'])
)
test_data = (
    test_data
    .rename(columns={'Model.Suffix_Dam': 'Model.Suffix'})
    .drop(columns=['Model.Suffix_AutoClave', 'Model.Suffix_Fill1', 'Model.Suffix_Fill2'])
)

In [89]:
# Keep the Dam copy of Workorder under the plain name 'Workorder' and drop the
# AutoClave / Fill1 / Fill2 copies.
train_data = (
    train_data
    .rename(columns={'Workorder_Dam': 'Workorder'})
    .drop(columns=['Workorder_AutoClave', 'Workorder_Fill1', 'Workorder_Fill2'])
)
test_data = (
    test_data
    .rename(columns={'Workorder_Dam': 'Workorder'})
    .drop(columns=['Workorder_AutoClave', 'Workorder_Fill1', 'Workorder_Fill2'])
)

In [90]:
# Remove the columns whose name contains 'Insp. Seq No'
# (names come from train_data and are dropped from both frames).
Insp_Seq_No_col = train_data.filter(like='Insp. Seq No').columns

train_data = train_data.drop(columns=Insp_Seq_No_col)
test_data = test_data.drop(columns=Insp_Seq_No_col)

# Remove the columns whose name contains 'Insp Judge Code' in the same way.
Insp_Judge_Code_col = train_data.filter(like='Insp Judge Code').columns

train_data = train_data.drop(columns=Insp_Judge_Code_col)
test_data = test_data.drop(columns=Insp_Judge_Code_col)

In [91]:
# Helper that removes columns holding exactly one distinct value and no
# missing values.
def drop_single_value_columns(df):
    """Drop constant, fully populated columns from ``df``.

    A column is dropped when it has exactly one distinct non-null value and
    contains no nulls. The 'target' column is never dropped.

    Args:
        df: input DataFrame; it is not modified.

    Returns:
        A ``(frame, names)`` tuple: a new DataFrame without the dropped
        columns, and the list of dropped column names in column order.
    """
    constant_columns = []
    for name in df.columns:
        if name == 'target':
            continue
        values = df[name]
        if values.nunique() != 1:
            continue
        # A single distinct value plus NaNs still carries information
        # (present vs. missing), so such columns are kept.
        if values.isnull().sum() != 0:
            continue
        constant_columns.append(name)
    return df.drop(columns=constant_columns), constant_columns

# Remove the constant columns from both frames and report how many were removed.
# NOTE(review): each frame is pruned independently, so the two frames are only
# guaranteed to keep the same columns if the same columns are constant in both
# - confirm by comparing train_cols_dropped with test_cols_dropped.
train_data, train_cols_dropped = drop_single_value_columns(train_data)
test_data, test_cols_dropped = drop_single_value_columns(test_data)

for label, removed in (
    ("삭제된 train_data 열 개수:", train_cols_dropped),
    ("삭제된 test_data 열 개수:", test_cols_dropped),
):
    print(label, len(removed))

삭제된 train_data 열 개수: 42
삭제된 test_data 열 개수: 42


In [92]:
train_data

Unnamed: 0,Model.Suffix,Workorder,CURE END POSITION X Collect Result_Dam,CURE END POSITION Z Collect Result_Dam,CURE END POSITION Θ Collect Result_Dam,CURE SPEED Collect Result_Dam,CURE START POSITION X Collect Result_Dam,CURE START POSITION Θ Collect Result_Dam,DISCHARGED SPEED OF RESIN Collect Result_Dam,DISCHARGED TIME OF RESIN(Stage1) Collect Result_Dam,...,HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill2,HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Fill2,Head Purge Position Z Collect Result_Fill2,Machine Tact time Collect Result_Fill2,PalletID Collect Result_Fill2,Production Qty Collect Result_Fill2,Receip No Collect Result_Fill2,WorkMode Collect Result_Fill2,target,Equipment_same_num
0,AJX75334505,4F1XA938-1,240,2.5,-90,100,1030,-90,16,14.9,...,243.7,243.7,114.612,19.9,7,127,1,,Normal,1
1,AJX75334505,3KPM0016-2,240,2.5,-90,70,1030,-90,10,21.3,...,243.7,243.7,85.000,19.6,7,185,1,0.0,Normal,1
2,AJX75334501,4E1X9167-1,1000,12.5,90,85,280,90,16,14.7,...,243.5,243.5,114.612,19.8,10,73,1,,Normal,1
3,AJX75334501,3K1X0057-1,1000,12.5,90,70,280,90,10,21.3,...,243.5,243.5,85.000,19.9,12,268,1,0.0,Normal,1
4,AJX75334501,3HPM0007-1,240,2.5,-90,70,1030,-90,10,9.7,...,243.7,243.7,85.000,19.7,8,121,1,0.0,Normal,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40501,AJX75334501,3J1XF434-2,240,2.5,-90,70,1030,-90,10,17.0,...,243.7,243.7,85.000,19.2,1,318,1,0.0,Normal,1
40502,AJX75334501,4E1XC796-1,1000,12.5,90,100,280,90,16,14.9,...,243.5,243.5,114.612,20.5,14,197,1,,Normal,1
40503,AJX75334501,4C1XD438-1,240,2.5,-90,100,1030,-90,16,14.2,...,243.7,243.7,85.000,19.7,1,27,1,,Normal,1
40504,AJX75334501,3I1XA258-1,1000,12.5,90,70,280,90,10,9.7,...,243.5,243.5,85.000,20.1,13,117,1,0.0,Normal,1


In [93]:
# Derived feature Receip_No: the shared receipt number when the Dam, Fill1 and
# Fill2 values are all equal, otherwise the marker string 'diff'.
def _receip_no_or_diff(frame):
    """Return the Dam receipt number where all three processes agree, else 'diff'.

    Vectorised replacement for a row-wise ``apply``: compares the three
    'Receip No Collect Result_*' columns element-wise. A missing value never
    compares equal, so rows containing NaN yield 'diff'.

    Args:
        frame: DataFrame holding the three 'Receip No Collect Result_*' columns.

    Returns:
        A Series aligned with ``frame`` holding the Dam value or 'diff'.
    """
    dam = frame['Receip No Collect Result_Dam']
    fill1 = frame['Receip No Collect Result_Fill1']
    fill2 = frame['Receip No Collect Result_Fill2']
    # Equivalent to the chained comparison dam == fill1 == fill2 per row.
    all_equal = dam.eq(fill1) & fill1.eq(fill2)
    return dam.where(all_equal, 'diff')


# NOTE(review): later cells turn Receip_No into text; if the source columns are
# integers in one frame and floats in the other, the text would differ
# ('1' vs '1.0') - confirm both frames load these columns with the same dtype.
train_data['Receip_No'] = _receip_no_or_diff(train_data)
test_data['Receip_No'] = _receip_no_or_diff(test_data)

In [94]:
train_data['Receip_No'].value_counts()

Receip_No
1       39276
6         980
9          96
17         86
3          64
diff        4
Name: count, dtype: int64

In [95]:
# Derived feature model_receip: '<Model.Suffix>_<Receip_No>' as text.
for frame in (train_data, test_data):
    frame['model_receip'] = frame['Model.Suffix'] + '_' + frame['Receip_No'].astype(str)

In [96]:
# Derived feature workorder_prefix: the first four characters of Workorder.
for frame in (train_data, test_data):
    frame['workorder_prefix'] = frame['Workorder'].str[:4]

In [97]:
# Derived feature workorder_receip: the work-order prefix, extended with the
# receipt number only when Receip_No is 'diff', 3.0 or 9.0.
def _workorder_receip_label(row):
    """Return '<prefix>_<Receip_No>' for the special receipt values, else the prefix.

    The membership test uses ``==``, so an integer 3 or 9 matches 3.0 / 9.0.
    """
    prefix = row['workorder_prefix']
    receip = row['Receip_No']
    if receip in ['diff', 3.0, 9.0]:
        return f"{prefix}_{receip}"
    return prefix


train_data['workorder_receip'] = train_data.apply(_workorder_receip_label, axis=1)
test_data['workorder_receip'] = test_data.apply(_workorder_receip_label, axis=1)

In [98]:
# Source and helper columns already folded into model_receip / workorder_receip.
identifier_columns = ['Model.Suffix', 'Workorder', 'workorder_prefix', 'Receip_No']
receip_source_columns = [
    'Receip No Collect Result_Dam',
    'Receip No Collect Result_Fill1',
    'Receip No Collect Result_Fill2',
]
columns_to_drop = identifier_columns + receip_source_columns

# Drop them from both frames.
train_data = train_data.drop(columns=columns_to_drop)
test_data = test_data.drop(columns=columns_to_drop)

In [99]:
train_data

Unnamed: 0,CURE END POSITION X Collect Result_Dam,CURE END POSITION Z Collect Result_Dam,CURE END POSITION Θ Collect Result_Dam,CURE SPEED Collect Result_Dam,CURE START POSITION X Collect Result_Dam,CURE START POSITION Θ Collect Result_Dam,DISCHARGED SPEED OF RESIN Collect Result_Dam,DISCHARGED TIME OF RESIN(Stage1) Collect Result_Dam,DISCHARGED TIME OF RESIN(Stage2) Collect Result_Dam,DISCHARGED TIME OF RESIN(Stage3) Collect Result_Dam,...,HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Fill2,Head Purge Position Z Collect Result_Fill2,Machine Tact time Collect Result_Fill2,PalletID Collect Result_Fill2,Production Qty Collect Result_Fill2,WorkMode Collect Result_Fill2,target,Equipment_same_num,model_receip,workorder_receip
0,240,2.5,-90,100,1030,-90,16,14.9,8.4,14.7,...,243.7,114.612,19.9,7,127,,Normal,1,AJX75334505_1,4F1X
1,240,2.5,-90,70,1030,-90,10,21.3,4.9,21.3,...,243.7,85.000,19.6,7,185,0.0,Normal,1,AJX75334505_1,3KPM
2,1000,12.5,90,85,280,90,16,14.7,8.5,14.7,...,243.5,114.612,19.8,10,73,,Normal,1,AJX75334501_1,4E1X
3,1000,12.5,90,70,280,90,10,21.3,8.4,21.3,...,243.5,85.000,19.9,12,268,0.0,Normal,1,AJX75334501_1,3K1X
4,240,2.5,-90,70,1030,-90,10,9.7,4.9,9.6,...,243.7,85.000,19.7,8,121,0.0,Normal,1,AJX75334501_1,3HPM
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40501,240,2.5,-90,70,1030,-90,10,17.0,5.0,17.0,...,243.7,85.000,19.2,1,318,0.0,Normal,1,AJX75334501_1,3J1X
40502,1000,12.5,90,100,280,90,16,14.9,8.5,14.7,...,243.5,114.612,20.5,14,197,,Normal,1,AJX75334501_1,4E1X
40503,240,2.5,-90,100,1030,-90,16,14.2,8.2,14.3,...,243.7,85.000,19.7,1,27,,Normal,1,AJX75334501_1,4C1X
40504,1000,12.5,90,70,280,90,10,9.7,4.9,9.7,...,243.5,85.000,20.1,13,117,0.0,Normal,1,AJX75334501_1,3I1X


In [100]:
# Combine the X, Y, Z head coordinates of each stage into one string column.
def create_coordinate_columns(data):
    """Add comma-joined 'X,Y,Z' head-coordinate columns to ``data`` in place.

    For every process suffix (Dam, Fill1, Fill2) and every stage (1, 2, 3) the
    three 'HEAD NORMAL COORDINATE <axis> AXIS(Stage<n>) Collect Result_<process>'
    columns are cast to ``str`` and joined with ',' into a new column named
    'head_normal_coordinate_stage<n>_<process>'. The source columns are kept.

    Args:
        data: DataFrame containing the 27 source columns; it is mutated.

    Returns:
        None.

    Raises:
        KeyError: if any source column is missing from ``data``.
    """
    # One loop replaces nine copy-pasted stanzas; new columns are added in the
    # order Dam stage 1-3, Fill1 stage 1-3, Fill2 stage 1-3.
    for process in ('Dam', 'Fill1', 'Fill2'):
        for stage in (1, 2, 3):
            x_text, y_text, z_text = (
                data[f'HEAD NORMAL COORDINATE {axis} AXIS(Stage{stage}) Collect Result_{process}'].astype(str)
                for axis in ('X', 'Y', 'Z')
            )
            data[f'head_normal_coordinate_stage{stage}_{process}'] = (
                x_text + ',' + y_text + ',' + z_text
            )

# Add the combined coordinate columns to both frames (each is modified in place).
for frame in (train_data, test_data):
    create_coordinate_columns(frame)

In [101]:
# Drop the individual head-coordinate X, Y, Z columns, plus the Stage1 X
# 'Judge Value' column of each process. Names are grouped per process.
dam_coordinate_columns = [
    'HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam',
    'HEAD NORMAL COORDINATE X AXIS(Stage1) Judge Value_Dam',
    'HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Dam',
    'HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam',
    'HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam',
    'HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam',
    'HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam',
    'HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Dam',
    'HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Dam',
    'HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Dam',
]
fill1_coordinate_columns = [
    'HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill1',
    'HEAD NORMAL COORDINATE X AXIS(Stage1) Judge Value_Fill1',
    'HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill1',
    'HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1',
    'HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1',
    'HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill1',
    'HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill1',
    'HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill1',
    'HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill1',
    'HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Fill1',
]
fill2_coordinate_columns = [
    'HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill2',
    'HEAD NORMAL COORDINATE X AXIS(Stage1) Judge Value_Fill2',
    'HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill2',
    'HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill2',
    'HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill2',
    'HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill2',
    'HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill2',
    'HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill2',
    'HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill2',
    'HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Fill2',
]
columns_to_drop = dam_coordinate_columns + fill1_coordinate_columns + fill2_coordinate_columns

# Drop them from both frames.
train_data = train_data.drop(columns=columns_to_drop)
test_data = test_data.drop(columns=columns_to_drop)

In [102]:
# Combine the UV-cure position components into one string column per position.
# NOTE: this definition rebinds the name create_coordinate_columns, which an
# earlier cell of this notebook uses for the head-coordinate helper.
def create_coordinate_columns(data):
    """Add comma-joined cure-position columns to ``data`` in place.

    Each new column is the ','-joined text of its source columns, cast to
    ``str`` in the listed order. The source columns are kept.

    Args:
        data: DataFrame containing the 'CURE END/START POSITION ...' source
            columns named below; it is mutated.

    Returns:
        None.
    """
    # new column -> source columns, joined in this order
    joined_from = {
        # Dam: cure end (X, Z, Θ) and cure start (X, Θ)
        'cure_end_position_XZΘ_Dam': (
            'CURE END POSITION X Collect Result_Dam',
            'CURE END POSITION Z Collect Result_Dam',
            'CURE END POSITION Θ Collect Result_Dam',
        ),
        'cure_start_position_XΘ_Dam': (
            'CURE START POSITION X Collect Result_Dam',
            'CURE START POSITION Θ Collect Result_Dam',
        ),
        # Fill2: cure end (X, Z) and cure start (X, Z)
        'cure_end_position_XZ_Fill2': (
            'CURE END POSITION X Collect Result_Fill2',
            'CURE END POSITION Z Collect Result_Fill2',
        ),
        'cure_start_position_XZ_Fill2': (
            'CURE START POSITION X Collect Result_Fill2',
            'CURE START POSITION Z Collect Result_Fill2',
        ),
    }

    for new_column, (first, *rest) in joined_from.items():
        text = data[first].astype(str)
        for source in rest:
            text = text + ',' + data[source].astype(str)
        data[new_column] = text

# Add the combined cure-position columns to both frames (each is modified in place).
for frame in (train_data, test_data):
    create_coordinate_columns(frame)

In [103]:
# Drop the individual UV-cure position columns, grouped per process.
dam_cure_columns = [
    'CURE END POSITION X Collect Result_Dam',
    'CURE END POSITION Z Collect Result_Dam',
    'CURE END POSITION Θ Collect Result_Dam',
    'CURE START POSITION X Collect Result_Dam',
    'CURE START POSITION Θ Collect Result_Dam',
]
fill2_cure_columns = [
    'CURE END POSITION X Collect Result_Fill2',
    'CURE END POSITION Z Collect Result_Fill2',
    'CURE START POSITION X Collect Result_Fill2',
    'CURE START POSITION Z Collect Result_Fill2',
]
columns_to_drop = dam_cure_columns + fill2_cure_columns

# Drop them from both frames.
train_data = train_data.drop(columns=columns_to_drop)
test_data = test_data.drop(columns=columns_to_drop)

In [104]:
train_data

Unnamed: 0,CURE SPEED Collect Result_Dam,DISCHARGED SPEED OF RESIN Collect Result_Dam,DISCHARGED TIME OF RESIN(Stage1) Collect Result_Dam,DISCHARGED TIME OF RESIN(Stage2) Collect Result_Dam,DISCHARGED TIME OF RESIN(Stage3) Collect Result_Dam,Dispense Volume(Stage1) Collect Result_Dam,Dispense Volume(Stage2) Collect Result_Dam,Dispense Volume(Stage3) Collect Result_Dam,Head Clean Position Z Collect Result_Dam,Head Purge Position Z Collect Result_Dam,...,head_normal_coordinate_stage1_Fill1,head_normal_coordinate_stage2_Fill1,head_normal_coordinate_stage3_Fill1,head_normal_coordinate_stage1_Fill2,head_normal_coordinate_stage2_Fill2,head_normal_coordinate_stage3_Fill2,cure_end_position_XZΘ_Dam,cure_start_position_XΘ_Dam,cure_end_position_XZ_Fill2,cure_start_position_XZ_Fill2
0,100,16,14.9,8.4,14.7,1.04,0.58,1.02,124.00,130.85,...,"838.4,430.0,244.52","458.7,429.8,244.52","157.0,430.3,244.52","835.5,428.0,243.7","458.0,427.9,243.7","156.0,428.0,243.7","240,2.5,-90","1030,-90",24033,102033
1,70,10,21.3,4.9,21.3,1.49,0.34,1.49,130.85,130.85,...,"838.4,430.5,244.4","458.5,430.5,244.4","157.0,430.8,244.4","835.5,428.0,243.7","458.0,427.9,243.7","156.0,428.0,243.7","240,2.5,-90","1030,-90",24033,102033
2,85,16,14.7,8.5,14.7,1.61,0.93,1.61,124.00,130.85,...,"837.9,1323.5,244.415","458.1,1322.5,244.415","156.1,1323.1,244.415","305.0,1324.2,243.5","499.8,1324.2,243.5","694.0,1324.2,243.5","1000,12.5,90",28090,24033,102033
3,70,10,21.3,8.4,21.3,1.49,0.58,1.49,130.85,130.85,...,"837.7,1323.2,244.3","458.8,1322.5,244.3","157.0,1322.8,244.3","305.0,1324.2,243.5","499.8,1324.2,243.5","694.0,1324.2,243.5","1000,12.5,90",28090,24033,102033
4,70,10,9.7,4.9,9.6,0.67,0.34,1.49,133.50,133.50,...,"838.4,430.2,244.4","458.5,430.5,244.4","157.0,430.5,244.4","835.5,428.0,243.7","458.0,427.9,243.7","156.0,428.0,243.7","240,2.5,-90","1030,-90",24032,102033
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40501,70,10,17.0,5.0,17.0,1.19,0.35,1.49,130.85,130.85,...,"838.4,430.5,244.4","458.5,430.5,244.4","157.0,430.8,244.4","835.5,428.0,243.7","458.0,427.9,243.7","156.0,428.0,243.7","240,2.5,-90","1030,-90",24033,102033
40502,100,16,14.9,8.5,14.7,1.04,0.59,1.49,124.00,130.85,...,"838.1,1325.0,244.535","458.7,1324.0,244.535","156.1,1324.3,244.535","304.8,1324.2,243.5","499.8,1324.2,243.5","692.8,1324.2,243.5","1000,12.5,90",28090,24033,102033
40503,100,16,14.2,8.2,14.3,0.99,0.57,1.00,124.00,130.85,...,"838.4,430.8,244.543","458.4,430.5,244.543","157.0,431.1,244.543","835.5,428.0,243.7","458.0,427.9,243.7","156.0,428.0,243.7","240,2.5,-90","1030,-90",24033,102033
40504,70,10,9.7,4.9,9.7,0.67,0.34,1.49,130.85,130.85,...,"837.7,1323.2,244.4","458.8,1323.0,244.4","157.0,1322.8,244.4","305.0,1324.2,243.5","499.8,1324.2,243.5","694.0,1324.2,243.5","1000,12.5,90",28090,24033,102033
