In [566]:
import os
from pprint import pprint

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [567]:
train_data = pd.read_csv("train.csv")
print(train_data.shape)

(40506, 464)


In [568]:
empty_columns = train_data.columns[train_data.isna().all()].tolist()
print(len(empty_columns))

278


In [569]:
for column in empty_columns:
    del train_data[column]
train_data.shape

(40506, 186)

In [570]:
for col in train_data.columns:
    if train_data[col].nunique() == 1:
        del train_data[col]
train_data.shape

(40506, 146)

In [571]:
train_data = train_data.dropna()
train_data.shape

(27740, 146)

In [572]:
# 데이터 분할
normal_ratio = 1.0  # 1.0 means 1:1 ratio

df_normal = train_data[train_data["target"] == "Normal"]
df_abnormal = train_data[train_data["target"] == "AbNormal"]

num_normal = len(df_normal)
num_abnormal = len(df_abnormal)
print(f"  Total: Normal: {num_normal}, AbNormal: {num_abnormal}")

df_normal = df_normal.sample(n=int(num_abnormal * normal_ratio), replace=False, random_state=110)
df_concat = pd.concat([df_normal, df_abnormal], axis=0).reset_index(drop=True)
df_concat.value_counts("target")

  Total: Normal: 26038, AbNormal: 1702


target
AbNormal    1702
Normal      1702
Name: count, dtype: int64

In [573]:
for col in df_concat.columns:
    print(col)

Equipment_Dam
Model.Suffix_Dam
Workorder_Dam
CURE END POSITION X Collect Result_Dam
CURE END POSITION Z Collect Result_Dam
CURE END POSITION Θ Collect Result_Dam
CURE SPEED Collect Result_Dam
CURE START POSITION X Collect Result_Dam
CURE START POSITION Θ Collect Result_Dam
DISCHARGED SPEED OF RESIN Collect Result_Dam
DISCHARGED TIME OF RESIN(Stage1) Collect Result_Dam
DISCHARGED TIME OF RESIN(Stage2) Collect Result_Dam
DISCHARGED TIME OF RESIN(Stage3) Collect Result_Dam
Dispense Volume(Stage1) Collect Result_Dam
Dispense Volume(Stage2) Collect Result_Dam
Dispense Volume(Stage3) Collect Result_Dam
HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Dam
HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Dam
HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam
HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam
HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam
HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam
HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_D

In [574]:
test_data = pd.read_csv("test.csv")
test_data = test_data[df_concat.columns]

In [575]:
print(test_data.isnull().sum())

Equipment_Dam                                 0
Model.Suffix_Dam                              0
Workorder_Dam                                 0
CURE END POSITION X Collect Result_Dam        0
CURE END POSITION Z Collect Result_Dam        0
                                          ...  
PalletID Collect Result_Fill2                 0
Production Qty Collect Result_Fill2           0
Receip No Collect Result_Fill2                0
WorkMode Collect Result_Fill2                 0
target                                    17361
Length: 146, dtype: int64


In [576]:
# null 값이 있는 열 제거 target 열은 제거하지 않음
for col in test_data.columns:
    if test_data[col].isnull().sum() > 0 and col != "target":
        del test_data[col]
        del df_concat[col]

In [577]:
for col in df_concat.columns:
    print(col)
print(df_concat.shape)

Equipment_Dam
Model.Suffix_Dam
Workorder_Dam
CURE END POSITION X Collect Result_Dam
CURE END POSITION Z Collect Result_Dam
CURE END POSITION Θ Collect Result_Dam
CURE SPEED Collect Result_Dam
CURE START POSITION X Collect Result_Dam
CURE START POSITION Θ Collect Result_Dam
DISCHARGED SPEED OF RESIN Collect Result_Dam
DISCHARGED TIME OF RESIN(Stage1) Collect Result_Dam
DISCHARGED TIME OF RESIN(Stage2) Collect Result_Dam
DISCHARGED TIME OF RESIN(Stage3) Collect Result_Dam
Dispense Volume(Stage1) Collect Result_Dam
Dispense Volume(Stage2) Collect Result_Dam
Dispense Volume(Stage3) Collect Result_Dam
HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Dam
HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam
HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam
HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam
HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam
HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Dam
HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_D

# 데이터 전처리

In [578]:
df_concat["target"] = df_concat["target"].map({"Normal": 0, "AbNormal": 1})

In [579]:
# 1.Equipment_Dam
target_ratio = df_concat.groupby("Equipment_Dam")["target"].mean()
print(target_ratio)

Equipment_Dam
Dam dispenser #1    0.513308
Dam dispenser #2    0.469624
Name: target, dtype: float64


In [580]:
# 가중치 부여
df_concat["Equipment_Dam_weight"] = df_concat["Equipment_Dam"].map(target_ratio)
#df_concat["Equipment_Dam_weight"]
df_concat["Equipment_Dam_weight"].value_counts()


Equipment_Dam_weight
0.513308    2367
0.469624    1037
Name: count, dtype: int64

In [581]:
test_data["Equipment_Dam_weight"] = test_data["Equipment_Dam"].map(target_ratio)
test_data["Equipment_Dam_weight"].value_counts()

Equipment_Dam_weight
0.513308    10734
0.469624     6627
Name: count, dtype: int64

In [582]:
# nan 값 있는지 확인
print(test_data["Equipment_Dam_weight"].isnull().sum())

0


In [583]:
# 2. Model.Suffix_Dam
target_ratio = df_concat.groupby("Model.Suffix_Dam")["target"].mean()
print(target_ratio)

Model.Suffix_Dam
AJX75334501    0.497020
AJX75334502    0.543909
AJX75334503    0.000000
AJX75334505    0.200000
AJX75334507    0.444444
AJX75334508    0.333333
Name: target, dtype: float64


In [584]:
df_concat["Model.Suffix_Dam_weight"] = df_concat["Model.Suffix_Dam"].map(target_ratio)
df_concat["Model.Suffix_Dam_weight"].value_counts()

Model.Suffix_Dam_weight
0.497020    3020
0.543909     353
0.200000      15
0.444444       9
0.333333       6
0.000000       1
Name: count, dtype: int64

In [585]:
test_data["Model.Suffix_Dam_weight"] = test_data["Model.Suffix_Dam"].map(target_ratio)
test_data["Model.Suffix_Dam_weight"].value_counts()

Model.Suffix_Dam_weight
0.497020    14495
0.543909     1405
0.200000     1189
0.444444      137
0.000000       66
0.333333       28
Name: count, dtype: int64

In [586]:
print(test_data["Model.Suffix_Dam_weight"].isnull().sum())

41


In [587]:
# 최빈값으로 결측치 처리
mode_value = df_concat["Model.Suffix_Dam_weight"].mode()[0]
test_data["Model.Suffix_Dam_weight"].fillna(mode_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data["Model.Suffix_Dam_weight"].fillna(mode_value, inplace=True)


In [588]:
# 3. Workorder_Da
target_ratio = df_concat.groupby("Workorder_Dam")["target"].mean()
print(target_ratio)

Workorder_Dam
3F1X5847-2    0.857143
3F1X9643-1    0.500000
3F1X9644-1    0.615385
3F1X9648-1    0.555556
3F1X9648-2    0.250000
                ...   
4E1X0057-1    0.400000
4E1X9167-1    0.125000
4E1X9168-1    0.500000
4E1X9169-1    0.500000
4E1X9170-1    0.600000
Name: target, Length: 421, dtype: float64


In [589]:
df_concat["Workorder_Dam_weight"] = df_concat["Workorder_Dam"].map(target_ratio)
df_concat["Workorder_Dam_weight"].value_counts()

Workorder_Dam_weight
0.500000    282
0.000000    178
0.400000    155
0.333333    144
0.600000    125
           ... 
0.230769     13
0.846154     13
0.636364     11
0.909091     11
0.272727     11
Name: count, Length: 81, dtype: int64

In [590]:
test_data["Workorder_Dam_weight"] = test_data["Workorder_Dam"].map(target_ratio)
test_data["Workorder_Dam_weight"].value_counts()

Workorder_Dam_weight
0.500000    1270
0.000000    1087
0.400000     637
0.333333     594
0.250000     520
            ... 
0.722222      27
0.807692      23
0.230769      22
0.636364      21
0.909091      19
Name: count, Length: 81, dtype: int64

In [591]:
print(test_data["Workorder_Dam_weight"].isnull().sum())

5669


In [592]:
del df_concat["Workorder_Dam"]
del df_concat["Model.Suffix_Dam"]
del test_data["Workorder_Dam"]
del test_data["Model.Suffix_Dam"]

In [593]:
# 4. CURE END POSITION X Collect Result_Dam
target_ratio = df_concat.groupby("CURE END POSITION X Collect Result_Dam")["target"].mean()
print(target_ratio)

CURE END POSITION X Collect Result_Dam
240.0     0.513308
1000.0    0.469624
Name: target, dtype: float64


In [594]:
# 1번과 동일한 값이므로 삭제
del df_concat["CURE END POSITION X Collect Result_Dam"]
print(df_concat.shape)

(3404, 143)


In [595]:
# 5. CURE END POSITION Z Collect Result_Dam
target_ratio = df_concat.groupby("CURE END POSITION Z Collect Result_Dam")["target"].mean()

print(target_ratio)

CURE END POSITION Z Collect Result_Dam
2.5     0.513308
12.5    0.469624
Name: target, dtype: float64


In [596]:
# 1번과 동일한 값이므로 삭제
del df_concat["CURE END POSITION Z Collect Result_Dam"]
print(df_concat.shape)

(3404, 142)


In [597]:
# 6. CURE END POSITION Θ Collect Result_Dam
target_ratio = df_concat.groupby("CURE END POSITION Θ Collect Result_Dam")["target"].mean()
print(target_ratio)

CURE END POSITION Θ Collect Result_Dam
-90    0.513308
 90    0.469624
Name: target, dtype: float64


In [598]:
# 1버과 동일한 값이므로 삭제
del df_concat["CURE END POSITION Θ Collect Result_Dam"]
print(df_concat.shape)

(3404, 141)


In [599]:
# 7.CURE SPEED Collect Result_Dam
target_ratio = df_concat.groupby("CURE SPEED Collect Result_Dam")["target"].mean()
print(target_ratio)

CURE SPEED Collect Result_Dam
70     0.507211
85     0.282051
95     0.250000
100    0.338710
105    0.464286
Name: target, dtype: float64


In [600]:
df_concat["CURE SPEED Collect Result_Dam_weight"] = df_concat["CURE SPEED Collect Result_Dam"].map(target_ratio)
df_concat["CURE SPEED Collect Result_Dam_weight"].value_counts()

CURE SPEED Collect Result_Dam_weight
0.507211    3259
0.338710      62
0.282051      39
0.464286      28
0.250000      16
Name: count, dtype: int64

In [601]:
test_data["CURE SPEED Collect Result_Dam_weight"] = test_data["CURE SPEED Collect Result_Dam"].map(target_ratio)
test_data["CURE SPEED Collect Result_Dam_weight"].value_counts()

CURE SPEED Collect Result_Dam_weight
0.507211    14677
0.282051      974
0.338710      942
0.464286      687
0.250000       81
Name: count, dtype: int64

In [602]:
print(test_data["CURE SPEED Collect Result_Dam_weight"].isnull().sum())

0


In [603]:
# 8. CURE START POSITION X Collect Result_Dam
target_ratio = df_concat.groupby("CURE START POSITION X Collect Result_Dam")["target"].mean()
print(target_ratio)

CURE START POSITION X Collect Result_Dam
280     0.469624
1030    0.513308
Name: target, dtype: float64


In [604]:
# 1번과 동일한 값이므로 삭제
del df_concat["CURE START POSITION X Collect Result_Dam"]
print(df_concat.shape)

(3404, 141)


In [605]:
# 9. CURE START POSITION Θ Collect Result_Dam
target_ratio = df_concat.groupby("CURE START POSITION Θ Collect Result_Dam")["target"].mean()
print(target_ratio)

CURE START POSITION Θ Collect Result_Dam
-90    0.513308
 90    0.469624
Name: target, dtype: float64


In [606]:
# 1번과 동일한 값이므로 삭제
del df_concat["CURE START POSITION Θ Collect Result_Dam"]
print(df_concat.shape)

(3404, 140)


In [607]:
# 10. DISCHARGED SPEED OF RESIN Collect Result_Dam
target_ratio = df_concat.groupby("DISCHARGED SPEED OF RESIN Collect Result_Dam")["target"].mean()
print(target_ratio)

DISCHARGED SPEED OF RESIN Collect Result_Dam
10    0.547187
16    0.413333
Name: target, dtype: float64


In [608]:
df_concat["DISCHARGED SPEED OF RESIN Collect Result_Dam_weight"] = df_concat["DISCHARGED SPEED OF RESIN Collect Result_Dam"].map(target_ratio)
df_concat["DISCHARGED SPEED OF RESIN Collect Result_Dam_weight"].value_counts()

DISCHARGED SPEED OF RESIN Collect Result_Dam_weight
0.547187    2204
0.413333    1200
Name: count, dtype: int64

In [609]:
test_data["DISCHARGED SPEED OF RESIN Collect Result_Dam_weight"] = test_data["DISCHARGED SPEED OF RESIN Collect Result_Dam"].map(target_ratio)
test_data["DISCHARGED SPEED OF RESIN Collect Result_Dam_weight"].value_counts()

DISCHARGED SPEED OF RESIN Collect Result_Dam_weight
0.547187    8944
0.413333    8415
Name: count, dtype: int64

In [610]:
print(test_data["DISCHARGED SPEED OF RESIN Collect Result_Dam_weight"].isnull().sum())

2


In [611]:
# 최빈값으로 결측치 처리
mode_value = test_data["DISCHARGED SPEED OF RESIN Collect Result_Dam_weight"].mode()[0]
test_data["DISCHARGED SPEED OF RESIN Collect Result_Dam_weight"].fillna(mode_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data["DISCHARGED SPEED OF RESIN Collect Result_Dam_weight"].fillna(mode_value, inplace=True)


In [612]:
# 11. DISCHARGED TIME OF RESIN(Stage1) Collect Result_Dam
target_ratio = df_concat.groupby("DISCHARGED TIME OF RESIN(Stage1) Collect Result_Dam")["target"].mean()
print(target_ratio)

DISCHARGED TIME OF RESIN(Stage1) Collect Result_Dam
9.6     0.588905
9.7     0.549869
11.6    0.857143
13.1    0.600000
13.2    0.417824
13.5    0.157895
13.6    0.250000
13.7    1.000000
13.8    0.424242
14.2    0.425373
14.3    0.500000
14.7    0.285714
17.0    0.524055
17.1    0.000000
21.2    0.000000
21.3    0.400000
Name: target, dtype: float64


In [613]:
df_concat["DISCHARGED TIME OF RESIN(Stage1) Collect Result_Dam_weight"] = df_concat["DISCHARGED TIME OF RESIN(Stage1) Collect Result_Dam"].map(target_ratio)
df_concat["DISCHARGED TIME OF RESIN(Stage1) Collect Result_Dam_weight"].value_counts()

DISCHARGED TIME OF RESIN(Stage1) Collect Result_Dam_weight
0.417824    864
0.549869    762
0.588905    703
0.524055    582
0.400000    140
0.425373    134
0.285714     70
0.600000     55
0.424242     33
0.250000     20
0.157895     19
0.857143     14
0.500000      4
0.000000      3
1.000000      1
Name: count, dtype: int64

In [614]:
test_data["DISCHARGED TIME OF RESIN(Stage1) Collect Result_Dam_weight"] = test_data["DISCHARGED TIME OF RESIN(Stage1) Collect Result_Dam"].map(target_ratio)
test_data["DISCHARGED TIME OF RESIN(Stage1) Collect Result_Dam_weight"].value_counts()

DISCHARGED TIME OF RESIN(Stage1) Collect Result_Dam_weight
0.417824    4767
0.400000    2517
0.549869    2292
0.524055    2079
0.588905    1955
0.285714     593
0.425373     558
0.600000     378
0.157895     129
0.424242     111
0.250000     110
0.857143      70
0.000000      33
0.500000      11
1.000000       5
Name: count, dtype: int64

In [615]:
print(test_data["DISCHARGED TIME OF RESIN(Stage1) Collect Result_Dam_weight"].isnull().sum())

1753


In [616]:
# 평균값으로 결측치 처리
mean_value = test_data["DISCHARGED TIME OF RESIN(Stage1) Collect Result_Dam_weight"].mean()
test_data["DISCHARGED TIME OF RESIN(Stage1) Collect Result_Dam_weight"].fillna(mean_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data["DISCHARGED TIME OF RESIN(Stage1) Collect Result_Dam_weight"].fillna(mean_value, inplace=True)


In [617]:
# 12. DISCHARGED TIME OF RESIN(Stage2) Collect Result_Dam
target_ratio = df_concat.groupby("DISCHARGED TIME OF RESIN(Stage2) Collect Result_Dam")["target"].mean()
print(target_ratio)

DISCHARGED TIME OF RESIN(Stage2) Collect Result_Dam
3.8    0.533333
3.9    0.588595
4.8    0.637500
4.9    0.541697
5.0    0.512397
5.8    1.000000
5.9    1.000000
6.5    0.625000
6.6    0.451613
6.7    0.495413
7.1    0.250000
7.2    0.333333
7.5    0.477876
7.6    0.396396
7.7    0.380952
7.9    0.642857
8.0    0.750000
8.1    0.338308
8.2    0.406250
8.3    0.403670
8.4    0.375887
8.5    0.375000
8.6    0.266667
Name: target, dtype: float64


In [618]:
df_concat["DISCHARGED TIME OF RESIN(Stage2) Collect Result_Dam_weight"] = df_concat["DISCHARGED TIME OF RESIN(Stage2) Collect Result_Dam"].map(target_ratio)
df_concat["DISCHARGED TIME OF RESIN(Stage2) Collect Result_Dam_weight"].value_counts()

DISCHARGED TIME OF RESIN(Stage2) Collect Result_Dam_weight
0.541697    1379
0.588595     491
0.403670     218
0.338308     201
0.406250     160
0.375887     141
0.512397     121
0.477876     113
0.396396     111
0.495413     109
0.451613      93
0.637500      80
0.380952      63
0.375000      32
0.625000      32
0.266667      15
0.533333      15
0.642857      14
0.333333       6
0.250000       4
0.750000       4
1.000000       2
Name: count, dtype: int64

In [619]:
test_data["DISCHARGED TIME OF RESIN(Stage2) Collect Result_Dam_weight"] = test_data["DISCHARGED TIME OF RESIN(Stage2) Collect Result_Dam"].map(target_ratio)
test_data["DISCHARGED TIME OF RESIN(Stage2) Collect Result_Dam_weight"].value_counts()

DISCHARGED TIME OF RESIN(Stage2) Collect Result_Dam_weight
0.541697    4383
0.588595    1491
0.375887    1473
0.451613    1156
0.403670     966
0.338308     920
0.625000     728
0.406250     709
0.266667     679
0.375000     676
0.512397     437
0.477876     436
0.495413     430
0.396396     391
0.380952     237
0.637500     222
0.533333      46
0.333333      35
0.750000      16
0.250000      15
0.642857      14
1.000000       4
Name: count, dtype: int64

In [620]:
print(test_data["DISCHARGED TIME OF RESIN(Stage2) Collect Result_Dam_weight"].isnull().sum())

1897


In [621]:
mean_value = test_data["DISCHARGED TIME OF RESIN(Stage2) Collect Result_Dam_weight"].mean()
test_data["DISCHARGED TIME OF RESIN(Stage2) Collect Result_Dam_weight"].fillna(mean_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data["DISCHARGED TIME OF RESIN(Stage2) Collect Result_Dam_weight"].fillna(mean_value, inplace=True)


In [622]:
#13. DISCHARGED TIME OF RESIN(Stage3) Collect Result_Dam
target_ratio = df_concat.groupby("DISCHARGED TIME OF RESIN(Stage3) Collect Result_Dam")["target"].mean()
print(target_ratio)

DISCHARGED TIME OF RESIN(Stage3) Collect Result_Dam
9.6     0.597222
9.7     0.564103
9.8     0.000000
11.5    1.000000
11.6    0.833333
13.1    0.400000
13.2    0.434014
13.5    0.090909
13.6    0.250000
13.7    0.250000
13.8    0.466667
14.2    0.416667
14.3    0.470588
14.7    0.311475
14.8    0.111111
17.0    0.516878
17.1    0.550459
21.2    0.250000
21.3    0.398551
Name: target, dtype: float64


In [623]:
df_concat["DISCHARGED TIME OF RESIN(Stage3) Collect Result_Dam_weight"] = df_concat["DISCHARGED TIME OF RESIN(Stage3) Collect Result_Dam"].map(target_ratio)
df_concat["DISCHARGED TIME OF RESIN(Stage3) Collect Result_Dam_weight"].value_counts()

DISCHARGED TIME OF RESIN(Stage3) Collect Result_Dam_weight
0.564103    1248
0.434014     735
0.516878     474
0.597222     216
0.400000     175
0.398551     138
0.550459     109
0.416667      96
0.311475      61
0.470588      51
0.250000      36
0.466667      30
0.833333      12
0.090909      11
0.111111       9
1.000000       2
0.000000       1
Name: count, dtype: int64

In [624]:
test_data["DISCHARGED TIME OF RESIN(Stage3) Collect Result_Dam_weight"] = test_data["DISCHARGED TIME OF RESIN(Stage3) Collect Result_Dam"].map(target_ratio)
test_data["DISCHARGED TIME OF RESIN(Stage3) Collect Result_Dam_weight"].value_counts()

DISCHARGED TIME OF RESIN(Stage3) Collect Result_Dam_weight
0.434014    4010
0.564103    3670
0.398551    2450
0.311475    2075
0.516878    1693
0.400000    1100
0.597222     576
0.550459     397
0.416667     395
0.111111     271
0.250000     257
0.470588     211
0.466667      97
0.090909      86
0.833333      63
1.000000       7
0.000000       1
Name: count, dtype: int64

In [625]:
print(test_data["DISCHARGED TIME OF RESIN(Stage3) Collect Result_Dam_weight"].isnull().sum())

2


In [626]:
#최빈값으로 결측치 처리
mode_value = test_data["DISCHARGED TIME OF RESIN(Stage3) Collect Result_Dam_weight"].mode()[0]
test_data["DISCHARGED TIME OF RESIN(Stage3) Collect Result_Dam_weight"].fillna(mode_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data["DISCHARGED TIME OF RESIN(Stage3) Collect Result_Dam_weight"].fillna(mode_value, inplace=True)


In [627]:
# 14. Dispense Volume(Stage1) Collect Result_Dam
target_ratio = df_concat.groupby("Dispense Volume(Stage1) Collect Result_Dam")["target"].mean()
print(target_ratio)

Dispense Volume(Stage1) Collect Result_Dam
0.67    0.568601
0.81    0.857143
0.91    0.600000
0.92    0.417824
0.94    0.157895
0.95    0.285714
0.96    0.424242
0.99    0.425373
1.00    0.500000
1.02    0.244898
1.19    0.523156
1.48    0.000000
1.49    0.400000
1.61    0.380952
Name: target, dtype: float64


In [628]:
df_concat["Dispense Volume(Stage1) Collect Result_Dam_weight"] = df_concat["Dispense Volume(Stage1) Collect Result_Dam"].map(target_ratio)
df_concat["Dispense Volume(Stage1) Collect Result_Dam_weight"].value_counts()

Dispense Volume(Stage1) Collect Result_Dam_weight
0.568601    1465
0.417824     864
0.523156     583
0.400000     140
0.425373     134
0.600000      55
0.244898      49
0.424242      33
0.285714      21
0.380952      21
0.157895      19
0.857143      14
0.500000       4
0.000000       2
Name: count, dtype: int64

In [629]:
test_data["Dispense Volume(Stage1) Collect Result_Dam_weight"] = test_data["Dispense Volume(Stage1) Collect Result_Dam"].map(target_ratio)
test_data["Dispense Volume(Stage1) Collect Result_Dam_weight"].value_counts()

Dispense Volume(Stage1) Collect Result_Dam_weight
0.568601    4247
0.417824    3440
0.400000    2515
0.523156    2090
0.425373     542
0.380952     328
0.244898     265
0.600000     253
0.157895     129
0.285714     115
0.424242     111
0.857143      70
0.000000      22
0.500000      11
Name: count, dtype: int64

In [630]:
print(test_data["Dispense Volume(Stage1) Collect Result_Dam_weight"].isnull().sum())

3223


In [631]:
del df_concat["Dispense Volume(Stage1) Collect Result_Dam"]
del test_data["Dispense Volume(Stage1) Collect Result_Dam"]
del df_concat["Dispense Volume(Stage1) Collect Result_Dam_weight"]
del test_data["Dispense Volume(Stage1) Collect Result_Dam_weight"]

In [632]:
# 15. Dispense Volume(Stage2) Collect Result_Dam
target_ratio = df_concat.groupby("Dispense Volume(Stage2) Collect Result_Dam")["target"].mean()
print(target_ratio)

Dispense Volume(Stage2) Collect Result_Dam
0.26    0.533333
0.27    0.588595
0.33    0.637500
0.34    0.541697
0.35    0.512397
0.40    1.000000
0.41    1.000000
0.45    0.625000
0.46    0.475248
0.49    0.250000
0.50    0.333333
0.52    0.477876
0.53    0.390805
0.55    0.642857
0.56    0.346341
0.57    0.406250
0.58    0.397143
0.59    0.250000
0.60    0.363636
0.92    0.222222
0.93    0.750000
0.94    0.000000
Name: target, dtype: float64


In [633]:
df_concat["Dispense Volume(Stage2) Collect Result_Dam_weight"] = df_concat["Dispense Volume(Stage2) Collect Result_Dam"].map(target_ratio)
df_concat["Dispense Volume(Stage2) Collect Result_Dam_weight"].value_counts()

Dispense Volume(Stage2) Collect Result_Dam_weight
0.541697    1379
0.588595     491
0.397143     350
0.346341     205
0.475248     202
0.390805     174
0.406250     160
0.512397     121
0.477876     113
0.637500      80
0.625000      32
0.250000      28
0.533333      15
0.642857      14
0.363636      11
0.222222       9
0.750000       8
0.333333       6
0.000000       4
1.000000       2
Name: count, dtype: int64

In [634]:
test_data["Dispense Volume(Stage2) Collect Result_Dam_weight"] = test_data["Dispense Volume(Stage2) Collect Result_Dam"].map(target_ratio)
test_data["Dispense Volume(Stage2) Collect Result_Dam_weight"].value_counts()

Dispense Volume(Stage2) Collect Result_Dam_weight
0.541697    4383
0.397143    1923
0.588595    1491
0.346341     930
0.406250     703
0.475248     672
0.390805     628
0.222222     512
0.512397     437
0.477876     436
0.250000     377
0.363636     360
0.000000     319
0.750000     314
0.637500     222
0.625000     188
0.533333      46
0.333333      35
0.642857      14
1.000000       4
Name: count, dtype: int64

In [635]:
print(test_data["Dispense Volume(Stage2) Collect Result_Dam_weight"].isnull().sum())

3367


In [636]:
del df_concat["Dispense Volume(Stage2) Collect Result_Dam"]
del test_data["Dispense Volume(Stage2) Collect Result_Dam"]
del df_concat["Dispense Volume(Stage2) Collect Result_Dam_weight"]
del test_data["Dispense Volume(Stage2) Collect Result_Dam_weight"]

In [637]:
# 16. Dispense Volume(Stage3) Collect Result_Dam
target_ratio = df_concat.groupby("Dispense Volume(Stage3) Collect Result_Dam")["target"].mean()
print(target_ratio)

Dispense Volume(Stage3) Collect Result_Dam
0.67    0.568989
0.68    0.000000
0.80    1.000000
0.81    0.833333
0.91    0.400000
0.92    0.434014
0.94    0.090909
0.95    0.250000
0.96    0.466667
0.99    0.416667
1.00    0.470588
1.02    0.272727
1.03    0.000000
1.19    0.523156
1.48    0.250000
1.49    0.398551
1.61    0.411765
1.62    0.250000
Name: target, dtype: float64


In [638]:
df_concat["Dispense Volume(Stage3) Collect Result_Dam_weight"] = df_concat["Dispense Volume(Stage3) Collect Result_Dam"].map(target_ratio)
df_concat["Dispense Volume(Stage3) Collect Result_Dam_weight"].value_counts()

Dispense Volume(Stage3) Collect Result_Dam_weight
0.568989    1464
0.434014     735
0.523156     583
0.400000     175
0.398551     138
0.416667      96
0.470588      51
0.272727      44
0.250000      40
0.466667      30
0.411765      17
0.833333      12
0.090909      11
0.000000       6
1.000000       2
Name: count, dtype: int64

In [639]:
test_data["Dispense Volume(Stage3) Collect Result_Dam_weight"] = test_data["Dispense Volume(Stage3) Collect Result_Dam"].map(target_ratio)
test_data["Dispense Volume(Stage3) Collect Result_Dam_weight"].value_counts()

Dispense Volume(Stage3) Collect Result_Dam_weight
0.568989    4246
0.434014    2878
0.398551    2452
0.523156    2090
0.272727    1066
0.411765    1009
0.400000     778
0.250000     393
0.416667     381
0.470588     209
0.000000     136
0.466667      97
0.090909      86
0.833333      63
1.000000       7
Name: count, dtype: int64

In [640]:
print(test_data["Dispense Volume(Stage3) Collect Result_Dam_weight"].isnull().sum())

1470


In [641]:
# 평균값으로 결측치 처리
mean_value = test_data["Dispense Volume(Stage3) Collect Result_Dam_weight"].mean()
test_data["Dispense Volume(Stage3) Collect Result_Dam_weight"].fillna(mean_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data["Dispense Volume(Stage3) Collect Result_Dam_weight"].fillna(mean_value, inplace=True)


In [642]:
# 16. HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Dam
target_ratio = df_concat.groupby("HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Dam")["target"].mean()
print(target_ratio)

HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Dam
161.20    0.342466
161.70    0.380000
162.40    0.421348
162.70    0.448161
462.20    0.400000
462.50    0.461538
462.75    0.576923
463.00    0.607502
463.50    0.461538
463.80    0.417910
465.10    0.604651
465.30    0.477876
465.50    0.363636
548.90    0.272727
549.40    0.416667
550.30    0.277778
550.40    0.473684
550.60    0.364130
Name: target, dtype: float64


In [643]:
df_concat["HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Dam_weight"] = df_concat["HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Dam"].map(target_ratio)
df_concat["HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Dam_weight"].value_counts()

HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Dam_weight
0.607502    1093
0.461538     416
0.448161     299
0.473684     285
0.604651     258
0.417910     201
0.364130     184
0.421348     178
0.477876     113
0.272727      77
0.342466      73
0.363636      66
0.576923      52
0.380000      50
0.416667      36
0.277778      18
0.400000       5
Name: count, dtype: int64

In [644]:
test_data["HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Dam_weight"] = test_data["HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Dam"].map(target_ratio)
test_data["HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Dam_weight"].value_counts()

HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Dam_weight
0.607502    2969
0.461538    1583
0.421348    1539
0.473684    1051
0.448161    1016
0.277778     777
0.417910     767
0.364130     766
0.604651     707
0.477876     490
0.342466     373
0.272727     338
0.363636     333
0.416667     224
0.380000     177
0.576923     154
0.400000       9
Name: count, dtype: int64

In [645]:
print(test_data["HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Dam_weight"].isnull().sum())

4088


In [646]:
del df_concat["HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Dam"]
del test_data["HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Dam"]
del df_concat["HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Dam_weight"]
del test_data["HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Dam_weight"]

In [647]:
# 17. HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam
target_ratio = df_concat.groupby("HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam")["target"].mean()
print(target_ratio)

HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam
159.5    0.459459
160.0    0.629412
160.5    0.562152
160.8    0.417910
462.2    0.272727
462.7    0.416667
463.6    0.277778
463.7    0.469965
463.9    0.364130
464.0    1.000000
464.2    0.342466
464.7    0.380000
465.4    0.421348
465.7    0.448161
551.1    0.000000
551.3    0.567568
552.0    0.363636
Name: target, dtype: float64


In [648]:
df_concat["HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam_weight"] = df_concat["HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam"].map(target_ratio)
df_concat["HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam_weight"].value_counts()

HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam_weight
0.562152    1078
0.567568     370
0.629412     340
0.448161     299
0.469965     283
0.417910     201
0.364130     184
0.421348     178
0.459459     148
0.272727      77
0.342466      73
0.363636      66
0.380000      50
0.416667      36
0.277778      18
1.000000       2
0.000000       1
Name: count, dtype: int64

In [649]:
test_data["HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam_weight"] = test_data["HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam"].map(target_ratio)
test_data["HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam_weight"].value_counts()

HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam_weight
0.562152    3289
0.567568    1195
0.469965    1050
0.448161     989
0.629412     860
0.421348     850
0.417910     767
0.364130     766
0.277778     696
0.459459     566
0.342466     373
0.272727     338
0.363636     333
0.416667     224
0.380000     177
1.000000      34
0.000000       2
Name: count, dtype: int64

In [650]:
print(test_data["HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam_weight"].isnull().sum())

4852


In [651]:
del df_concat["HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam"]
del test_data["HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam"]
del df_concat["HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam_weight"]
del test_data["HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Dam_weight"]

In [652]:
# 18. HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam
target_ratio = df_concat.groupby("HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam")["target"].mean()
print(target_ratio)

HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam
159.8     0.272727
160.3     0.416667
161.2     0.277778
161.3     0.473684
161.5     0.364130
377.3     0.443038
377.5     0.589327
550.5     0.342466
551.0     0.380000
551.7     0.421348
552.0     0.448161
1269.0    0.603846
1271.3    0.477477
1271.8    0.363636
Name: target, dtype: float64


In [653]:
df_concat["HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam_weight"] = df_concat["HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam"].map(target_ratio)
df_concat["HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam_weight"].value_counts()

HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam_weight
0.589327    1293
0.443038     474
0.448161     299
0.473684     285
0.603846     260
0.364130     184
0.421348     178
0.477477     111
0.272727      77
0.342466      73
0.363636      66
0.380000      50
0.416667      36
0.277778      18
Name: count, dtype: int64

In [654]:
test_data["HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam_weight"] = test_data["HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam"].map(target_ratio)
test_data["HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam_weight"].value_counts()

HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam_weight
0.589327    3693
0.443038    1789
0.448161    1702
0.473684    1084
0.421348     850
0.364130     766
0.603846     737
0.477477     460
0.342466     373
0.272727     338
0.363636     333
0.416667     224
0.380000     177
0.277778     114
Name: count, dtype: int64

In [655]:
print(test_data["HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam_weight"].isnull().sum())

4721


In [656]:
del df_concat["HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam"]
del test_data["HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam"]
del df_concat["HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam_weight"]
del test_data["HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Dam_weight"]

In [657]:
# 19. HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam
target_ratio = df_concat.groupby("HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam")["target"].mean()
print(target_ratio)

HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam
377.0     0.589327
377.3     0.443038
377.6     0.405000
1268.2    0.557692
1268.4    0.608187
1268.8    0.648649
1270.7    0.435028
1271.8    0.421667
Name: target, dtype: float64


In [658]:
df_concat["HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam_weight"] = df_concat["HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam"].map(target_ratio)
df_concat["HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam_weight"].value_counts()

HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam_weight
0.589327    1293
0.421667     600
0.405000     600
0.443038     474
0.435028     177
0.608187     171
0.557692      52
0.648649      37
Name: count, dtype: int64

In [659]:
test_data["HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam_weight"] = test_data["HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam"].map(target_ratio)
test_data["HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam_weight"].value_counts()

HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam_weight
0.405000    4653
0.421667    4442
0.589327    3693
0.443038    1929
0.435028     793
0.608187     445
0.557692     187
0.648649     105
Name: count, dtype: int64

In [660]:
print(test_data["HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam_weight"].isnull().sum())

1114


In [661]:
mean_value = test_data["HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam_weight"].mean()
test_data["HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam_weight"].fillna(mean_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data["HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Dam_weight"].fillna(mean_value, inplace=True)


In [662]:
# 20. HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam
target_ratio = df_concat.groupby("HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam")["target"].mean()
print(target_ratio)

HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam
377.1     0.405000
377.3     0.443038
377.5     0.589327
1269.0    0.603846
1271.3    0.477477
1271.8    0.363636
1383.9    0.421667
Name: target, dtype: float64


In [663]:
df_concat["HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam_weight"] = df_concat["HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam"].map(target_ratio)
df_concat["HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam_weight"].value_counts()

HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam_weight
0.589327    1293
0.421667     600
0.405000     600
0.443038     474
0.603846     260
0.477477     111
0.363636      66
Name: count, dtype: int64

In [664]:
test_data["HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam_weight"] = test_data["HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam"].map(target_ratio)
test_data["HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam_weight"].value_counts()

HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam_weight
0.405000    4712
0.589327    3693
0.421667    3004
0.443038    1870
0.603846     737
0.477477     460
0.363636     333
Name: count, dtype: int64

In [665]:
print(test_data["HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam_weight"].isnull().sum())

2552


In [666]:
mean_value = test_data["HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam_weight"].mean()
test_data["HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam_weight"].fillna(mean_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data["HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Dam_weight"].fillna(mean_value, inplace=True)


In [667]:
# 21. HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Dam
target_ratio = df_concat.groupby("HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Dam")["target"].mean()
print(target_ratio)

HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Dam
282.15     0.487912
284.60     0.550388
284.80     0.593133
377.60     0.429648
378.00     0.356436
1271.80    0.421667
Name: target, dtype: float64


In [668]:
df_concat["HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Dam_weight"] = df_concat["HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Dam"].map(target_ratio)
df_concat["HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Dam_weight"].value_counts()

HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Dam_weight
0.593133    1165
0.487912     910
0.421667     600
0.429648     398
0.356436     202
0.550388     129
Name: count, dtype: int64

In [669]:
test_data["HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Dam_weight"] = test_data["HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Dam"].map(target_ratio)
test_data["HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Dam_weight"].value_counts()

HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Dam_weight
0.421667    4442
0.593133    3353
0.487912    3318
0.429648    3184
0.356436    1469
0.550388     341
Name: count, dtype: int64

In [670]:
print(test_data["HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Dam_weight"].isnull().sum())

1254


In [671]:
mean_value = test_data["HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Dam_weight"].mean()
test_data["HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Dam_weight"].fillna(mean_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data["HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Dam_weight"].fillna(mean_value, inplace=True)


In [672]:
# 22. HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Dam
target_ratio = df_concat.groupby("HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Dam")["target"].mean()
print(target_ratio)

HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Dam
273.800    0.472222
274.000    0.583333
274.200    0.400000
274.330    0.354839
274.400    0.337349
274.510    0.232558
274.780    0.425000
280.894    0.445946
281.095    0.465649
281.222    0.560000
281.226    0.384615
281.300    0.500000
281.324    0.258065
281.413    0.235294
281.424    0.000000
281.430    0.428571
281.513    0.142857
281.517    0.333333
282.150    0.487912
282.343    0.436170
282.500    0.454545
284.600    0.550388
284.800    0.593133
Name: target, dtype: float64


In [673]:
df_concat["HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Dam_weight"] = df_concat["HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Dam"].map(target_ratio)
df_concat["HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Dam_weight"].value_counts()

HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Dam_weight
0.593133    1165
0.487912     910
0.436170     188
0.454545     165
0.445946     148
0.465649     131
0.550388     129
0.337349      83
0.400000      80
0.354839      62
0.232558      43
0.425000      40
0.583333      36
0.333333      36
0.472222      36
0.428571      35
0.235294      34
0.258065      31
0.560000      25
0.384615      13
0.142857       7
0.500000       4
0.000000       3
Name: count, dtype: int64

In [674]:
test_data["HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Dam_weight"] = test_data["HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Dam"].map(target_ratio)
test_data["HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Dam_weight"].value_counts()

HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Dam_weight
0.487912    5718
0.593133    3353
0.354839    1283
0.232558    1247
0.454545     883
0.436170     835
0.465649     542
0.445946     490
0.400000     352
0.337349     347
0.550388     341
0.428571     183
0.583333     182
0.472222     180
0.333333     170
0.235294     160
0.425000     154
0.258065     150
0.560000      70
0.384615      55
0.500000      26
0.000000      14
0.142857       8
Name: count, dtype: int64

In [675]:
print(test_data["HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Dam_weight"].isnull().sum())

618


In [676]:
# 최빈값으로 결측치 처리
mode_value = test_data["HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Dam_weight"].mode()[0]
test_data["HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Dam_weight"].fillna(mode_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data["HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Dam_weight"].fillna(mode_value, inplace=True)


In [677]:
# 22. HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Dam
target_ratio = df_concat.groupby("HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Dam")["target"].mean()
print(target_ratio)

HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Dam
273.800    0.472222
274.000    0.583333
274.200    0.400000
274.330    0.354839
274.400    0.337349
274.510    0.232558
274.780    0.425000
280.894    0.445946
281.095    0.465649
281.222    0.560000
281.226    0.384615
281.300    0.500000
281.324    0.258065
281.413    0.235294
281.424    0.000000
281.430    0.428571
281.513    0.142857
281.517    0.333333
282.150    0.487912
282.343    0.436170
282.500    0.454545
284.600    0.550388
284.800    0.593133
Name: target, dtype: float64


In [678]:
# 22번과 동일한 값이므로 삭제
del df_concat["HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Dam"]
print(df_concat.shape)

(3404, 143)


In [679]:
# HEAD Standby Position X Collect Result_Dam
target_ratio = df_concat.groupby("HEAD Standby Position X Collect Result_Dam")["target"].mean()
print(target_ratio) 

HEAD Standby Position X Collect Result_Dam
257.000    0.547187
273.800    0.472222
274.000    0.583333
274.200    0.400000
274.330    0.354839
274.400    0.337349
274.510    0.232558
274.780    0.425000
280.894    0.445946
281.095    0.465649
281.222    0.560000
281.226    0.384615
281.300    0.500000
281.324    0.258065
281.413    0.235294
281.424    0.000000
281.430    0.428571
281.513    0.142857
281.517    0.333333
282.343    0.436170
282.500    0.454545
Name: target, dtype: float64


In [680]:
# 22번과 동일한 값이므로 삭제
del df_concat["HEAD Standby Position X Collect Result_Dam"]
print(df_concat.shape)

(3404, 142)


In [681]:
# 24. HEAD Standby Position Y Collect Result_Dam
target_ratio = df_concat.groupby("HEAD Standby Position Y Collect Result_Dam")["target"].mean()
print(target_ratio)

HEAD Standby Position Y Collect Result_Dam
66     0.547187
257    0.413333
Name: target, dtype: float64


In [682]:
# 10번과 동일한 값이므로 삭제
del df_concat["HEAD Standby Position Y Collect Result_Dam"]
print(df_concat.shape)

(3404, 141)


In [683]:
# 25. HEAD Standby Position Z Collect Result_Dam
target_ratio = df_concat.groupby("HEAD Standby Position Z Collect Result_Dam")["target"].mean()
print(target_ratio)

HEAD Standby Position Z Collect Result_Dam
0     0.547187
66    0.413333
Name: target, dtype: float64


In [684]:
# 10번과 동일한 값이므로 삭제
del df_concat["HEAD Standby Position Z Collect Result_Dam"]
print(df_concat.shape)

(3404, 140)


In [685]:
# 26. Head Clean Position X Collect Result_Dam
target_ratio = df_concat.groupby("Head Clean Position X Collect Result_Dam")["target"].mean()
print(target_ratio)

Head Clean Position X Collect Result_Dam
0.0      0.413333
127.5    0.547187
Name: target, dtype: float64


In [686]:
# 10번과 동일한 값이므로 삭제
del df_concat["Head Clean Position X Collect Result_Dam"]
print(df_concat.shape)

(3404, 139)


In [687]:
# 27. Head Clean Position Y Collect Result_Dam
target_ratio = df_concat.groupby("Head Clean Position Y Collect Result_Dam")["target"].mean()
print(target_ratio)

Head Clean Position Y Collect Result_Dam
66.0     0.547187
127.5    0.413333
Name: target, dtype: float64


In [688]:
# 10번과 동일한 값이므로 삭제
del df_concat["Head Clean Position Y Collect Result_Dam"]
print(df_concat.shape)

(3404, 138)


In [689]:
# 28. Head Clean Position Z Collect Result_Dam
target_ratio = df_concat.groupby("Head Clean Position Z Collect Result_Dam")["target"].mean()
print(target_ratio)

Head Clean Position Z Collect Result_Dam
66.00     0.413333
124.50    0.682927
130.85    0.487912
133.50    0.585794
Name: target, dtype: float64


In [690]:
df_concat["Head Clean Position Z Collect Result_Dam_weight"] = df_concat["Head Clean Position Z Collect Result_Dam"].map(target_ratio)
df_concat["Head Clean Position Z Collect Result_Dam_weight"].value_counts()

Head Clean Position Z Collect Result_Dam_weight
0.585794    1253
0.413333    1200
0.487912     910
0.682927      41
Name: count, dtype: int64

In [691]:
test_data["Head Clean Position Z Collect Result_Dam_weight"] = test_data["Head Clean Position Z Collect Result_Dam"].map(target_ratio)
test_data["Head Clean Position Z Collect Result_Dam_weight"].value_counts()

Head Clean Position Z Collect Result_Dam_weight
0.413333    10349
0.585794     3513
0.487912     3317
0.682927      182
Name: count, dtype: int64

In [692]:
print(test_data["Head Clean Position Z Collect Result_Dam_weight"].isnull().sum())

0


In [693]:
# 29. Head Purge Position X Collect Result_Dam
target_ratio = df_concat.groupby("Head Purge Position X Collect Result_Dam")["target"].mean()
print(target_ratio)

Head Purge Position X Collect Result_Dam
118.85    0.000000
124.00    0.387863
130.85    0.425610
257.00    0.547187
Name: target, dtype: float64


In [694]:
df_concat["Head Purge Position X Collect Result_Dam_weight"] = df_concat["Head Purge Position X Collect Result_Dam"].map(target_ratio)
df_concat["Head Purge Position X Collect Result_Dam_weight"].value_counts()

Head Purge Position X Collect Result_Dam_weight
0.547187    2204
0.425610     820
0.387863     379
0.000000       1
Name: count, dtype: int64

In [695]:
# 1개만 데이터 있는 값이 있어서 무의미 하다고 생각하여 삭제
del df_concat["Head Purge Position X Collect Result_Dam"]
del df_concat["Head Purge Position X Collect Result_Dam_weight"]
print(df_concat.shape)

(3404, 138)


In [696]:
# 30. Head Purge Position Y Collect Result_Dam
target_ratio = df_concat.groupby("Head Purge Position Y Collect Result_Dam")["target"].mean()
print(target_ratio)

Head Purge Position Y Collect Result_Dam
66     0.547187
257    0.413333
Name: target, dtype: float64


In [697]:
# 10번과 동일한 값이므로 삭제
del df_concat["Head Purge Position Y Collect Result_Dam"]
print(df_concat.shape)

(3404, 137)


In [698]:
# 31. Head Purge Position Z Collect Result_Dam
target_ratio = df_concat.groupby("Head Purge Position Z Collect Result_Dam")["target"].mean()
print(target_ratio)

Head Purge Position Z Collect Result_Dam
66.00     0.413333
124.50    0.682927
130.85    0.487321
133.50    0.585987
Name: target, dtype: float64


In [699]:
# 28번과 비슷한 값이므로 삭제
del df_concat["Head Purge Position Z Collect Result_Dam"]
print(df_concat.shape)

(3404, 136)


In [700]:
# 32. Head Zero Position X Collect Result_Dam
target_ratio = df_concat.groupby("Head Zero Position X Collect Result_Dam")["target"].mean()
print(target_ratio)

Head Zero Position X Collect Result_Dam
130.85    0.413333
505.00    0.547187
Name: target, dtype: float64


In [701]:
# 10번과 동일한 값이므로 삭제
del df_concat["Head Zero Position X Collect Result_Dam"]
print(df_concat.shape)

(3404, 135)


In [702]:
# 33. Head Zero Position Y Collect Result_Dam
target_ratio = df_concat.groupby("Head Zero Position Y Collect Result_Dam")["target"].mean()
print(target_ratio)

Head Zero Position Y Collect Result_Dam
300.0    0.522124
303.5    0.597279
505.0    0.413333
Name: target, dtype: float64


In [703]:
df_concat["Head Zero Position Y Collect Result_Dam_weight"] = df_concat["Head Zero Position Y Collect Result_Dam"].map(target_ratio)
df_concat["Head Zero Position Y Collect Result_Dam_weight"].value_counts()

Head Zero Position Y Collect Result_Dam_weight
0.522124    1469
0.413333    1200
0.597279     735
Name: count, dtype: int64

In [704]:
test_data["Head Zero Position Y Collect Result_Dam_weight"] = test_data["Head Zero Position Y Collect Result_Dam"].map(target_ratio)
test_data["Head Zero Position Y Collect Result_Dam_weight"].value_counts()

Head Zero Position Y Collect Result_Dam_weight
0.413333    10349
0.522124     4951
0.597279     2061
Name: count, dtype: int64

In [705]:
print(test_data["Head Zero Position Y Collect Result_Dam_weight"].isnull().sum())

0


In [706]:
# 34. Head Zero Position Z Collect Result_Dam
target_ratio = df_concat.groupby("Head Zero Position Z Collect Result_Dam")["target"].mean()
print(target_ratio)

Head Zero Position Z Collect Result_Dam
265.0    0.547187
300.0    0.413333
Name: target, dtype: float64


In [707]:
# 10번과 동일한 값이므로 삭제
del df_concat["Head Zero Position Z Collect Result_Dam"]
print(df_concat.shape)

(3404, 135)


In [708]:
# 35. Machine Tact time Collect Result_Dam
target_ratio = df_concat.groupby("Machine Tact time Collect Result_Dam")["target"].mean()
print(target_ratio)

Machine Tact time Collect Result_Dam
48.00     0.000000
48.30     0.000000
49.10     1.000000
51.00     0.000000
51.20     1.000000
            ...   
104.00    0.500000
104.10    0.000000
197.30    1.000000
265.02    0.413333
999.90    1.000000
Name: target, Length: 148, dtype: float64


In [709]:
del df_concat["Machine Tact time Collect Result_Dam"]
print(df_concat.shape)

(3404, 134)


In [710]:
# 36. PalletID Collect Result_Dam
target_ratio = df_concat.groupby("PalletID Collect Result_Dam")["target"].mean()
print(target_ratio) 

PalletID Collect Result_Dam
1.0      0.565854
2.0      0.547414
3.0      0.562232
4.0      0.554502
5.0      0.532110
           ...   
101.4    0.000000
101.6    0.000000
102.0    1.000000
135.4    1.000000
243.1    1.000000
Name: target, Length: 156, dtype: float64


In [711]:
del df_concat["PalletID Collect Result_Dam"]
print(df_concat.shape)

(3404, 133)


In [712]:
# 37. Production Qty Collect Result_Dam
target_ratio = df_concat.groupby("Production Qty Collect Result_Dam")["target"].mean()
print(target_ratio)

Production Qty Collect Result_Dam
0      0.750000
1      0.421053
2      0.470588
3      0.467391
4      0.350649
         ...   
465    0.000000
468    0.000000
470    1.000000
485    1.000000
487    1.000000
Name: target, Length: 424, dtype: float64


In [713]:
del df_concat["Production Qty Collect Result_Dam"]
print(df_concat.shape)

(3404, 132)


In [714]:
# 38. Receip No Collect Result_Dam
target_ratio = df_concat.groupby("Receip No Collect Result_Dam")["target"].mean()
print(target_ratio)

Receip No Collect Result_Dam
0      0.714286
1      0.547349
2      1.000000
3      0.333333
4      0.750000
         ...   
591    0.000000
596    0.000000
597    0.000000
599    0.000000
608    0.000000
Name: target, Length: 470, dtype: float64


In [715]:
del df_concat["Receip No Collect Result_Dam"]
print(df_concat.shape)

(3404, 131)


In [716]:
# 39. Stage1 Circle1 Distance Speed Collect Result_Dam
target_ratio = df_concat.groupby("Stage1 Circle1 Distance Speed Collect Result_Dam")["target"].mean()
print(target_ratio)

Stage1 Circle1 Distance Speed Collect Result_Dam
1       0.399277
6       0.586957
9       0.000000
4000    0.394366
5000    0.523156
9000    0.571332
Name: target, dtype: float64


In [717]:
df_concat["Stage1 Circle1 Distance Speed Collect Result_Dam_weight"] = df_concat["Stage1 Circle1 Distance Speed Collect Result_Dam"].map(target_ratio)
df_concat["Stage1 Circle1 Distance Speed Collect Result_Dam_weight"].value_counts()

Stage1 Circle1 Distance Speed Collect Result_Dam_weight
0.571332    1479
0.399277    1107
0.523156     583
0.394366     142
0.586957      92
0.000000       1
Name: count, dtype: int64

In [718]:
del df_concat["Stage1 Circle1 Distance Speed Collect Result_Dam"]
del df_concat["Stage1 Circle1 Distance Speed Collect Result_Dam_weight"]
print(df_concat.shape)

(3404, 130)


In [719]:
# 40. Stage1 Circle2 Distance Speed Collect Result_Dam
target_ratio = df_concat.groupby("Stage1 Circle2 Distance Speed Collect Result_Dam")["target"].mean()
print(target_ratio)

Stage1 Circle2 Distance Speed Collect Result_Dam
4000    0.394366
5000    0.523156
5800    0.285714
6000    0.427536
6200    0.441176
6500    0.419624
9000    0.571332
Name: target, dtype: float64


In [720]:
df_concat["Stage1 Circle2 Distance Speed Collect Result_Dam_weight"] = df_concat["Stage1 Circle2 Distance Speed Collect Result_Dam"].map(target_ratio)
df_concat["Stage1 Circle2 Distance Speed Collect Result_Dam_weight"].value_counts()

Stage1 Circle2 Distance Speed Collect Result_Dam_weight
0.571332    1479
0.419624     958
0.523156     583
0.394366     142
0.427536     138
0.285714      70
0.441176      34
Name: count, dtype: int64

In [721]:
test_data["Stage1 Circle2 Distance Speed Collect Result_Dam_weight"] = test_data["Stage1 Circle2 Distance Speed Collect Result_Dam"].map(target_ratio)
test_data["Stage1 Circle2 Distance Speed Collect Result_Dam_weight"].value_counts()

Stage1 Circle2 Distance Speed Collect Result_Dam_weight
0.419624    5386
0.571332    4317
0.394366    2537
0.285714    2346
0.523156    2090
0.427536     569
0.441176     116
Name: count, dtype: int64

In [722]:
print(test_data["Stage1 Circle2 Distance Speed Collect Result_Dam_weight"].isnull().sum())

0


In [723]:
# 41. Stage1 Circle3 Distance Speed Collect Result_Dam
target_ratio = df_concat.groupby("Stage1 Circle3 Distance Speed Collect Result_Dam")["target"].mean()
print(target_ratio)

Stage1 Circle3 Distance Speed Collect Result_Dam
4000    0.394366
5000    0.523156
5800    0.285714
6000    0.427536
6200    0.441176
6500    0.419624
9000    0.571332
Name: target, dtype: float64


In [724]:
# 40번과 동일한 값이므로 삭제
del df_concat["Stage1 Circle3 Distance Speed Collect Result_Dam"]
print(df_concat.shape)

(3404, 130)


In [725]:
# 42. Stage1 Circle4 Distance Speed Collect Result_Dam
target_ratio = df_concat.groupby("Stage1 Circle4 Distance Speed Collect Result_Dam")["target"].mean()
print(target_ratio)

Stage1 Circle4 Distance Speed Collect Result_Dam
4000    0.394366
5000    0.523156
5800    0.285714
6000    0.427536
6200    0.441176
6500    0.419624
9000    0.571332
Name: target, dtype: float64


In [726]:
# 40번과 동일한 값이므로 삭제
del df_concat["Stage1 Circle4 Distance Speed Collect Result_Dam"]
print(df_concat.shape)

(3404, 129)


In [727]:
# 43. Stage1 Line1 Distance Speed Collect Result_Dam
target_ratio = df_concat.groupby("Stage1 Line1 Distance Speed Collect Result_Dam")["target"].mean()
print(target_ratio)

Stage1 Line1 Distance Speed Collect Result_Dam
4000    0.394366
5000    0.523156
5800    0.285714
6000    0.427536
6200    0.441176
6500    0.419624
7000    0.857143
9000    0.568601
Name: target, dtype: float64


In [728]:
df_concat["Stage1 Line1 Distance Speed Collect Result_Dam_weight"] = df_concat["Stage1 Line1 Distance Speed Collect Result_Dam"].map(target_ratio)
df_concat["Stage1 Line1 Distance Speed Collect Result_Dam_weight"].value_counts()

Stage1 Line1 Distance Speed Collect Result_Dam_weight
0.568601    1465
0.419624     958
0.523156     583
0.394366     142
0.427536     138
0.285714      70
0.441176      34
0.857143      14
Name: count, dtype: int64

In [729]:
test_data["Stage1 Line1 Distance Speed Collect Result_Dam_weight"] = test_data["Stage1 Line1 Distance Speed Collect Result_Dam"].map(target_ratio)
test_data["Stage1 Line1 Distance Speed Collect Result_Dam_weight"].value_counts()

Stage1 Line1 Distance Speed Collect Result_Dam_weight
0.419624    5386
0.568601    4247
0.394366    2537
0.285714    2346
0.523156    2090
0.427536     569
0.441176     116
0.857143      70
Name: count, dtype: int64

In [730]:
print(test_data["Stage1 Line1 Distance Speed Collect Result_Dam_weight"].isnull().sum())

0


In [731]:
# 44. Stage1 Line2 Distance Speed Collect Result_Dam
target_ratio = df_concat.groupby("Stage1 Line2 Distance Speed Collect Result_Dam")["target"].mean()
print(target_ratio)

Stage1 Line2 Distance Speed Collect Result_Dam
4000    0.394366
5000    0.523156
5800    0.285714
6000    0.410256
6200    0.441176
6500    0.422340
9000    0.571332
Name: target, dtype: float64


In [732]:
# 40번과 동일한 값이므로 삭제
del df_concat["Stage1 Line2 Distance Speed Collect Result_Dam"]
print(df_concat.shape)

(3404, 129)


In [733]:
# 45. Stage1 Line3 Distance Speed Collect Result_Dam
target_ratio = df_concat.groupby("Stage1 Line3 Distance Speed Collect Result_Dam")["target"].mean()
print(target_ratio)

Stage1 Line3 Distance Speed Collect Result_Dam
4000    0.394366
5000    0.523156
5800    0.285714
6000    0.427536
6200    0.441176
6500    0.419624
7000    0.857143
9000    0.568601
Name: target, dtype: float64


In [734]:
# 43번과 동일한 값이므로 삭제
del df_concat["Stage1 Line3 Distance Speed Collect Result_Dam"]
print(df_concat.shape)

(3404, 128)


In [735]:
#46. Stage1 Line4 Distance Speed Collect
target_ratio = df_concat.groupby("Stage1 Line4 Distance Speed Collect Result_Dam")["target"].mean()
print(target_ratio)

Stage1 Line4 Distance Speed Collect Result_Dam
4000    0.394366
5000    0.523156
5800    0.285714
6000    0.389937
6200    0.441176
6500    0.425827
9000    0.571332
Name: target, dtype: float64


In [736]:
# 40번과 동일한 값이므로 삭제
del df_concat["Stage1 Line4 Distance Speed Collect Result_Dam"]
print(df_concat.shape)

(3404, 127)


In [737]:
# 47. Stage2 Circle1 Distance Speed Collect Result_Dam
target_ratio = df_concat.groupby("Stage2 Circle1 Distance Speed Collect Result_Dam")["target"].mean()
print(target_ratio)

Stage2 Circle1 Distance Speed Collect Result_Dam
5000     0.415254
5800     0.285714
6000     0.427536
6200     0.441176
6500     0.419624
9000     0.544304
12000    0.586957
Name: target, dtype: float64


In [738]:
df_concat["Stage2 Circle1 Distance Speed Collect Result_Dam_weight"] = df_concat["Stage2 Circle1 Distance Speed Collect Result_Dam"].map(target_ratio)
df_concat["Stage2 Circle1 Distance Speed Collect Result_Dam_weight"].value_counts()

Stage2 Circle1 Distance Speed Collect Result_Dam_weight
0.544304    1580
0.419624     958
0.586957     506
0.427536     138
0.415254     118
0.285714      70
0.441176      34
Name: count, dtype: int64

In [739]:
test_data["Stage2 Circle1 Distance Speed Collect Result_Dam_weight"] = test_data["Stage2 Circle1 Distance Speed Collect Result_Dam"].map(target_ratio)
test_data["Stage2 Circle1 Distance Speed Collect Result_Dam_weight"].value_counts()

Stage2 Circle1 Distance Speed Collect Result_Dam_weight
0.419624    5386
0.544304    5042
0.285714    2346
0.586957    1533
0.427536     569
0.415254     437
0.441176     116
Name: count, dtype: int64

In [740]:
print(test_data["Stage2 Circle1 Distance Speed Collect Result_Dam_weight"].isnull().sum())

1932


In [741]:
# 최빈값으로 결측치 처리
mode_value = test_data["Stage2 Circle1 Distance Speed Collect Result_Dam_weight"].mode()[0]
test_data["Stage2 Circle1 Distance Speed Collect Result_Dam_weight"].fillna(mode_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data["Stage2 Circle1 Distance Speed Collect Result_Dam_weight"].fillna(mode_value, inplace=True)


In [742]:
# 48. Stage2 Circle2 Distance Speed Collect Result_Dam
target_ratio = df_concat.groupby("Stage2 Circle2 Distance Speed Collect Result_Dam")["target"].mean()
print(target_ratio)

Stage2 Circle2 Distance Speed Collect Result_Dam
5000     0.415254
5300     0.285714
5500     0.388699
6000     0.500000
6500     0.455556
9000     0.544304
12000    0.586957
Name: target, dtype: float64


In [743]:
df_concat["Stage2 Circle2 Distance Speed Collect Result_Dam_weight"] = df_concat["Stage2 Circle2 Distance Speed Collect Result_Dam"].map(target_ratio)
df_concat["Stage2 Circle2 Distance Speed Collect Result_Dam_weight"].value_counts()

Stage2 Circle2 Distance Speed Collect Result_Dam_weight
0.544304    1580
0.388699     584
0.455556     540
0.586957     506
0.415254     118
0.285714      70
0.500000       6
Name: count, dtype: int64

In [744]:
test_data["Stage2 Circle2 Distance Speed Collect Result_Dam_weight"] = test_data["Stage2 Circle2 Distance Speed Collect Result_Dam"].map(target_ratio)
test_data["Stage2 Circle2 Distance Speed Collect Result_Dam_weight"].value_counts()

Stage2 Circle2 Distance Speed Collect Result_Dam_weight
0.544304    5042
0.455556    3453
0.388699    2589
0.285714    2346
0.586957    1548
0.415254     483
0.500000      14
Name: count, dtype: int64

In [745]:
print(test_data["Stage2 Circle2 Distance Speed Collect Result_Dam_weight"].isnull().sum())

1886


In [746]:
mean_value = test_data["Stage2 Circle2 Distance Speed Collect Result_Dam_weight"].mean()
test_data["Stage2 Circle2 Distance Speed Collect Result_Dam_weight"].fillna(mean_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data["Stage2 Circle2 Distance Speed Collect Result_Dam_weight"].fillna(mean_value, inplace=True)


In [747]:
# 49. Stage2 Circle3 Distance Speed Collect Result_Dam
target_ratio = df_concat.groupby("Stage2 Circle3 Distance Speed Collect Result_Dam")["target"].mean()
print(target_ratio)

Stage2 Circle3 Distance Speed Collect Result_Dam
5000     0.415254
5300     0.285714
5500     0.388699
6000     0.500000
6500     0.455556
9000     0.544304
12000    0.586957
Name: target, dtype: float64


In [748]:
# 48번과 동일한 값이므로 삭제
del df_concat["Stage2 Circle3 Distance Speed Collect Result_Dam"]
print(df_concat.shape)

(3404, 128)


In [749]:
# 50. Stage2 Circle4 Distance Speed Collect Result_Dam
target_ratio = df_concat.groupby("Stage2 Circle4 Distance Speed Collect Result_Dam")["target"].mean()
print(target_ratio)

Stage2 Circle4 Distance Speed Collect Result_Dam
5000     0.415254
5300     0.285714
5500     0.388699
6000     0.500000
6500     0.455556
9000     0.544304
12000    0.586957
Name: target, dtype: float64


In [750]:
# 48번과 동일한 값이므로 삭제
del df_concat["Stage2 Circle4 Distance Speed Collect Result_Dam"]
print(df_concat.shape)

(3404, 127)


In [751]:
# 51. Stage2 Line1 Distance Speed Collect Result_Dam
target_ratio = df_concat.groupby("Stage2 Line1 Distance Speed Collect Result_Dam")["target"].mean()
print(target_ratio)

Stage2 Line1 Distance Speed Collect Result_Dam
5000     0.415254
5300     0.285714
5500     0.388699
6000     0.500000
6500     0.455556
9000     0.544304
12000    0.586957
Name: target, dtype: float64


In [752]:
# 48번과 동일한 값이므로 삭제
del df_concat["Stage2 Line1 Distance Speed Collect Result_Dam"]
print(df_concat.shape)

(3404, 126)


In [753]:
# 52. Stage2 Line2 Distance Speed Collect Result_Dam
target_ratio = df_concat.groupby("Stage2 Line2 Distance Speed Collect Result_Dam")["target"].mean()
print(target_ratio)

Stage2 Line2 Distance Speed Collect Result_Dam
5000     0.416000
5300     0.285714
5500     0.401368
6500     0.487705
9000     0.544304
12000    0.586957
13000    1.000000
Name: target, dtype: float64


In [754]:
df_concat["Stage2 Line2 Distance Speed Collect Result_Dam_weight"] = df_concat["Stage2 Line2 Distance Speed Collect Result_Dam"].map(target_ratio)
df_concat["Stage2 Line2 Distance Speed Collect Result_Dam_weight"].value_counts()

Stage2 Line2 Distance Speed Collect Result_Dam_weight
0.544304    1580
0.401368     877
0.586957     506
0.487705     244
0.416000     125
0.285714      70
1.000000       2
Name: count, dtype: int64

In [755]:
# 수치가 적어서 삭제
del df_concat["Stage2 Line2 Distance Speed Collect Result_Dam"]
del df_concat["Stage2 Line2 Distance Speed Collect Result_Dam_weight"]
print(df_concat.shape)

(3404, 125)


In [756]:
#53. Stage2 Line3 Distance Speed Collect Result_Dam
target_ratio = df_concat.groupby("Stage2 Line3 Distance Speed Collect Result_Dam")["target"].mean()
print(target_ratio)

Stage2 Line3 Distance Speed Collect Result_Dam
5000     0.415254
5300     0.285714
5500     0.381533
5800     0.800000
6000     0.500000
6500     0.455556
9000     0.544304
12000    0.586957
Name: target, dtype: float64


In [757]:
df_concat["Stage2 Line3 Distance Speed Collect Result_Dam_weight"] = df_concat["Stage2 Line3 Distance Speed Collect Result_Dam"].map(target_ratio)
df_concat["Stage2 Line3 Distance Speed Collect Result_Dam_weight"].value_counts()

Stage2 Line3 Distance Speed Collect Result_Dam_weight
0.544304    1580
0.381533     574
0.455556     540
0.586957     506
0.415254     118
0.285714      70
0.800000      10
0.500000       6
Name: count, dtype: int64

In [758]:
test_data["Stage2 Line3 Distance Speed Collect Result_Dam_weight"] = test_data["Stage2 Line3 Distance Speed Collect Result_Dam"].map(target_ratio)
test_data["Stage2 Line3 Distance Speed Collect Result_Dam_weight"].value_counts()

Stage2 Line3 Distance Speed Collect Result_Dam_weight
0.544304    5042
0.455556    3453
0.381533    2584
0.285714    2346
0.586957    1548
0.415254     483
0.500000      14
0.800000       5
Name: count, dtype: int64

In [759]:
print(test_data["Stage2 Line3 Distance Speed Collect Result_Dam_weight"].isnull().sum())

1886


In [760]:
mean_value = test_data["Stage2 Line3 Distance Speed Collect Result_Dam_weight"].mean()
test_data["Stage2 Line3 Distance Speed Collect Result_Dam_weight"].fillna(mean_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data["Stage2 Line3 Distance Speed Collect Result_Dam_weight"].fillna(mean_value, inplace=True)


In [761]:
# 54. Stage2 Line4 Distance Speed Collect Result_Dam
target_ratio = df_concat.groupby("Stage2 Line4 Distance Speed Collect Result_Dam")["target"].mean()
print(target_ratio)

Stage2 Line4 Distance Speed Collect Result_Dam
5000     0.419847
5300     0.285714
5500     0.400689
6000     0.300000
6500     0.500000
9000     0.544304
12000    0.586957
Name: target, dtype: float64


In [762]:
df_concat["Stage2 Line4 Distance Speed Collect Result_Dam_weight"] = df_concat["Stage2 Line4 Distance Speed Collect Result_Dam"].map(target_ratio)
df_concat["Stage2 Line4 Distance Speed Collect Result_Dam_weight"].value_counts()

Stage2 Line4 Distance Speed Collect Result_Dam_weight
0.544304    1580
0.400689     871
0.586957     506
0.500000     236
0.419847     131
0.285714      70
0.300000      10
Name: count, dtype: int64

In [763]:
test_data["Stage2 Line4 Distance Speed Collect Result_Dam_weight"] = test_data["Stage2 Line4 Distance Speed Collect Result_Dam"].map(target_ratio)
test_data["Stage2 Line4 Distance Speed Collect Result_Dam_weight"].value_counts()

Stage2 Line4 Distance Speed Collect Result_Dam_weight
0.544304    5042
0.400689    3653
0.285714    2346
0.500000    2318
0.586957    1548
0.419847     518
0.300000      50
Name: count, dtype: int64

In [764]:
print(test_data["Stage2 Line4 Distance Speed Collect Result_Dam_weight"].isnull().sum())

1886


In [765]:
mean_value = test_data["Stage2 Line4 Distance Speed Collect Result_Dam_weight"].mean()
test_data["Stage2 Line4 Distance Speed Collect Result_Dam_weight"].fillna(mean_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data["Stage2 Line4 Distance Speed Collect Result_Dam_weight"].fillna(mean_value, inplace=True)


In [766]:
# 55. Stage3 Circle1 Distance Speed Collect Result_Dam
target_ratio = df_concat.groupby("Stage3 Circle1 Distance Speed Collect Result_Dam")["target"].mean()
print(target_ratio)

Stage3 Circle1 Distance Speed Collect Result_Dam
4000    0.394366
5000    0.523156
5300    0.285714
5500    0.381533
5800    0.800000
6000    0.500000
6500    0.455556
9000    0.571332
Name: target, dtype: float64


In [767]:
df_concat["Stage3 Circle1 Distance Speed Collect Result_Dam_weight"] = df_concat["Stage3 Circle1 Distance Speed Collect Result_Dam"].map(target_ratio)
df_concat["Stage3 Circle1 Distance Speed Collect Result_Dam_weight"].value_counts()

Stage3 Circle1 Distance Speed Collect Result_Dam_weight
0.571332    1479
0.523156     583
0.381533     574
0.455556     540
0.394366     142
0.285714      70
0.800000      10
0.500000       6
Name: count, dtype: int64

In [768]:
test_data["Stage3 Circle1 Distance Speed Collect Result_Dam_weight"] = test_data["Stage3 Circle1 Distance Speed Collect Result_Dam"].map(target_ratio)
test_data["Stage3 Circle1 Distance Speed Collect Result_Dam_weight"].value_counts()

Stage3 Circle1 Distance Speed Collect Result_Dam_weight
0.571332    4317
0.455556    3453
0.381533    2584
0.285714    2346
0.394366    2310
0.523156    2136
0.500000      14
0.800000       5
Name: count, dtype: int64

In [769]:
print(test_data["Stage3 Circle1 Distance Speed Collect Result_Dam_weight"].isnull().sum())

196


In [770]:
mean_value = test_data["Stage3 Circle1 Distance Speed Collect Result_Dam_weight"].mean()
test_data["Stage3 Circle1 Distance Speed Collect Result_Dam_weight"].fillna(mean_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data["Stage3 Circle1 Distance Speed Collect Result_Dam_weight"].fillna(mean_value, inplace=True)


In [771]:
# 56. Stage3 Circle2 Distance Speed Collect Result_Dam
target_ratio = df_concat.groupby("Stage3 Circle2 Distance Speed Collect Result_Dam")["target"].mean()
print(target_ratio)

Stage3 Circle2 Distance Speed Collect Result_Dam
4000    0.394366
5000    0.523156
5800    0.285714
6000    0.435374
6200    0.441176
6500    0.418335
9000    0.571332
Name: target, dtype: float64


In [772]:
# 43번과 동일한 값이므로 삭제
del df_concat["Stage3 Circle2 Distance Speed Collect Result_Dam"]
print(df_concat.shape)

(3404, 127)


In [773]:
# 57. Stage3 Circle3 Distance Speed Collect Result_Dam
target_ratio = df_concat.groupby("Stage3 Circle3 Distance Speed Collect Result_Dam")["target"].mean()
print(target_ratio)

Stage3 Circle3 Distance Speed Collect Result_Dam
4000    0.394366
5000    0.523156
5800    0.285714
6000    0.435374
6200    0.441176
6500    0.418335
9000    0.571332
Name: target, dtype: float64


In [774]:
# 43번과 동일한 값이므로 삭제
del df_concat["Stage3 Circle3 Distance Speed Collect Result_Dam"]
print(df_concat.shape)

(3404, 126)


In [775]:
# 58. Stage3 Circle4 Distance Speed Collect Result_Dam
target_ratio = df_concat.groupby("Stage3 Circle4 Distance Speed Collect Result_Dam")["target"].mean()
print(target_ratio)

Stage3 Circle4 Distance Speed Collect Result_Dam
4000    0.394366
5000    0.523156
5800    0.285714
6000    0.435374
6200    0.441176
6500    0.418335
9000    0.571332
Name: target, dtype: float64


In [776]:
# 43번과 동일한 값이므로 삭제
del df_concat["Stage3 Circle4 Distance Speed Collect Result_Dam"]
print(df_concat.shape)

(3404, 125)


In [777]:
# 59. Stage3 Line1 Distance Speed Collect Result_Dam
target_ratio = df_concat.groupby("Stage3 Line1 Distance Speed Collect Result_Dam")["target"].mean()
print(target_ratio)

Stage3 Line1 Distance Speed Collect Result_Dam
4000    0.394366
5000    0.523156
5800    0.285714
6000    0.435374
6200    0.441176
6500    0.418335
7000    0.857143
9000    0.568601
Name: target, dtype: float64


In [778]:
# 43번과 동일한 값이므로 삭제
del df_concat["Stage3 Line1 Distance Speed Collect Result_Dam"]
print(df_concat.shape)

(3404, 124)


In [779]:
# 60. Stage3 Line2 Distance Speed Collect Result_Dam
target_ratio = df_concat.groupby("Stage3 Line2 Distance Speed Collect Result_Dam")["target"].mean()
print(target_ratio)

Stage3 Line2 Distance Speed Collect Result_Dam
4000    0.394366
5000    0.523156
5800    0.285714
6000    0.418182
6200    0.441176
6500    0.421053
9000    0.571332
Name: target, dtype: float64


In [780]:
# 40번과 동일한 값이므로 삭제
del df_concat["Stage3 Line2 Distance Speed Collect Result_Dam"]
print(df_concat.shape)

(3404, 123)


In [781]:
# 61. Stage3 Line3 Distance Speed Collect Result_Dam
target_ratio = df_concat.groupby("Stage3 Line3 Distance Speed Collect Result_Dam")["target"].mean()
print(target_ratio)

Stage3 Line3 Distance Speed Collect Result_Dam
4000    0.394366
5000    0.523156
5800    0.285714
6000    0.435374
6200    0.441176
6500    0.418335
7000    0.857143
9000    0.568601
Name: target, dtype: float64


In [782]:
# 43번과 동일한 값이므로 삭제
del df_concat["Stage3 Line3 Distance Speed Collect Result_Dam"]
print(df_concat.shape)

(3404, 122)


In [783]:
# 62. Stage3 Line4 Distance Speed Collect Result_Dam
target_ratio = df_concat.groupby("Stage3 Line4 Distance Speed Collect Result_Dam")["target"].mean()
print(target_ratio)

Stage3 Line4 Distance Speed Collect Result_Dam
4000    0.394366
5000    0.523156
5800    0.285714
6000    0.398810
6200    0.441176
6500    0.424569
9000    0.571332
Name: target, dtype: float64


In [784]:
# 40번과 동일한 값이므로 삭제
del df_concat["Stage3 Line4 Distance Speed Collect Result_Dam"]
print(df_concat.shape)

(3404, 121)


In [785]:
# 63. THICKNESS 1 Collect Result_Dam
target_ratio = df_concat.groupby("THICKNESS 1 Collect Result_Dam")["target"].mean()
print(target_ratio)

THICKNESS 1 Collect Result_Dam
0       0.547187
5800    0.285714
6000    0.435374
6200    0.441176
6500    0.418335
Name: target, dtype: float64


In [786]:
df_concat["THICKNESS 1 Collect Result_Dam_weight"] = df_concat["THICKNESS 1 Collect Result_Dam"].map(target_ratio)
df_concat["THICKNESS 1 Collect Result_Dam_weight"].value_counts()

THICKNESS 1 Collect Result_Dam_weight
0.547187    2204
0.418335     949
0.435374     147
0.285714      70
0.441176      34
Name: count, dtype: int64

In [787]:
test_data["THICKNESS 1 Collect Result_Dam_weight"] = test_data["THICKNESS 1 Collect Result_Dam"].map(target_ratio)
test_data["THICKNESS 1 Collect Result_Dam_weight"].value_counts()

THICKNESS 1 Collect Result_Dam_weight
0.547187    7012
0.418335    5349
0.285714    2346
0.435374     606
0.441176     116
Name: count, dtype: int64

In [788]:
print(test_data["THICKNESS 1 Collect Result_Dam_weight"].isnull().sum())

1932


In [789]:
mean_value = test_data["THICKNESS 1 Collect Result_Dam_weight"].mean()
test_data["THICKNESS 1 Collect Result_Dam_weight"].fillna(mean_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data["THICKNESS 1 Collect Result_Dam_weight"].fillna(mean_value, inplace=True)


In [790]:
# 64. THICKNESS 2 Collect Result_Dam
target_ratio = df_concat.groupby("THICKNESS 2 Collect Result_Dam")["target"].mean()
print(target_ratio)

THICKNESS 2 Collect Result_Dam
-0.054    0.381443
 0.000    0.504260
 0.012    0.285714
 0.014    0.428571
Name: target, dtype: float64


In [791]:
df_concat["THICKNESS 2 Collect Result_Dam_weight"] = df_concat["THICKNESS 2 Collect Result_Dam"].map(target_ratio)
df_concat["THICKNESS 2 Collect Result_Dam_weight"].value_counts()

THICKNESS 2 Collect Result_Dam_weight
0.504260    3286
0.381443      97
0.428571      14
0.285714       7
Name: count, dtype: int64

In [792]:
test_data["THICKNESS 2 Collect Result_Dam_weight"] = test_data["THICKNESS 2 Collect Result_Dam"].map(target_ratio)
test_data["THICKNESS 2 Collect Result_Dam_weight"].value_counts()

THICKNESS 2 Collect Result_Dam_weight
0.504260    15204
0.428571      610
0.381443      355
0.285714       45
Name: count, dtype: int64

In [793]:
print(test_data["THICKNESS 2 Collect Result_Dam_weight"].isnull().sum())

1147


In [794]:
# 최빈값으로 결측치
mode_value = test_data["THICKNESS 2 Collect Result_Dam_weight"].mode()[0]
test_data["THICKNESS 2 Collect Result_Dam_weight"].fillna(mode_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data["THICKNESS 2 Collect Result_Dam_weight"].fillna(mode_value, inplace=True)


In [795]:
# 65. THICKNESS 3 Collect Result_Dam
target_ratio = df_concat.groupby("THICKNESS 3 Collect Result_Dam")["target"].mean()
print(target_ratio)

THICKNESS 3 Collect Result_Dam
-0.219    0.381443
-0.022    0.285714
 0.000    0.504260
 0.007    0.428571
Name: target, dtype: float64


In [796]:
# 64번과 동일한 값이므로 삭제
del df_concat["THICKNESS 3 Collect Result_Dam"]
print(df_concat.shape)

(3404, 122)


In [797]:
# 66. WorkMode Collect Result_Dam
target_ratio = df_concat.groupby("WorkMode Collect Result_Dam")["target"].mean()
print(target_ratio)

WorkMode Collect Result_Dam
0.000    0.416821
0.003    0.285714
0.007    0.381443
0.012    0.428571
7.000    0.547187
Name: target, dtype: float64


In [798]:
# 63번과 동일한 값이므로 삭제
del df_concat["WorkMode Collect Result_Dam"]
print(df_concat.shape)

(3404, 121)


In [799]:
# 67. Model.Suffix_AutoClave
target_ratio = df_concat.groupby("Model.Suffix_AutoClave")["target"].mean()
print(target_ratio)

Model.Suffix_AutoClave
AJX75334501    0.497020
AJX75334502    0.543909
AJX75334503    0.000000
AJX75334505    0.200000
AJX75334507    0.444444
AJX75334508    0.333333
Name: target, dtype: float64


In [800]:
df_concat["Model.Suffix_AutoClave_weight"] = df_concat["Model.Suffix_AutoClave"].map(target_ratio)
df_concat["Model.Suffix_AutoClave_weight"].value_counts()

Model.Suffix_AutoClave_weight
0.497020    3020
0.543909     353
0.200000      15
0.444444       9
0.333333       6
0.000000       1
Name: count, dtype: int64

In [801]:
del df_concat["Model.Suffix_AutoClave"]
del df_concat["Model.Suffix_AutoClave_weight"]
print(df_concat.shape)

(3404, 120)


In [802]:
# 68. Workorder_AutoClave
target_ratio = df_concat.groupby("Workorder_AutoClave")["target"].mean()
print(target_ratio)

Workorder_AutoClave
3F1X5847-2    0.857143
3F1X9643-1    0.500000
3F1X9644-1    0.615385
3F1X9648-1    0.555556
3F1X9648-2    0.250000
                ...   
4E1X0057-1    0.400000
4E1X9167-1    0.125000
4E1X9168-1    0.500000
4E1X9169-1    0.500000
4E1X9170-1    0.600000
Name: target, Length: 421, dtype: float64


In [803]:
del df_concat["Workorder_AutoClave"]
print(df_concat.shape)

(3404, 119)


In [804]:
# 69. 1st Pressure Collect Result_AutoClave
target_ratio = df_concat.groupby("1st Pressure Collect Result_AutoClave")["target"].mean()
print(target_ratio)

1st Pressure Collect Result_AutoClave
0.296    1.000000
0.297    0.714286
0.298    0.469136
0.299    0.402516
0.300    0.514019
0.301    0.496212
0.302    0.504808
0.303    0.555000
0.304    0.521212
0.305    0.441718
0.306    0.500000
0.307    0.500000
0.308    0.420732
0.309    0.504673
0.310    0.516304
0.311    0.508197
0.312    0.481651
0.313    0.532663
0.314    0.511962
0.315    0.533333
0.316    0.515789
0.317    0.571429
0.318    0.400000
0.319    0.000000
Name: target, dtype: float64


In [805]:
df_concat["1st Pressure Collect Result_AutoClave_weight"] = df_concat["1st Pressure Collect Result_AutoClave"].map(target_ratio)
df_concat["1st Pressure Collect Result_AutoClave_weight"].value_counts()

1st Pressure Collect Result_AutoClave_weight
0.496212    264
0.508197    244
0.481651    218
0.504673    214
0.514019    214
0.511962    209
0.504808    208
0.555000    200
0.532663    199
0.533333    195
0.516304    184
0.500000    178
0.521212    165
0.420732    164
0.441718    163
0.402516    159
0.515789     95
0.469136     81
0.571429     35
0.714286      7
0.400000      5
1.000000      2
0.000000      1
Name: count, dtype: int64

In [806]:
del df_concat["1st Pressure Collect Result_AutoClave"]
del df_concat["1st Pressure Collect Result_AutoClave_weight"]
print(df_concat.shape)

(3404, 118)


In [807]:
# 70. 1st Pressure 1st Pressure Unit Time_AutoClave
target_ratio = df_concat.groupby("1st Pressure 1st Pressure Unit Time_AutoClave")["target"].mean()
print(target_ratio)

1st Pressure 1st Pressure Unit Time_AutoClave
60     1.000000
61     0.933333
180    0.722222
240    0.544901
241    0.455048
300    0.598131
Name: target, dtype: float64


In [808]:
df_concat["1st Pressure 1st Pressure Unit Time_AutoClave_weight"] = df_concat["1st Pressure 1st Pressure Unit Time_AutoClave"].map(target_ratio)
df_concat["1st Pressure 1st Pressure Unit Time_AutoClave_weight"].value_counts()

1st Pressure 1st Pressure Unit Time_AutoClave_weight
0.455048    2169
0.544901     657
0.598131     535
0.722222      18
0.933333      15
1.000000      10
Name: count, dtype: int64

In [809]:
test_data["1st Pressure 1st Pressure Unit Time_AutoClave_weight"] = test_data["1st Pressure 1st Pressure Unit Time_AutoClave"].map(target_ratio)
test_data["1st Pressure 1st Pressure Unit Time_AutoClave_weight"].value_counts()

1st Pressure 1st Pressure Unit Time_AutoClave_weight
0.455048    11886
0.544901     3696
0.598131     1609
0.722222       22
0.933333       20
1.000000        6
Name: count, dtype: int64

In [810]:
print(test_data["1st Pressure 1st Pressure Unit Time_AutoClave_weight"].isnull().sum())

122


In [811]:
# 최빈값으로 결측치 처리
mode_value = test_data["1st Pressure 1st Pressure Unit Time_AutoClave_weight"].mode()[0]
test_data["1st Pressure 1st Pressure Unit Time_AutoClave_weight"].fillna(mode_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data["1st Pressure 1st Pressure Unit Time_AutoClave_weight"].fillna(mode_value, inplace=True)


In [812]:
# 71. 2nd Pressure Collect Result_AutoClave
target_ratio = df_concat.groupby("2nd Pressure Collect Result_AutoClave")["target"].mean()
print(target_ratio)

2nd Pressure Collect Result_AutoClave
0.297    1.000000
0.298    0.333333
0.299    0.500000
0.300    0.461538
0.301    0.459459
0.302    0.576271
0.303    0.593407
0.304    0.613861
0.305    0.447059
0.306    0.553846
0.307    0.629630
0.308    0.480000
0.309    0.514286
0.310    0.523810
0.311    0.505495
0.312    0.509259
0.313    0.435185
0.314    0.567308
0.315    0.547826
0.316    0.576577
0.317    0.555556
0.318    0.562500
0.319    0.875000
0.320    0.000000
0.349    1.000000
0.350    0.500000
0.351    0.500000
0.352    1.000000
0.353    0.500000
0.354    0.333333
0.355    0.333333
0.356    1.000000
0.357    0.666667
0.358    0.666667
0.359    0.333333
0.360    0.500000
0.361    0.000000
0.362    1.000000
0.395    1.000000
0.452    1.000000
0.490    0.401734
0.491    0.408100
0.492    0.455357
0.493    0.304348
0.497    0.277778
0.498    0.535354
0.499    0.619048
0.500    0.540816
0.501    0.623762
0.502    0.638889
0.503    0.600000
0.580    0.545455
0.581    0.342105
0.582   

In [813]:
df_concat["2nd Pressure Collect Result_AutoClave_weight"] = df_concat["2nd Pressure Collect Result_AutoClave"].map(target_ratio)
df_concat["2nd Pressure Collect Result_AutoClave_weight"].value_counts()

2nd Pressure Collect Result_AutoClave_weight
0.401734    346
0.455357    336
0.408100    321
0.619048    126
0.576271    118
0.547826    115
0.459459    111
0.576577    111
0.509259    108
0.435185    108
0.514286    105
0.567308    104
0.613861    101
0.623762    101
0.535354     99
0.540816     98
0.593407     91
0.505495     91
0.447059     85
0.523810     84
0.461538     78
0.553846     65
0.555556     63
0.545455     55
0.629630     54
0.480000     50
0.304348     46
0.500000     40
0.342105     38
0.638889     36
0.333333     24
1.000000     18
0.277778     18
0.562500     16
0.533333     15
0.000000     10
0.875000      8
0.666667      6
0.600000      5
Name: count, dtype: int64

In [814]:
test_data["2nd Pressure Collect Result_AutoClave_weight"] = test_data["2nd Pressure Collect Result_AutoClave"].map(target_ratio)
test_data["2nd Pressure Collect Result_AutoClave_weight"].value_counts()

2nd Pressure Collect Result_AutoClave_weight
0.408100    2136
0.401734    2082
0.455357    1824
0.509259     683
0.435185     637
0.505495     618
0.523810     579
0.567308     569
0.459459     551
0.576271     549
0.514286     523
0.447059     507
0.547826     488
0.576577     455
0.613861     451
0.480000     439
0.593407     406
0.461538     400
0.629630     345
0.553846     344
0.619048     341
0.540816     298
0.304348     269
0.623762     258
0.535354     248
0.555556     241
0.500000     211
0.545455     154
0.342105     128
0.638889     118
0.333333      97
0.533333      91
0.277778      82
0.562500      50
0.000000      45
1.000000      41
0.875000      26
0.600000      17
0.666667      11
Name: count, dtype: int64

In [815]:
print(test_data["2nd Pressure Collect Result_AutoClave_weight"].isnull().sum())

49


In [816]:
mean_value = test_data["2nd Pressure Collect Result_AutoClave_weight"].mean()
test_data["2nd Pressure Collect Result_AutoClave_weight"].fillna(mean_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data["2nd Pressure Collect Result_AutoClave_weight"].fillna(mean_value, inplace=True)


In [817]:
# 72. 2nd Pressure Unit Time_AutoClave
target_ratio = df_concat.groupby("2nd Pressure Unit Time_AutoClave")["target"].mean()
print(target_ratio)

2nd Pressure Unit Time_AutoClave
0      0.000000
1      0.415833
60     0.591356
61     1.000000
120    0.578801
121    0.491062
150    0.600000
180    1.000000
Name: target, dtype: float64


In [818]:
df_concat["2nd Pressure Unit Time_AutoClave_weight"] = df_concat["2nd Pressure Unit Time_AutoClave"].map(target_ratio)
df_concat["2nd Pressure Unit Time_AutoClave_weight"].value_counts()

2nd Pressure Unit Time_AutoClave_weight
0.415833    1200
0.491062     951
0.578801     717
0.591356     509
1.000000      14
0.600000      10
0.000000       3
Name: count, dtype: int64

In [819]:
test_data["2nd Pressure Unit Time_AutoClave_weight"] = test_data["2nd Pressure Unit Time_AutoClave"].map(target_ratio)
test_data["2nd Pressure Unit Time_AutoClave_weight"].value_counts()

2nd Pressure Unit Time_AutoClave_weight
0.415833    8276
0.491062    4578
0.578801    2027
0.591356    1508
0.000000      93
0.600000      27
1.000000      26
Name: count, dtype: int64

In [820]:
print(test_data["2nd Pressure Unit Time_AutoClave_weight"].isnull().sum())

826


In [821]:
mean_value = test_data["2nd Pressure Unit Time_AutoClave_weight"].mean()
test_data["2nd Pressure Unit Time_AutoClave_weight"].fillna(mean_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data["2nd Pressure Unit Time_AutoClave_weight"].fillna(mean_value, inplace=True)


In [822]:
# 73. 3rd Pressure Collect Result_AutoClave
target_ratio = df_concat.groupby("3rd Pressure Collect Result_AutoClave")["target"].mean()
print(target_ratio)

3rd Pressure Collect Result_AutoClave
0.312    1.000000
0.349    1.000000
0.350    0.500000
0.351    0.500000
0.352    1.000000
0.353    0.333333
0.354    0.428571
0.355    0.333333
0.356    0.800000
0.357    1.000000
0.358    0.500000
0.360    0.000000
0.361    0.250000
0.362    1.000000
0.404    1.000000
0.405    1.000000
0.406    0.666667
0.407    1.000000
0.408    1.000000
0.451    1.000000
0.495    0.333333
0.496    0.413043
0.497    0.435597
0.498    0.501429
0.499    0.490754
0.500    0.506008
0.501    0.563084
0.502    0.537037
0.503    0.714286
0.504    1.000000
0.505    1.000000
0.596    0.642857
0.597    0.375000
0.598    0.320000
0.599    0.368421
0.600    0.230769
0.610    0.928571
0.611    0.500000
0.698    1.000000
0.914    0.000000
Name: target, dtype: float64


In [823]:
df_concat["3rd Pressure Collect Result_AutoClave_weight"] = df_concat["3rd Pressure Collect Result_AutoClave"].map(target_ratio)
df_concat["3rd Pressure Collect Result_AutoClave_weight"].value_counts()

3rd Pressure Collect Result_AutoClave_weight
0.506008    749
0.490754    703
0.501429    700
0.563084    428
0.435597    427
0.537037    108
0.413043     92
0.320000     25
0.375000     24
1.000000     22
0.500000     20
0.368421     19
0.714286     14
0.642857     14
0.928571     14
0.230769     13
0.333333      9
0.428571      7
0.800000      5
0.000000      4
0.250000      4
0.666667      3
Name: count, dtype: int64

In [824]:
test_data["3rd Pressure Collect Result_AutoClave_weight"] = test_data["3rd Pressure Collect Result_AutoClave"].map(target_ratio)
test_data["3rd Pressure Collect Result_AutoClave_weight"].value_counts()

3rd Pressure Collect Result_AutoClave_weight
0.501429    3682
0.490754    3509
0.506008    3338
0.435597    2698
0.563084    1735
0.413043     991
0.537037     419
0.375000     131
0.368421     109
0.320000      77
0.500000      55
0.333333      52
1.000000      50
0.642857      45
0.230769      43
0.714286      28
0.000000      23
0.250000      17
0.928571      11
0.800000       9
0.428571       6
0.666667       5
Name: count, dtype: int64

In [825]:
print(test_data["3rd Pressure Collect Result_AutoClave_weight"].isnull().sum())

328


In [826]:
mean_value = test_data["3rd Pressure Collect Result_AutoClave_weight"].mean()
test_data["3rd Pressure Collect Result_AutoClave_weight"].fillna(mean_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data["3rd Pressure Collect Result_AutoClave_weight"].fillna(mean_value, inplace=True)


In [827]:
# 74. 3rd Pressure Unit Time_AutoClave
target_ratio = df_concat.groupby("3rd Pressure Unit Time_AutoClave")["target"].mean()
print(target_ratio)

3rd Pressure Unit Time_AutoClave
1      1.000000
60     0.769231
61     1.000000
90     0.846154
120    0.562757
121    0.452706
150    0.578947
180    0.722222
Name: target, dtype: float64


In [828]:
df_concat["3rd Pressure Unit Time_AutoClave_weight"] = df_concat["3rd Pressure Unit Time_AutoClave"].map(target_ratio)
df_concat["3rd Pressure Unit Time_AutoClave_weight"].value_counts()

3rd Pressure Unit Time_AutoClave_weight
0.452706    2125
0.562757     972
0.578947     247
0.722222      18
1.000000      16
0.846154      13
0.769231      13
Name: count, dtype: int64

In [829]:
test_data["3rd Pressure Unit Time_AutoClave_weight"] = test_data["3rd Pressure Unit Time_AutoClave"].map(target_ratio)
test_data["3rd Pressure Unit Time_AutoClave_weight"].value_counts()

3rd Pressure Unit Time_AutoClave_weight
0.452706    11557
0.562757     4895
0.578947      678
0.846154       69
1.000000       22
0.769231       21
0.722222       21
Name: count, dtype: int64

In [830]:
print(test_data["3rd Pressure Unit Time_AutoClave_weight"].isnull().sum())

98


In [831]:
# 최빈값으로 결측치 처리
mode_value = test_data["3rd Pressure Unit Time_AutoClave_weight"].mode()[0]
test_data["3rd Pressure Unit Time_AutoClave_weight"].fillna(mode_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data["3rd Pressure Unit Time_AutoClave_weight"].fillna(mode_value, inplace=True)


In [832]:
# 75. Chamber Temp. Collect Result_AutoClave
target_ratio = df_concat.groupby("Chamber Temp. Collect Result_AutoClave")["target"].mean()
print(target_ratio)

Chamber Temp. Collect Result_AutoClave
32    0.866667
34    1.000000
35    1.000000
36    1.000000
43    1.000000
45    0.600000
46    0.640000
47    0.514925
48    0.463602
49    0.507128
50    0.537246
51    0.527778
52    0.548387
53    0.477670
54    0.461426
55    0.491289
56    0.488000
57    0.526316
58    0.363636
59    0.000000
Name: target, dtype: float64


In [833]:
df_concat["Chamber Temp. Collect Result_AutoClave_weight"] = df_concat["Chamber Temp. Collect Result_AutoClave"].map(target_ratio)
df_concat["Chamber Temp. Collect Result_AutoClave_weight"].value_counts()

Chamber Temp. Collect Result_AutoClave_weight
0.461426    687
0.477670    515
0.507128    491
0.537246    443
0.491289    287
0.463602    261
0.548387    186
0.514925    134
0.488000    125
0.527778    108
0.526316     76
0.640000     50
0.866667     15
0.363636     11
1.000000      9
0.600000      5
0.000000      1
Name: count, dtype: int64

In [834]:
del df_concat["Chamber Temp. Collect Result_AutoClave"]
del df_concat["Chamber Temp. Collect Result_AutoClave_weight"]
print(df_concat.shape)

(3404, 122)


In [835]:
# 76. Chamber Temp. Unit Time_AutoClave
target_ratio = df_concat.groupby("Chamber Temp. Unit Time_AutoClave")["target"].mean()
print(target_ratio)

Chamber Temp. Unit Time_AutoClave
180    1.000000
183    1.000000
242    1.000000
300    1.000000
303    0.500000
360    0.400000
361    0.212121
363    0.420103
480    0.573661
481    0.565217
483    0.491043
510    0.592308
512    0.600000
540    0.857143
Name: target, dtype: float64


In [836]:
df_concat["Chamber Temp. Unit Time_AutoClave_weight"] = df_concat["Chamber Temp. Unit Time_AutoClave"].map(target_ratio)
df_concat["Chamber Temp. Unit Time_AutoClave_weight"].value_counts()

Chamber Temp. Unit Time_AutoClave_weight
0.420103    1164
0.491043     949
0.573661     896
0.592308     260
0.565217      46
0.212121      33
1.000000      25
0.857143      14
0.600000      10
0.400000       5
0.500000       2
Name: count, dtype: int64

In [837]:
test_data["Chamber Temp. Unit Time_AutoClave_weight"] = test_data["Chamber Temp. Unit Time_AutoClave"].map(target_ratio)
test_data["Chamber Temp. Unit Time_AutoClave_weight"].value_counts()

Chamber Temp. Unit Time_AutoClave_weight
0.420103    6394
0.491043    4576
0.573661    2567
0.212121    1702
0.592308     745
0.565217     190
0.400000      94
0.600000      27
0.857143      25
1.000000       9
0.500000       4
Name: count, dtype: int64

In [838]:
print(test_data["Chamber Temp. Unit Time_AutoClave_weight"].isnull().sum())

1028


In [839]:
# 최빈값으로 결측치 처리
mode_value = test_data["Chamber Temp. Unit Time_AutoClave_weight"].mode()[0]
test_data["Chamber Temp. Unit Time_AutoClave_weight"].fillna(mode_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data["Chamber Temp. Unit Time_AutoClave_weight"].fillna(mode_value, inplace=True)


In [840]:
# 77. Chamber Temp. Judge Value_AutoClave
target_ratio = df_concat.groupby("Chamber Temp. Judge Value_AutoClave")["target"].mean()
print(target_ratio)

Chamber Temp. Judge Value_AutoClave
NG    0.505297
OK    0.497967
Name: target, dtype: float64


In [841]:
# 둘이 수치가 거의 비슷
del df_concat["Chamber Temp. Judge Value_AutoClave"]
print(df_concat.shape)

(3404, 122)


In [842]:
# 78. Equipment_Fill1
target_ratio = df_concat.groupby("Equipment_Fill1")["target"].mean()
print(target_ratio)

Equipment_Fill1
Fill1 dispenser #1    0.513514
Fill1 dispenser #2    0.469112
Name: target, dtype: float64


In [843]:
df_concat["Equipment_Fill1_weight"] = df_concat["Equipment_Fill1"].map(target_ratio)
df_concat["Equipment_Fill1_weight"].value_counts()

Equipment_Fill1_weight
0.513514    2368
0.469112    1036
Name: count, dtype: int64

In [844]:
test_data["Equipment_Fill1_weight"] = test_data["Equipment_Fill1"].map(target_ratio)
test_data["Equipment_Fill1_weight"].value_counts()

Equipment_Fill1_weight
0.513514    10738
0.469112     6623
Name: count, dtype: int64

In [845]:
print(test_data["Equipment_Fill1_weight"].isnull().sum())

0


In [846]:
# 79. Model.Suffix_Fill1
target_ratio = df_concat.groupby("Model.Suffix_Fill1")["target"].mean()
print(target_ratio)

Model.Suffix_Fill1
AJX75334501    0.497020
AJX75334502    0.543909
AJX75334503    0.000000
AJX75334505    0.200000
AJX75334507    0.444444
AJX75334508    0.333333
Name: target, dtype: float64


In [847]:
del df_concat["Model.Suffix_Fill1"]
print(df_concat.shape)

(3404, 122)


In [848]:
# 80. Workorder_Fill1
target_ratio = df_concat.groupby("Workorder_Fill1")["target"].mean()
print(target_ratio)

Workorder_Fill1
3F1X5847-2    0.857143
3F1X9643-1    0.500000
3F1X9644-1    0.615385
3F1X9648-1    0.555556
3F1X9648-2    0.250000
                ...   
4E1X0057-1    0.400000
4E1X9167-1    0.125000
4E1X9168-1    0.500000
4E1X9169-1    0.500000
4E1X9170-1    0.600000
Name: target, Length: 421, dtype: float64


In [849]:
del df_concat["Workorder_Fill1"]
print(df_concat.shape)

(3404, 121)


In [850]:
# 81. DISCHARGED SPEED OF RESIN Collect Result_Fill1
target_ratio = df_concat.groupby("DISCHARGED SPEED OF RESIN Collect Result_Fill1")["target"].mean()
print(target_ratio)

DISCHARGED SPEED OF RESIN Collect Result_Fill1
8.0     0.485714
10.6    0.476770
10.9    0.596651
Name: target, dtype: float64


In [851]:
df_concat["DISCHARGED SPEED OF RESIN Collect Result_Fill1_weight"] = df_concat["DISCHARGED SPEED OF RESIN Collect Result_Fill1"].map(target_ratio)
df_concat["DISCHARGED SPEED OF RESIN Collect Result_Fill1_weight"].value_counts()

DISCHARGED SPEED OF RESIN Collect Result_Fill1_weight
0.476770    2712
0.596651     657
0.485714      35
Name: count, dtype: int64

In [852]:
test_data["DISCHARGED SPEED OF RESIN Collect Result_Fill1_weight"] = test_data["DISCHARGED SPEED OF RESIN Collect Result_Fill1"].map(target_ratio)
test_data["DISCHARGED SPEED OF RESIN Collect Result_Fill1_weight"].value_counts()

DISCHARGED SPEED OF RESIN Collect Result_Fill1_weight
0.476770    15342
0.596651     1878
0.485714      141
Name: count, dtype: int64

In [853]:
print(test_data["DISCHARGED SPEED OF RESIN Collect Result_Fill1_weight"].isnull().sum())

0


In [854]:
# 82. DISCHARGED TIME OF RESIN(Stage1) Collect Result_Fill1
target_ratio = df_concat.groupby("DISCHARGED TIME OF RESIN(Stage1) Collect Result_Fill1")["target"].mean()
print(target_ratio)

DISCHARGED TIME OF RESIN(Stage1) Collect Result_Fill1
12.8    0.398936
12.9    0.424550
13.2    0.592593
13.3    0.555932
13.4    0.528107
13.5    0.532726
13.6    0.714286
17.3    0.400000
17.4    0.550000
Name: target, dtype: float64


In [855]:
df_concat["DISCHARGED TIME OF RESIN(Stage1) Collect Result_Fill1_weight"] = df_concat["DISCHARGED TIME OF RESIN(Stage1) Collect Result_Fill1"].map(target_ratio)
df_concat["DISCHARGED TIME OF RESIN(Stage1) Collect Result_Fill1_weight"].value_counts()

DISCHARGED TIME OF RESIN(Stage1) Collect Result_Fill1_weight
0.532726    1207
0.424550     888
0.528107     676
0.555932     295
0.398936     188
0.592593     108
0.550000      20
0.400000      15
0.714286       7
Name: count, dtype: int64

In [856]:
test_data["DISCHARGED TIME OF RESIN(Stage1) Collect Result_Fill1_weight"] = test_data["DISCHARGED TIME OF RESIN(Stage1) Collect Result_Fill1"].map(target_ratio)
test_data["DISCHARGED TIME OF RESIN(Stage1) Collect Result_Fill1_weight"].value_counts()

DISCHARGED TIME OF RESIN(Stage1) Collect Result_Fill1_weight
0.532726    6864
0.424550    3724
0.528107    2934
0.398936    1160
0.555932     922
0.714286     653
0.592593     286
0.400000      85
0.550000      56
Name: count, dtype: int64

In [857]:
print(test_data["DISCHARGED TIME OF RESIN(Stage1) Collect Result_Fill1_weight"].isnull().sum())

677


In [858]:
mean_value = test_data["DISCHARGED TIME OF RESIN(Stage1) Collect Result_Fill1_weight"].mean()
test_data["DISCHARGED TIME OF RESIN(Stage1) Collect Result_Fill1_weight"].fillna(mean_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data["DISCHARGED TIME OF RESIN(Stage1) Collect Result_Fill1_weight"].fillna(mean_value, inplace=True)


In [859]:
# 83. DISCHARGED TIME OF RESIN(Stage2) Collect Result_Fill1
target_ratio = df_concat.groupby("DISCHARGED TIME OF RESIN(Stage2) Collect Result_Fill1")["target"].mean()
print(target_ratio)

DISCHARGED TIME OF RESIN(Stage2) Collect Result_Fill1
3.5    0.468531
3.6    0.443601
3.7    0.554717
3.8    0.581633
3.9    0.812500
4.0    0.571429
4.2    1.000000
4.3    0.000000
4.7    0.333333
4.9    0.579235
5.0    0.611222
5.1    0.666667
Name: target, dtype: float64


In [860]:
df_concat["DISCHARGED TIME OF RESIN(Stage2) Collect Result_Fill1_weight"] = df_concat["DISCHARGED TIME OF RESIN(Stage2) Collect Result_Fill1"].map(target_ratio)
df_concat["DISCHARGED TIME OF RESIN(Stage2) Collect Result_Fill1_weight"].value_counts()

DISCHARGED TIME OF RESIN(Stage2) Collect Result_Fill1_weight
0.443601    1977
0.611222     499
0.579235     366
0.554717     265
0.468531     143
0.581633      98
0.666667      21
0.812500      16
0.571429      14
0.333333       3
0.000000       1
1.000000       1
Name: count, dtype: int64

In [861]:
del df_concat["DISCHARGED TIME OF RESIN(Stage2) Collect Result_Fill1"]
del df_concat["DISCHARGED TIME OF RESIN(Stage2) Collect Result_Fill1_weight"]
print(df_concat.shape)

(3404, 122)


In [862]:
# 84. DISCHARGED TIME OF RESIN(Stage3) Collect Result_Fill1
target_ratio = df_concat.groupby("DISCHARGED TIME OF RESIN(Stage3) Collect Result_Fill1")["target"].mean()
print(target_ratio)

DISCHARGED TIME OF RESIN(Stage3) Collect Result_Fill1
12.8    0.414815
12.9    0.415755
13.0    0.500000
13.1    0.538462
13.2    0.663551
13.3    0.517241
13.4    0.556391
13.5    0.523994
13.6    0.285714
17.3    0.520000
17.4    0.400000
Name: target, dtype: float64


In [863]:
df_concat["DISCHARGED TIME OF RESIN(Stage3) Collect Result_Fill1_weight"] = df_concat["DISCHARGED TIME OF RESIN(Stage3) Collect Result_Fill1"].map(target_ratio)
df_concat["DISCHARGED TIME OF RESIN(Stage3) Collect Result_Fill1_weight"].value_counts()

DISCHARGED TIME OF RESIN(Stage3) Collect Result_Fill1_weight
0.523994    1292
0.415755     914
0.556391     665
0.517241     232
0.414815     135
0.663551     107
0.520000      25
0.538462      13
0.400000      10
0.285714       7
0.500000       4
Name: count, dtype: int64

In [864]:
test_data["DISCHARGED TIME OF RESIN(Stage3) Collect Result_Fill1_weight"] = test_data["DISCHARGED TIME OF RESIN(Stage3) Collect Result_Fill1"].map(target_ratio)
test_data["DISCHARGED TIME OF RESIN(Stage3) Collect Result_Fill1_weight"].value_counts()

DISCHARGED TIME OF RESIN(Stage3) Collect Result_Fill1_weight
0.523994    7392
0.415755    3906
0.556391    2622
0.414815     909
0.517241     744
0.285714     667
0.663551     252
0.520000      71
0.400000      70
0.500000      26
0.538462      25
Name: count, dtype: int64

In [865]:
print(test_data["DISCHARGED TIME OF RESIN(Stage3) Collect Result_Fill1_weight"].isnull().sum())

677


In [866]:
# 최빈값으로 결측치 처리
mode_value = test_data["DISCHARGED TIME OF RESIN(Stage3) Collect Result_Fill1_weight"].mode()[0]
test_data["DISCHARGED TIME OF RESIN(Stage3) Collect Result_Fill1_weight"].fillna(mode_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data["DISCHARGED TIME OF RESIN(Stage3) Collect Result_Fill1_weight"].fillna(mode_value, inplace=True)


In [867]:
# 85. Dispense Volume(Stage1) Collect Result_Fill1
target_ratio = df_concat.groupby("Dispense Volume(Stage1) Collect Result_Fill1")["target"].mean()
print(target_ratio)

Dispense Volume(Stage1) Collect Result_Fill1
12.16    0.398936
12.25    0.424550
12.45    0.400000
12.52    0.550000
12.54    0.592593
12.63    0.555932
12.73    0.528107
12.82    0.532726
12.92    0.714286
Name: target, dtype: float64


In [868]:
# 82번과 동일한 값이므로 삭제
del df_concat["Dispense Volume(Stage1) Collect Result_Fill1"]
print(df_concat.shape)

(3404, 122)


In [869]:
# 86. Dispense Volume(Stage2) Collect Result_Fill1
target_ratio = df_concat.groupby("Dispense Volume(Stage2) Collect Result_Fill1")["target"].mean()
print(target_ratio)

Dispense Volume(Stage2) Collect Result_Fill1
3.32    0.468531
3.42    0.443601
3.51    0.554717
3.52    0.451613
3.60    0.750000
3.61    0.581633
3.70    0.812500
3.80    0.571429
3.99    1.000000
4.08    0.000000
4.46    0.333333
4.65    0.591045
4.75    0.610101
4.84    0.666667
Name: target, dtype: float64


In [870]:
del df_concat["Dispense Volume(Stage2) Collect Result_Fill1"]
print(df_concat.shape)

(3404, 121)


In [871]:
# 87. Dispense Volume(Stage3) Collect Result_Fill1
target_ratio = df_concat.groupby("Dispense Volume(Stage3) Collect Result_Fill1")["target"].mean()
print(target_ratio)

Dispense Volume(Stage3) Collect Result_Fill1
12.16    0.414815
12.25    0.415755
12.35    0.500000
12.44    0.538462
12.45    0.520000
12.52    0.400000
12.54    0.663551
12.63    0.517241
12.73    0.556391
12.82    0.523994
12.92    0.285714
Name: target, dtype: float64


In [872]:
# 84번과 동일한 값이므로 삭제
del df_concat["Dispense Volume(Stage3) Collect Result_Fill1"]
print(df_concat.shape)

(3404, 120)


In [873]:
# 88. HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill1
target_ratio = df_concat.groupby("HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill1")["target"].mean()
print(target_ratio)

HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill1
458.2    0.400000
458.5    0.477245
458.8    0.535632
459.2    0.714286
459.5    0.655502
460.0    0.597475
460.5    0.000000
837.4    0.419689
837.9    0.410345
838.4    0.414520
Name: target, dtype: float64


In [874]:
df_concat["HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill1_weight"] = df_concat["HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill1"].map(target_ratio)
df_concat["HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill1_weight"].value_counts()

HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill1_weight
0.477245    813
0.597475    713
0.410345    580
0.535632    435
0.414520    427
0.655502    209
0.419689    193
0.714286     28
0.400000      5
0.000000      1
Name: count, dtype: int64

In [875]:
del df_concat["HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill1"]
del df_concat["HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill1_weight"]
print(df_concat.shape)

(3404, 119)


In [876]:
# 89. HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1
target_ratio = df_concat.groupby("HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1")["target"].mean()
print(target_ratio)

HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1
157.0    0.519397
157.5    0.593506
157.7    0.571429
158.0    0.628571
457.6    0.709677
457.7    0.285714
457.8    0.453659
457.9    0.412371
458.1    0.333333
458.3    0.393103
458.4    0.403579
Name: target, dtype: float64


In [877]:
df_concat["HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1_weight"] = df_concat["HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1"].map(target_ratio)
df_concat["HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1_weight"].value_counts()

HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1_weight
0.519397    1392
0.593506     770
0.403579     503
0.393103     290
0.453659     205
0.412371      97
0.333333      60
0.628571      35
0.709677      31
0.285714      14
0.571429       7
Name: count, dtype: int64

In [878]:
test_data["HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1_weight"] = test_data["HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1"].map(target_ratio)
test_data["HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1_weight"].value_counts()

HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1_weight
0.519397    4736
0.393103    2912
0.403579    2118
0.593506    2092
0.453659     669
0.412371     409
0.333333     347
0.628571     154
0.709677      86
0.285714      32
0.571429      30
Name: count, dtype: int64

In [879]:
print(test_data["HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1_weight"].isnull().sum())

3776


In [880]:
del df_concat["HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1"]
del df_concat["HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1_weight"]
del test_data["HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1_weight"]
del test_data["HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill1"]

In [881]:
#90. HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1
target_ratio = df_concat.groupby("HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1")["target"].mean()
print(target_ratio)

HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1
156.1     0.333333
156.3     0.419689
156.5     0.381503
156.8     0.438953
157.0     0.414520
430.2     0.603465
430.5     0.474623
1323.2    0.534404
Name: target, dtype: float64


In [882]:
df_concat["HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1_weight"] = df_concat["HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1"].map(target_ratio)
df_concat["HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1_weight"].value_counts()

HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1_weight
0.603465    1039
0.474623     729
0.534404     436
0.414520     427
0.438953     344
0.419689     193
0.381503     173
0.333333      63
Name: count, dtype: int64

In [883]:
test_data["HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1_weight"] = test_data["HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1"].map(target_ratio)
test_data["HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1_weight"].value_counts()

HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1_weight
0.414520    6216
0.603465    2919
0.474623    2564
0.534404    1529
0.438953    1369
0.333333    1262
0.381503     749
0.419689     695
Name: count, dtype: int64

In [884]:
print(test_data["HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1_weight"].isnull().sum())

58


In [885]:
# 최빈값으로 결측치 처리
mode_value = test_data["HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1_weight"].mode()[0]
test_data["HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1_weight"].fillna(mode_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data["HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill1_weight"].fillna(mode_value, inplace=True)


In [886]:
#91. HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill1
target_ratio = df_concat.groupby("HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill1")["target"].mean()
print(target_ratio)

HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill1
430.5     0.581277
430.8     0.405316
431.1     0.489002
1322.5    0.462882
1323.0    0.608187
1323.2    0.638889
1323.5    0.422371
1324.5    0.000000
Name: target, dtype: float64


In [887]:
df_concat["HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill1_weight"] = df_concat["HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill1"].map(target_ratio)
df_concat["HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill1_weight"].value_counts()

HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill1_weight
0.581277    1175
0.405316     602
0.422371     599
0.489002     591
0.462882     229
0.608187     171
0.638889      36
0.000000       1
Name: count, dtype: int64

In [888]:
del df_concat["HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill1"]
del df_concat["HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill1_weight"]
print(df_concat.shape)

(3404, 118)


In [889]:
# 92. HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill1
target_ratio = df_concat.groupby("HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill1")["target"].mean()
print(target_ratio)

HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill1
430.5     0.528020
430.6     0.666667
430.8     0.474623
1322.5    0.422371
1322.8    0.525000
1323.2    0.638889
1324.0    0.000000
Name: target, dtype: float64


In [890]:
df_concat["HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill1_weight"] = df_concat["HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill1"].map(target_ratio)
df_concat["HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill1_weight"].value_counts()

HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill1_weight
0.528020    1606
0.474623     729
0.422371     599
0.525000     400
0.638889      36
0.666667      33
0.000000       1
Name: count, dtype: int64

In [891]:
del df_concat["HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill1"]
del df_concat["HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill1_weight"]
print(df_concat.shape)

(3404, 117)


In [892]:
# 93. HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill
target_ratio = df_concat.groupby("HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill1")["target"].mean()
print(target_ratio)

HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill1
244.2     0.537797
244.3     0.550000
244.4     0.550000
244.6     0.000000
431.1     0.405000
1323.1    0.422371
1324.1    0.000000
Name: target, dtype: float64


In [893]:
df_concat["HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill1_weight"] = df_concat["HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill1"].map(target_ratio)
df_concat["HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill1_weight"].value_counts()

HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill1_weight
0.550000    1740
0.405000     600
0.422371     599
0.537797     463
0.000000       2
Name: count, dtype: int64

In [894]:
del df_concat["HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill1"]
del df_concat["HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill1_weight"]
print(df_concat.shape)

(3404, 116)


In [895]:
# 94. HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill
target_ratio = df_concat.groupby("HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill1")["target"].mean()
print(target_ratio)

HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill1
244.200    0.537797
244.275    0.421875
244.300    0.522565
244.375    0.436364
244.400    0.535575
244.415    0.351351
244.450    0.333333
244.490    0.178571
244.505    0.420875
244.543    0.411765
244.555    0.550000
244.600    0.727273
244.618    0.338710
244.728    0.416667
Name: target, dtype: float64


In [896]:
df_concat["HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill1_weight"] = df_concat["HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill1"].map(target_ratio)
df_concat["HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill1_weight"].value_counts()

HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill1_weight
0.535575    1279
0.522565     842
0.537797     463
0.420875     297
0.338710     186
0.421875      64
0.436364      55
0.411765      51
0.550000      40
0.351351      37
0.416667      36
0.178571      28
0.333333      15
0.727273      11
Name: count, dtype: int64

In [897]:
test_data["HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill1_weight"] = test_data["HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill1"].map(target_ratio)
test_data["HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill1_weight"].value_counts()

HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill1_weight
0.535575    5821
0.522565    4658
0.537797    1385
0.420875    1310
0.338710     805
0.351351     348
0.333333     259
0.436364     240
0.421875     225
0.416667     224
0.411765     207
0.178571     124
0.550000      78
0.727273       9
Name: count, dtype: int64

In [898]:
print(test_data["HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill1_weight"].isnull().sum())

1668


In [899]:
mean_value = test_data["HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill1_weight"].mean()
test_data["HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill1_weight"].fillna(mean_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data["HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill1_weight"].fillna(mean_value, inplace=True)


In [900]:
# 95. HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Fill
target_ratio = df_concat.groupby("HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Fill1")["target"].mean()
print(target_ratio)

HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Fill1
244.200    0.537797
244.275    0.421875
244.300    0.522565
244.375    0.436364
244.400    0.535575
244.415    0.351351
244.450    0.333333
244.490    0.178571
244.505    0.420875
244.543    0.411765
244.555    0.550000
244.600    0.727273
244.618    0.338710
244.728    0.416667
Name: target, dtype: float64


In [901]:
del df_concat["HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Fill1"]
print(df_concat.shape)

(3404, 116)


In [902]:
# 96. HEAD Standby Position X Collect Result_Fill1
target_ratio = df_concat.groupby("HEAD Standby Position X Collect Result_Fill1")["target"].mean()
print(target_ratio)

HEAD Standby Position X Collect Result_Fill1
244.275    0.421875
244.300    0.435644
244.375    0.436364
244.400    0.465608
244.415    0.351351
244.450    0.333333
244.490    0.178571
244.505    0.420875
244.543    0.411765
244.555    0.550000
244.618    0.338710
244.728    0.416667
289.000    0.547187
Name: target, dtype: float64


In [903]:
df_concat["HEAD Standby Position X Collect Result_Fill1_weight"] = df_concat["HEAD Standby Position X Collect Result_Fill1"].map(target_ratio)
df_concat["HEAD Standby Position X Collect Result_Fill1_weight"].value_counts()

HEAD Standby Position X Collect Result_Fill1_weight
0.547187    2204
0.420875     297
0.435644     202
0.465608     189
0.338710     186
0.421875      64
0.436364      55
0.411765      51
0.550000      40
0.351351      37
0.416667      36
0.178571      28
0.333333      15
Name: count, dtype: int64

In [904]:
test_data["HEAD Standby Position X Collect Result_Fill1_weight"] = test_data["HEAD Standby Position X Collect Result_Fill1"].map(target_ratio)
test_data["HEAD Standby Position X Collect Result_Fill1_weight"].value_counts()

HEAD Standby Position X Collect Result_Fill1_weight
0.547187    7012
0.465608    2447
0.435644    2414
0.420875    1310
0.338710     805
0.351351     348
0.333333     259
0.436364     240
0.421875     225
0.416667     224
0.411765     207
0.178571     124
0.550000      78
Name: count, dtype: int64

In [905]:
print(test_data["HEAD Standby Position X Collect Result_Fill1_weight"].isnull().sum())

1668


In [906]:
mean_value = test_data["HEAD Standby Position X Collect Result_Fill1_weight"].mean()
test_data["HEAD Standby Position X Collect Result_Fill1_weight"].fillna(mean_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data["HEAD Standby Position X Collect Result_Fill1_weight"].fillna(mean_value, inplace=True)


In [907]:
# 97. HEAD Standby Position Y Collect Result_Fill1
target_ratio = df_concat.groupby("HEAD Standby Position Y Collect Result_Fill1")["target"].mean()
print(target_ratio)

HEAD Standby Position Y Collect Result_Fill1
50     0.547187
289    0.413333
Name: target, dtype: float64


In [908]:
del df_concat["HEAD Standby Position Y Collect Result_Fill1"]
print(df_concat.shape)

(3404, 116)


In [909]:
# 98. HEAD Standby Position Z Collect Result_Fill1
target_ratio = df_concat.groupby("HEAD Standby Position Z Collect Result_Fill1")["target"].mean()
print(target_ratio)

HEAD Standby Position Z Collect Result_Fill1
0     0.547187
50    0.413333
Name: target, dtype: float64


In [910]:
del df_concat["HEAD Standby Position Z Collect Result_Fill1"]
print(df_concat.shape)

(3404, 115)


In [911]:
# 99. Head Clean Position X Collect Result_Fill1
target_ratio = df_concat.groupby("Head Clean Position X Collect Result_Fill1")["target"].mean()
print(target_ratio)

Head Clean Position X Collect Result_Fill1
0.0      0.413333
123.4    0.547187
Name: target, dtype: float64


In [912]:
del df_concat["Head Clean Position X Collect Result_Fill1"]
print(df_concat.shape)

(3404, 114)


In [913]:
# 100. Head Clean Position Y Collect Result_Fill1
target_ratio = df_concat.groupby("Head Clean Position Y Collect Result_Fill1")["target"].mean()
print(target_ratio)

Head Clean Position Y Collect Result_Fill1
50.0     0.547187
123.4    0.413333
Name: target, dtype: float64


In [914]:
del df_concat["Head Clean Position Y Collect Result_Fill1"]
print(df_concat.shape)

(3404, 113)


In [915]:
# 101. Head Clean Position Z Collect Result_Fill1
target_ratio = df_concat.groupby("Head Clean Position Z Collect Result_Fill1")["target"].mean()
print(target_ratio)

Head Clean Position Z Collect Result_Fill1
50.0    0.413333
92.2    0.547187
Name: target, dtype: float64


In [916]:
del df_concat["Head Clean Position Z Collect Result_Fill1"]
print(df_concat.shape)

(3404, 112)


In [917]:
# 102. Head Purge Position X Collect Result_Fill1
target_ratio = df_concat.groupby("Head Purge Position X Collect Result_Fill1")["target"].mean()
print(target_ratio)

Head Purge Position X Collect Result_Fill1
92.2     0.413333
289.0    0.547187
Name: target, dtype: float64


In [918]:
del df_concat["Head Purge Position X Collect Result_Fill1"]
print(df_concat.shape)

(3404, 111)


In [919]:
# 103. Head Purge Position Y Collect Result_Fill1
target_ratio = df_concat.groupby("Head Purge Position Y Collect Result_Fill1")["target"].mean()
print(target_ratio)

Head Purge Position Y Collect Result_Fill1
50     0.547187
289    0.413333
Name: target, dtype: float64


In [920]:
del df_concat["Head Purge Position Y Collect Result_Fill1"]
print(df_concat.shape)

(3404, 110)


In [921]:
# 104. Head Purge Position Z Collect Result_Fill1
target_ratio = df_concat.groupby("Head Purge Position Z Collect Result_Fill1")["target"].mean()
print(target_ratio)

Head Purge Position Z Collect Result_Fill1
50     0.413333
85     0.585818
145    0.497404
Name: target, dtype: float64


In [922]:
df_concat["Head Purge Position Z Collect Result_Fill1_weight"] = df_concat["Head Purge Position Z Collect Result_Fill1"].map(target_ratio)
df_concat["Head Purge Position Z Collect Result_Fill1_weight"].value_counts()

Head Purge Position Z Collect Result_Fill1_weight
0.585818    1241
0.413333    1200
0.497404     963
Name: count, dtype: int64

In [923]:
test_data["Head Purge Position Z Collect Result_Fill1_weight"] = test_data["Head Purge Position Z Collect Result_Fill1"].map(target_ratio)
test_data["Head Purge Position Z Collect Result_Fill1_weight"].value_counts()

Head Purge Position Z Collect Result_Fill1_weight
0.413333    10349
0.585818     3571
0.497404     3441
Name: count, dtype: int64

In [924]:
print(test_data["Head Purge Position Z Collect Result_Fill1_weight"].isnull().sum())

0


In [925]:
# 105. Machine Tact time Collect Result_Fill1
target_ratio = df_concat.groupby("Machine Tact time Collect Result_Fill1")["target"].mean()
print(target_ratio)

Machine Tact time Collect Result_Fill1
40.3     1.000000
48.0     0.000000
48.1     0.600000
48.2     0.333333
48.8     0.000000
           ...   
110.9    1.000000
111.0    0.833333
111.1    0.500000
128.0    0.413333
150.1    1.000000
Name: target, Length: 122, dtype: float64


In [926]:
del df_concat["Machine Tact time Collect Result_Fill1"]
print(df_concat.shape)

(3404, 110)


In [927]:
# 106. PalletID Collect Result_Fill1
target_ratio = df_concat.groupby("PalletID Collect Result_Fill1")["target"].mean()
print(target_ratio)

PalletID Collect Result_Fill1
1.0     0.561576
2.0     0.556962
3.0     0.556522
4.0     0.556604
5.0     0.534247
          ...   
61.9    0.000000
65.8    1.000000
69.1    0.000000
83.1    1.000000
91.9    0.000000
Name: target, Length: 103, dtype: float64


In [928]:
df_concat["PalletID Collect Result_Fill1_weight"] = df_concat["PalletID Collect Result_Fill1"].map(target_ratio)
df_concat["PalletID Collect Result_Fill1_weight"].value_counts()

PalletID Collect Result_Fill1_weight
0.550420    238
0.556962    237
0.556522    230
0.554054    222
0.534247    219
0.556604    212
0.531401    207
0.561576    203
0.500000     98
0.305263     95
0.505376     93
0.439024     82
0.521127     71
0.587302     63
0.698413     63
0.333333     63
0.250000     60
0.542373     59
0.370370     54
0.452830     53
0.340426     47
0.369565     46
0.304348     46
0.441860     43
0.435897     39
0.684211     38
0.297297     37
0.305556     36
0.468750     32
0.406250     32
0.516129     31
0.354839     31
0.300000     30
0.407407     27
0.444444     27
0.423077     26
0.750000     24
0.476190     21
0.400000     20
1.000000     16
0.600000     15
0.571429     14
0.428571     14
0.642857     14
0.384615     13
0.461538     13
0.000000     12
0.454545     11
0.222222      9
0.666667      9
0.111111      9
Name: count, dtype: int64

In [929]:
test_data["PalletID Collect Result_Fill1_weight"] = test_data["PalletID Collect Result_Fill1"].map(target_ratio)
test_data["PalletID Collect Result_Fill1_weight"].value_counts()

PalletID Collect Result_Fill1_weight
0.500000    773
0.554054    701
0.556522    693
0.550420    689
0.556962    687
0.531401    682
0.561576    679
0.534247    677
0.556604    675
0.305263    672
0.505376    500
0.435897    424
0.439024    421
0.111111    339
0.333333    333
0.250000    282
0.370370    277
0.750000    262
0.305556    243
0.407407    232
0.587302    226
0.400000    224
0.304348    224
0.369565    217
0.698413    214
0.000000    213
0.461538    206
0.542373    201
0.521127    190
0.441860    189
0.516129    181
0.452830    180
0.600000    176
0.300000    170
0.428571    161
0.468750    157
0.354839    154
0.340426    148
0.297297    148
1.000000    122
0.684211    105
0.384615    101
0.406250    100
0.444444     92
0.423077     90
0.476190     86
0.571429     78
0.642857     71
0.454545     60
0.666667     51
0.222222     37
Name: count, dtype: int64

In [930]:
print(test_data["PalletID Collect Result_Fill1_weight"].isnull().sum())

2548


In [931]:
mean_value = test_data["PalletID Collect Result_Fill1_weight"].mean()
test_data["PalletID Collect Result_Fill1_weight"].fillna(mean_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data["PalletID Collect Result_Fill1_weight"].fillna(mean_value, inplace=True)


In [932]:
#107. Production Qty Collect Result_Fill1
target_ratio = df_concat.groupby("Production Qty Collect Result_Fill1")["target"].mean()
print(target_ratio)

Production Qty Collect Result_Fill1
0      0.714286
1      0.428571
2      0.470588
3      0.473118
4      0.350649
         ...   
465    0.000000
468    0.000000
470    1.000000
485    1.000000
487    1.000000
Name: target, Length: 426, dtype: float64


In [933]:
del df_concat["Production Qty Collect Result_Fill1"]
print(df_concat.shape)

(3404, 110)


In [934]:
# 108. Receip No Collect Result_Fill1
target_ratio = df_concat.groupby("Receip No Collect Result_Fill1")["target"].mean()
print(target_ratio)

Receip No Collect Result_Fill1
0      0.714286
1      0.547349
2      1.000000
3      0.333333
4      0.750000
         ...   
591    0.000000
596    0.000000
597    0.000000
599    0.000000
608    0.000000
Name: target, Length: 470, dtype: float64


In [935]:
del df_concat["Receip No Collect Result_Fill1"]
print(df_concat.shape)

(3404, 109)


In [936]:
# 109WorkMode Collect Result_Fill1
target_ratio = df_concat.groupby("WorkMode Collect Result_Fill1")["target"].mean()
print(target_ratio)

WorkMode Collect Result_Fill1
1    0.397645
6    0.600000
7    0.547187
9    0.000000
Name: target, dtype: float64


In [937]:
df_concat["WorkMode Collect Result_Fill1_weight"] = df_concat["WorkMode Collect Result_Fill1"].map(target_ratio)
df_concat["WorkMode Collect Result_Fill1_weight"].value_counts()

WorkMode Collect Result_Fill1_weight
0.547187    2204
0.397645    1104
0.600000      95
0.000000       1
Name: count, dtype: int64

In [938]:
del df_concat["WorkMode Collect Result_Fill1"]
del df_concat["WorkMode Collect Result_Fill1_weight"]
print(df_concat.shape)

(3404, 108)


In [939]:
# 110. Equipment_Fill2
target_ratio = df_concat.groupby("Equipment_Fill2")["target"].mean()
print(target_ratio)

Equipment_Fill2
Fill2 dispenser #1    0.513102
Fill2 dispenser #2    0.470135
Name: target, dtype: float64


In [940]:
df_concat["Equipment_Fill2_weight"] = df_concat["Equipment_Fill2"].map(target_ratio)
df_concat["Equipment_Fill2_weight"].value_counts()

Equipment_Fill2_weight
0.513102    2366
0.470135    1038
Name: count, dtype: int64

In [941]:
test_data["Equipment_Fill2_weight"] = test_data["Equipment_Fill2"].map(target_ratio)
test_data["Equipment_Fill2_weight"].value_counts()

Equipment_Fill2_weight
0.513102    10740
0.470135     6621
Name: count, dtype: int64

In [942]:
print(test_data["Equipment_Fill2_weight"].isnull().sum())

0


In [943]:
# 111. Model.Suffix_Fill2
target_ratio = df_concat.groupby("Model.Suffix_Fill2")["target"].mean()
print(target_ratio)

Model.Suffix_Fill2
AJX75334501    0.497020
AJX75334502    0.543909
AJX75334503    0.000000
AJX75334505    0.200000
AJX75334507    0.444444
AJX75334508    0.333333
Name: target, dtype: float64


In [944]:
del df_concat["Model.Suffix_Fill2"]
print(df_concat.shape)

(3404, 108)


In [945]:
# 112. Workorder_Fill2
target_ratio = df_concat.groupby("Workorder_Fill2")["target"].mean()
print(target_ratio)

Workorder_Fill2
3F1X5847-2    0.857143
3F1X9643-1    0.500000
3F1X9644-1    0.615385
3F1X9648-1    0.555556
3F1X9648-2    0.250000
                ...   
4E1X0057-1    0.400000
4E1X9167-1    0.125000
4E1X9168-1    0.500000
4E1X9169-1    0.500000
4E1X9170-1    0.600000
Name: target, Length: 421, dtype: float64


In [946]:
del df_concat["Workorder_Fill2"]
print(df_concat.shape)

(3404, 107)


In [947]:
# 113. CURE END POSITION X Collect Result_Fill2
target_ratio = df_concat.groupby("CURE END POSITION X Collect Result_Fill2")["target"].mean()
print(target_ratio)

CURE END POSITION X Collect Result_Fill2
240    0.5
Name: target, dtype: float64


In [948]:
del df_concat["CURE END POSITION X Collect Result_Fill2"]
print(df_concat.shape)

(3404, 106)


In [949]:
# 114. CURE END POSITION Z Collect Result_Fill2
target_ratio = df_concat.groupby("CURE END POSITION Z Collect Result_Fill2")["target"].mean()
print(target_ratio)

CURE END POSITION Z Collect Result_Fill2
22    1.000000
32    0.592896
33    0.455295
Name: target, dtype: float64


In [950]:
df_concat["CURE END POSITION Z Collect Result_Fill2_weight"] = df_concat["CURE END POSITION Z Collect Result_Fill2"].map(target_ratio)
df_concat["CURE END POSITION Z Collect Result_Fill2_weight"].value_counts()

CURE END POSITION Z Collect Result_Fill2_weight
0.455295    2304
0.592896    1098
1.000000       2
Name: count, dtype: int64

In [951]:
del df_concat["CURE END POSITION Z Collect Result_Fill2"]
del df_concat["CURE END POSITION Z Collect Result_Fill2_weight"]
print(df_concat.shape)

(3404, 105)


In [952]:
# 115. CURE SPEED Collect Result_Fill2
target_ratio = df_concat.groupby("CURE SPEED Collect Result_Fill2")["target"].mean()
print(target_ratio)

CURE SPEED Collect Result_Fill2
50    0.497131
51    0.569231
55    0.678571
Name: target, dtype: float64


In [953]:
df_concat["CURE SPEED Collect Result_Fill2_weight"] = df_concat["CURE SPEED Collect Result_Fill2"].map(target_ratio)
df_concat["CURE SPEED Collect Result_Fill2_weight"].value_counts()

CURE SPEED Collect Result_Fill2_weight
0.497131    3311
0.569231      65
0.678571      28
Name: count, dtype: int64

In [954]:
test_data["CURE SPEED Collect Result_Fill2_weight"] = test_data["CURE SPEED Collect Result_Fill2"].map(target_ratio)
test_data["CURE SPEED Collect Result_Fill2_weight"].value_counts()

CURE SPEED Collect Result_Fill2_weight
0.497131    14853
0.569231      191
0.678571      149
Name: count, dtype: int64

In [955]:
print(test_data["CURE SPEED Collect Result_Fill2_weight"].isnull().sum())

2168


In [956]:
# 최빈값으로 결측치 처리
mode_value = test_data["CURE SPEED Collect Result_Fill2_weight"].mode()[0]
test_data["CURE SPEED Collect Result_Fill2_weight"].fillna(mode_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data["CURE SPEED Collect Result_Fill2_weight"].fillna(mode_value, inplace=True)


In [957]:
# 116. CURE STANDBY POSITION Z Collect Result_Fill2
target_ratio = df_concat.groupby("CURE STANDBY POSITION Z Collect Result_Fill2")["target"].mean()
print(target_ratio)

CURE STANDBY POSITION Z Collect Result_Fill2
22    0.563636
23    0.550000
32    0.568151
33    0.478193
Name: target, dtype: float64


In [958]:
df_concat["CURE STANDBY POSITION Z Collect Result_Fill2_weight"] = df_concat["CURE STANDBY POSITION Z Collect Result_Fill2"].map(target_ratio)
df_concat["CURE STANDBY POSITION Z Collect Result_Fill2_weight"].value_counts()

CURE STANDBY POSITION Z Collect Result_Fill2_weight
0.478193    2568
0.568151     741
0.563636      55
0.550000      40
Name: count, dtype: int64

In [959]:
test_data["CURE STANDBY POSITION Z Collect Result_Fill2_weight"] = test_data["CURE STANDBY POSITION Z Collect Result_Fill2"].map(target_ratio)
test_data["CURE STANDBY POSITION Z Collect Result_Fill2_weight"].value_counts()

CURE STANDBY POSITION Z Collect Result_Fill2_weight
0.478193    14889
0.568151     2191
0.563636      189
0.550000       92
Name: count, dtype: int64

In [960]:
print(test_data["CURE STANDBY POSITION Z Collect Result_Fill2_weight"].isnull().sum())

0


In [961]:
# 117. CURE START POSITION X Collect Result_Fill2
target_ratio = df_concat.groupby("CURE START POSITION X Collect Result_Fill2")["target"].mean()
print(target_ratio)

CURE START POSITION X Collect Result_Fill2
1020    0.5
Name: target, dtype: float64


In [962]:
del df_concat["CURE START POSITION X Collect Result_Fill2"]
print(df_concat.shape)

(3404, 106)


In [963]:
#118. CURE START POSITION Z Collect Result_Fill2
target_ratio = df_concat.groupby("CURE START POSITION Z Collect Result_Fill2")["target"].mean()
print(target_ratio)

CURE START POSITION Z Collect Result_Fill2
22    0.563636
23    0.550000
32    0.568151
33    0.478193
Name: target, dtype: float64


In [964]:
# 116번과 동일한 값이므로 삭제
del df_concat["CURE START POSITION Z Collect Result_Fill2"]
print(df_concat.shape)

(3404, 105)


In [965]:
# 119. HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill2
target_ratio = df_concat.groupby("HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill2")["target"].mean()
print(target_ratio)

HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill2
305.0    0.423588
458.0    0.550339
499.8    0.534404
835.5    0.403010
Name: target, dtype: float64


In [966]:
df_concat["HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill2_weight"] = df_concat["HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill2"].map(target_ratio)
df_concat["HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill2_weight"].value_counts()

HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill2_weight
0.550339    1768
0.423588     602
0.403010     598
0.534404     436
Name: count, dtype: int64

In [967]:
test_data["HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill2_weight"] = test_data["HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill2"].map(target_ratio)
test_data["HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill2_weight"].value_counts()

HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill2_weight
0.550339    5482
0.403010    5258
0.423588    4682
0.534404    1530
Name: count, dtype: int64

In [968]:
print(test_data["HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill2_weight"].isnull().sum())

409


In [969]:
mean_value = test_data["HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill2_weight"].mean()
test_data["HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill2_weight"].fillna(mean_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data["HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill2_weight"].fillna(mean_value, inplace=True)


In [970]:
# 120. HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill
target_ratio = df_concat.groupby("HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill2")["target"].mean()
print(target_ratio)

HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill2
156.0    0.550339
458.0    0.403010
499.8    0.423588
694.0    0.534404
Name: target, dtype: float64


In [971]:
# 119번과 동일한 값이므로 삭제
del df_concat["HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill2"]
print(df_concat.shape)

(3404, 105)


In [972]:
# 121. HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill2
target_ratio = df_concat.groupby("HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill2")["target"].mean()
print(target_ratio)

HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill2
156.0     0.403010
428.0     0.550339
694.0     0.423588
1324.2    0.534404
Name: target, dtype: float64


In [973]:
# 119번과 동일한 값이므로 삭제
del df_concat["HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill2"]
print(df_concat.shape)

(3404, 104)


In [974]:
# 122. HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill2
target_ratio = df_concat.groupby("HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill2")["target"].mean()
print(target_ratio)

HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill2
427.9     0.550339
428.0     0.403010
1324.2    0.470135
Name: target, dtype: float64


In [975]:
df_concat["HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill2_weight"] = df_concat["HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill2"].map(target_ratio)
df_concat["HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill2_weight"].value_counts()

HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill2_weight
0.550339    1768
0.470135    1038
0.403010     598
Name: count, dtype: int64

In [976]:
test_data["HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill2_weight"] = test_data["HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill2"].map(target_ratio)
test_data["HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill2_weight"].value_counts()

HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill2_weight
0.470135    6621
0.550339    5482
0.403010    5258
Name: count, dtype: int64

In [977]:
print(test_data["HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill2_weight"].isnull().sum())

0


In [978]:
#123. HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill2
target_ratio = df_concat.groupby("HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill2")["target"].mean()
print(target_ratio)

HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill2
427.9     0.403010
428.0     0.550339
1324.2    0.470135
Name: target, dtype: float64


In [979]:
# 122번과 동일한 값이므로 삭제
del df_concat["HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill2"]
print(df_concat.shape)

(3404, 104)


In [980]:
# 124. HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill2
target_ratio = df_concat.groupby("HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill2")["target"].mean()
print(target_ratio)

HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill2
243.5     0.534404
243.7     0.550339
428.0     0.403010
1324.2    0.423588
Name: target, dtype: float64


In [981]:
# 119번과 동일한 값이므로 삭제
del df_concat["HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill2"]
print(df_concat.shape)

(3404, 103)


In [982]:
# 125. HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill2
target_ratio = df_concat.groupby("HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill2")["target"].mean()
print(target_ratio)

HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill2
243.5    0.470135
243.7    0.513102
Name: target, dtype: float64


In [983]:
del df_concat["HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill2"]
print(df_concat.shape)

(3404, 102)


In [984]:
# 126. HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Fill
target_ratio = df_concat.groupby("HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Fill2")["target"].mean()
print(target_ratio)

HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Fill2
243.5    0.470135
243.7    0.513102
Name: target, dtype: float64


In [985]:
del df_concat["HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Fill2"]
print(df_concat.shape)

(3404, 101)


In [986]:
# 127. HEAD Standby Position X Collect Result_Fill2
target_ratio = df_concat.groupby("HEAD Standby Position X Collect Result_Fill2")["target"].mean()
print(target_ratio)

HEAD Standby Position X Collect Result_Fill2
243.5    0.423588
243.7    0.403010
270.0    0.547187
Name: target, dtype: float64


In [987]:
df_concat["HEAD Standby Position X Collect Result_Fill2_weight"] = df_concat["HEAD Standby Position X Collect Result_Fill2"].map(target_ratio)
df_concat["HEAD Standby Position X Collect Result_Fill2_weight"].value_counts()

HEAD Standby Position X Collect Result_Fill2_weight
0.547187    2204
0.423588     602
0.403010     598
Name: count, dtype: int64

In [988]:
test_data["HEAD Standby Position X Collect Result_Fill2_weight"] = test_data["HEAD Standby Position X Collect Result_Fill2"].map(target_ratio)
test_data["HEAD Standby Position X Collect Result_Fill2_weight"].value_counts()

HEAD Standby Position X Collect Result_Fill2_weight
0.547187    7012
0.403010    5258
0.423588    5091
Name: count, dtype: int64

In [989]:
print(test_data["HEAD Standby Position X Collect Result_Fill2_weight"].isnull().sum())

0


In [990]:
# 128. HEAD Standby Position Y Collect Result_Fill2
target_ratio = df_concat.groupby("HEAD Standby Position Y Collect Result_Fill2")["target"].mean()
print(target_ratio)

HEAD Standby Position Y Collect Result_Fill2
50     0.547187
270    0.413333
Name: target, dtype: float64


In [991]:
df_concat["HEAD Standby Position Y Collect Result_Fill2_weight"] = df_concat["HEAD Standby Position Y Collect Result_Fill2"].map(target_ratio)
df_concat["HEAD Standby Position Y Collect Result_Fill2_weight"].value_counts()

HEAD Standby Position Y Collect Result_Fill2_weight
0.547187    2204
0.413333    1200
Name: count, dtype: int64

In [992]:
test_data["HEAD Standby Position Y Collect Result_Fill2_weight"] = test_data["HEAD Standby Position Y Collect Result_Fill2"].map(target_ratio)
test_data["HEAD Standby Position Y Collect Result_Fill2_weight"].value_counts()

HEAD Standby Position Y Collect Result_Fill2_weight
0.413333    10349
0.547187     7012
Name: count, dtype: int64

In [993]:
print(test_data["HEAD Standby Position Y Collect Result_Fill2_weight"].isnull().sum())

0


In [994]:
# 129. HEAD Standby Position Z Collect Result_Fill2
target_ratio = df_concat.groupby("HEAD Standby Position Z Collect Result_Fill2")["target"].mean()
print(target_ratio)

HEAD Standby Position Z Collect Result_Fill2
-10    0.547187
 50    0.413333
Name: target, dtype: float64


In [995]:
# 128번과 동일한 값이므로 삭제
del df_concat["HEAD Standby Position Z Collect Result_Fill2"]
print(df_concat.shape)

(3404, 102)


In [996]:
# 130. Head Clean Position X Collect Result_Fill2
target_ratio = df_concat.groupby("Head Clean Position X Collect Result_Fill2")["target"].mean()
print(target_ratio)

Head Clean Position X Collect Result_Fill2
-10     0.413333
 119    0.547187
Name: target, dtype: float64


In [997]:
# 128번과 동일한 값이므로 삭제
del df_concat["Head Clean Position X Collect Result_Fill2"]
print(df_concat.shape)

(3404, 101)


In [998]:
# 131. Head Clean Position Y Collect Result_Fill2
target_ratio = df_concat.groupby("Head Clean Position Y Collect Result_Fill2")["target"].mean()
print(target_ratio)

Head Clean Position Y Collect Result_Fill2
50     0.547187
119    0.413333
Name: target, dtype: float64


In [999]:
# 128번과 동일한 값이므로 삭제
del df_concat["Head Clean Position Y Collect Result_Fill2"]
print(df_concat.shape)

(3404, 100)


In [1000]:
# 132. Head Clean Position Z Collect Result
target_ratio = df_concat.groupby("Head Clean Position Z Collect Result_Fill2")["target"].mean()
print(target_ratio)

Head Clean Position Z Collect Result_Fill2
50.0    0.413333
91.8    0.547187
Name: target, dtype: float64


In [1001]:
# 128번과 동일한 값이므로 삭제
del df_concat["Head Clean Position Z Collect Result_Fill2"]
print(df_concat.shape)

(3404, 99)


In [1002]:
# 133. Head Purge Position X Collect Result_Fill2
target_ratio = df_concat.groupby("Head Purge Position X Collect Result_Fill2")["target"].mean()
print(target_ratio)

Head Purge Position X Collect Result_Fill2
91.8     0.413333
270.0    0.547187
Name: target, dtype: float64


In [1003]:
# 128번과 동일한 값이므로 삭제
del df_concat["Head Purge Position X Collect Result_Fill2"]
print(df_concat.shape)

(3404, 98)


In [1004]:
# 134. Head Purge Position Y Collect Result_Fill2
target_ratio = df_concat.groupby("Head Purge Position Y Collect Result_Fill2")["target"].mean()
print(target_ratio)

Head Purge Position Y Collect Result_Fill2
50     0.547187
270    0.413333
Name: target, dtype: float64


In [1005]:
# 128번과 동일한 값이므로 삭제
del df_concat["Head Purge Position Y Collect Result_Fill2"]
print(df_concat.shape)

(3404, 97)


In [1006]:
# 135. Head Purge Position Z Collect Result_Fill2
target_ratio = df_concat.groupby("Head Purge Position Z Collect Result_Fill2")["target"].mean()
print(target_ratio)

Head Purge Position Z Collect Result_Fill2
50    0.413333
85    0.547187
Name: target, dtype: float64


In [1007]:
# 128번과 동일한 값이므로 삭제
del df_concat["Head Purge Position Z Collect Result_Fill2"]
print(df_concat.shape)

(3404, 96)


In [1008]:
# 136. Machine Tact time Collect Result_Fill2
target_ratio = df_concat.groupby("Machine Tact time Collect Result_Fill2")["target"].mean()
print(target_ratio)

Machine Tact time Collect Result_Fill2
18.000     1.000000
18.100     1.000000
18.200     0.500000
18.300     0.500000
18.400     0.642857
18.500     0.407407
18.600     0.610169
18.700     0.597561
18.800     0.607477
18.900     0.592000
19.000     0.625850
19.100     0.595745
19.200     0.529412
19.300     0.574074
19.400     0.604317
19.500     0.487179
19.600     0.463277
19.700     0.492958
19.800     0.476821
19.900     0.570248
20.000     0.542553
20.100     0.450704
20.200     0.533333
20.300     0.666667
20.400     0.444444
20.500     0.750000
20.600     0.500000
20.700     1.000000
20.800     0.250000
21.000     1.000000
85.000     0.415596
114.612    0.333333
Name: target, dtype: float64


In [1009]:
df_concat["Machine Tact time Collect Result_Fill2_weight"] = df_concat["Machine Tact time Collect Result_Fill2"].map(target_ratio)
df_concat["Machine Tact time Collect Result_Fill2_weight"].value_counts()

Machine Tact time Collect Result_Fill2_weight
0.415596    1167
0.487179     195
0.463277     177
0.574074     162
0.529412     153
0.476821     151
0.625850     147
0.492958     142
0.595745     141
0.604317     139
0.592000     125
0.570248     121
0.607477     107
0.542553      94
0.597561      82
0.450704      71
0.610169      59
0.333333      33
0.533333      30
0.407407      27
0.500000      20
0.666667      18
0.642857      14
0.444444       9
0.750000       8
1.000000       8
0.250000       4
Name: count, dtype: int64

In [1010]:
test_data["Machine Tact time Collect Result_Fill2_weight"] = test_data["Machine Tact time Collect Result_Fill2"].map(target_ratio)
test_data["Machine Tact time Collect Result_Fill2_weight"].value_counts()

Machine Tact time Collect Result_Fill2_weight
0.415596    8150
0.333333    2199
0.487179     592
0.463277     572
0.492958     533
0.604317     527
0.476821     519
0.574074     512
0.529412     486
0.570248     423
0.595745     409
0.592000     356
0.625850     350
0.542553     344
0.607477     304
0.450704     245
0.597561     236
0.610169     160
0.533333     109
0.407407      80
0.500000      57
0.642857      44
0.666667      42
1.000000      31
0.444444      30
0.750000      21
Name: count, dtype: int64

In [1011]:
print(test_data["Machine Tact time Collect Result_Fill2_weight"].isnull().sum())

30


In [1012]:
# 최빈값으로 결측치 처리
mode_value = test_data["Machine Tact time Collect Result_Fill2_weight"].mode()[0]
test_data["Machine Tact time Collect Result_Fill2_weight"].fillna(mode_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_data["Machine Tact time Collect Result_Fill2_weight"].fillna(mode_value, inplace=True)


In [1013]:
# 137 PalletID Collect Result_Fill2
target_ratio = df_concat.groupby("PalletID Collect Result_Fill2")["target"].mean()
print(target_ratio)

PalletID Collect Result_Fill2
1.0     0.563725
2.0     0.560669
3.0     0.556522
4.0     0.554502
5.0     0.532110
6.0     0.552036
7.0     0.531401
8.0     0.550420
9.0     0.442308
10.0    0.557377
11.0    0.514286
12.0    0.454545
13.0    0.587302
14.0    0.304348
15.0    0.693548
16.0    0.684211
19.1    0.363636
19.2    0.312500
19.3    0.413793
19.4    0.424658
19.5    0.428571
19.6    0.369863
19.7    0.376623
19.8    0.409396
19.9    0.428571
20.0    0.415730
20.1    0.457831
20.2    0.476923
20.3    0.413793
20.4    0.555556
20.5    1.000000
20.6    1.000000
20.7    0.500000
20.8    0.500000
20.9    0.000000
21.0    0.666667
21.2    1.000000
21.4    1.000000
Name: target, dtype: float64


In [1014]:
df_concat["PalletID Collect Result_Fill2_weight"] = df_concat["PalletID Collect Result_Fill2"].map(target_ratio)
df_concat["PalletID Collect Result_Fill2_weight"].value_counts()

PalletID Collect Result_Fill2_weight
0.428571    273
0.560669    239
0.550420    238
0.556522    230
0.552036    221
0.532110    218
0.554502    211
0.531401    207
0.563725    204
0.376623    154
0.409396    149
0.369863    146
0.415730     89
0.413793     87
0.457831     83
0.424658     73
0.514286     70
0.476923     65
0.587302     63
0.693548     62
0.557377     61
0.442308     52
0.312500     48
0.304348     46
0.454545     44
0.684211     38
0.363636     11
0.555556      9
1.000000      5
0.500000      4
0.666667      3
0.000000      1
Name: count, dtype: int64

In [1015]:
del df_concat["PalletID Collect Result_Fill2"]
del df_concat["PalletID Collect Result_Fill2_weight"]
print(df_concat.shape)

(3404, 96)


In [1016]:
# 138. Production Qty Collect Result_Fill2
target_ratio = df_concat.groupby("Production Qty Collect Result_Fill2")["target"].mean()
print(target_ratio)

Production Qty Collect Result_Fill2
0      0.714286
1      0.428571
2      0.465347
3      0.467391
4      0.350649
         ...   
465    0.000000
468    0.000000
470    1.000000
485    1.000000
487    1.000000
Name: target, Length: 427, dtype: float64


In [1017]:
del df_concat["Production Qty Collect Result_Fill2"]
print(df_concat.shape)

(3404, 95)


In [1018]:
# 139. Receip No Collect Result_Fill2
target_ratio = df_concat.groupby("Receip No Collect Result_Fill2")["target"].mean()
print(target_ratio)

Receip No Collect Result_Fill2
0      0.666667
1      0.547349
2      1.000000
3      0.333333
4      0.750000
         ...   
591    0.000000
596    0.000000
597    0.000000
599    0.000000
608    0.000000
Name: target, Length: 470, dtype: float64


In [1019]:
del df_concat["Receip No Collect Result_Fill2"]
print(df_concat.shape)

(3404, 94)


In [1020]:
# 140. WorkMode Collect Result_Fill2
target_ratio = df_concat.groupby("WorkMode Collect Result_Fill2")["target"].mean()
print(target_ratio)

WorkMode Collect Result_Fill2
0    0.547187
1    0.397645
6    0.600000
9    0.000000
Name: target, dtype: float64


In [1021]:
df_concat["WorkMode Collect Result_Fill2_weight"] = df_concat["WorkMode Collect Result_Fill2"].map(target_ratio)
df_concat["WorkMode Collect Result_Fill2_weight"].value_counts()

WorkMode Collect Result_Fill2_weight
0.547187    2204
0.397645    1104
0.600000      95
0.000000       1
Name: count, dtype: int64

In [1022]:
del df_concat["WorkMode Collect Result_Fill2"]
del df_concat["WorkMode Collect Result_Fill2_weight"]
print(df_concat.shape)

(3404, 93)
