# 제품 이상여부 판별 프로젝트

## 1. 데이터 불러오기

### 필수 라이브러리

In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### 데이터 읽어오기

In [5]:
# Seed constant; it is not referenced anywhere in the visible part of this notebook.
RANDOM_STATE = 110

# Single place to edit when the data directory moves (the path is machine-specific).
DATA_DIR = "C:/Users/KimDongyoung/Desktop/git_LGaimers5/Lg_aimers5/data"

# Load the raw train/test tables used by the analysis cells below.
train_data = pd.read_csv(f"{DATA_DIR}/train_data.csv")
test_data = pd.read_csv(f"{DATA_DIR}/test_data.csv")

In [6]:
def plot_box(dataframe, column_name):
    """
    Draw a horizontal box plot of a single DataFrame column.

    Missing values are dropped before plotting: matplotlib derives the box
    statistics from percentiles, so a single NaN in the input would otherwise
    produce an empty box.

    Parameters:
    dataframe (pd.DataFrame): Source data.
    column_name (str): Name of the column to plot.
    """
    values = dataframe[column_name].dropna()

    plt.figure(figsize=(10, 6))
    plt.boxplot(values, vert=False)
    plt.xlabel(column_name)
    plt.title(f'Box Plot of {column_name}')
    plt.show()

In [7]:
def value_counts_ratio_count(df, col_name, target_name):
    """
    Print, for every distinct value of a column, the ratio and count of each
    target class plus the total number of rows with that value.

    The table is printed only; nothing is returned.

    Parameters:
    df (pd.DataFrame): Source data.
    col_name (str): Column whose distinct values form the rows of the table.
    target_name (str): Target column whose classes form the ratio/count columns.
    """
    # Count of each target class per column value. `fill_value=0` fills the
    # missing (value, class) combinations without upcasting the counts to float.
    counts = df.groupby(col_name)[target_name].value_counts().unstack(fill_value=0)

    # Within-row share of each target class (same numbers as normalize=True),
    # derived from the counts so the data is grouped only once.
    ratios = counts.div(counts.sum(axis=1), axis=0)

    # Total number of rows per column value.
    total_counts = df[col_name].value_counts().rename('Total_Count')

    # Ratio columns first, then count columns, then the total.
    result = ratios.join(counts, lsuffix='_ratio', rsuffix='_count')
    result = result.join(total_counts)

    # Output formatting: strip only the trailing suffix that was added above, so
    # target labels that themselves contain '_' are printed intact.
    result.index.name = 'variable'
    print(f"\n{col_name}별 {target_name} 비율 및 갯수\n")
    print(result.rename(columns=lambda x: x.rsplit('_', 1)[0]))

In [8]:
def summarize_grouped_data(df, group_by_columns, target_name='target', target_value='AbNormal'):
    """
    Summarise, per group, how many rows carry a given target value.

    Prints a "Grouped by: ..." header followed by the summary table, and
    returns the table.

    Parameters:
    df (pd.DataFrame): Source data.
    group_by_columns (str or list): Column label, or list of labels, to group by.
    target_name (str): Target column to inspect (default 'target').
    target_value: Target value to count (default 'AbNormal').

    Returns:
    pd.DataFrame: One row per group with the columns 'group' (the group key),
    "'AdNormal' count" (rows equal to target_value), 'ratio' (that count
    divided by the group size) and 'Total' (group size). The spelling of the
    count column is kept as-is so existing lookups on it keep working.
    """
    # Build the header labels up front. A bare string must not be joined
    # character by character, and non-string labels must not break str.join.
    if pd.api.types.is_list_like(group_by_columns):
        header_labels = [str(label) for label in group_by_columns]
    else:
        header_labels = [str(group_by_columns)]

    results = []
    for name, group in df.groupby(group_by_columns):
        group_count = group.shape[0]

        # Rows in this group whose target equals the requested value.
        adnormal_count = int((group[target_name] == target_value).sum())
        adnormal_ratio = adnormal_count / group_count

        results.append([name, adnormal_count, adnormal_ratio, group_count])

    results_df = pd.DataFrame(results, columns=['group', "'AdNormal' count", 'ratio', 'Total'])

    # Header naming the grouping columns, a blank line, then the table.
    print(f"Grouped by: {', '.join(header_labels)}")
    print()
    print(results_df)

    return results_df

In [10]:
def plot_abnormal_ratio(dataframe, column_name, target_name, target_value, bins=20):
    """
    Bin a numeric column and plot, per bin, the share of rows whose target
    equals a given value. Each bar is annotated with "<ratio> (<count>)".

    The input DataFrame is not modified.

    Parameters:
    dataframe (pd.DataFrame): Source data.
    column_name (str): Column to bin.
    target_name (str): Target column.
    target_value (str): Target value whose share is plotted.
    bins (int): Number of bins passed to pd.cut (default 20).
    """
    bins_col = f'{column_name}_bins'

    # Build a small working frame instead of adding and later dropping a column
    # on the caller's DataFrame. That approach left the temporary column behind
    # when plotting failed and destroyed any existing '<column>_bins' column.
    target = dataframe[target_name]
    work = pd.DataFrame({
        bins_col: pd.cut(dataframe[column_name], bins=bins),
        'is_target': target == target_value,
        'has_target': target.notna(),
    })

    # One row per bin, in bin order. observed=False keeps empty bins, so the row
    # position equals the bin position. 'ratio' is the share of rows matching
    # target_value; 'count' is the number of rows with a non-null target.
    ratios = (
        work.groupby(bins_col, observed=False)
        .agg(ratio=('is_target', 'mean'), count=('has_target', 'sum'))
        .reset_index()
    )

    # Bar chart of the per-bin ratio.
    plt.figure(figsize=(20, 10))
    barplot = sns.barplot(x=bins_col, y='ratio', data=ratios, color='skyblue')
    plt.xlabel(f'{column_name} (binned)')
    plt.ylabel(f'{target_value} Ratio')
    plt.title(f'{target_value} Ratio by {column_name} (binned)', pad=30)  # extra gap between title and plot
    plt.xticks(rotation=45)
    plt.ylim(0, 1)

    # Annotate every bar with its ratio and the row count of the same bin.
    # Bars on a categorical axis are centred on 0, 1, 2, ..., so the rounded
    # centre is the bin's row position in `ratios`. The earlier
    # `int(center) - 1` lookup showed the previous bin's count, and the last
    # bin's count on the first bar.
    for patch in barplot.patches:
        height = patch.get_height()
        center = patch.get_x() + patch.get_width() / 2.
        if pd.isna(height) or pd.isna(center):
            continue  # empty bin: nothing to label
        position = int(round(center))
        if not 0 <= position < len(ratios):
            continue
        count_value = ratios['count'].iloc[position]
        barplot.annotate(f'{height:.2f} ({count_value})',
                         (center, height),
                         ha='center', va='center',
                         xytext=(0, 9),
                         textcoords='offset points')

    plt.show()

## Fill2

In [11]:
# '_Fill2'를 포함하는 열 이름 필터링
Process_Desc_col = train_data.filter(like='_Fill2').columns

# 필터링된 열 이름 출력
print("<Fill2 공정 관련 변수>")
for col in Process_Desc_col:
    print(col)

<Fill2 공정 관련 변수>
CURE END POSITION X Collect Result_Fill2
CURE END POSITION Z Collect Result_Fill2
CURE SPEED Collect Result_Fill2
CURE STANDBY POSITION Z Collect Result_Fill2
CURE START POSITION X Collect Result_Fill2
CURE START POSITION Z Collect Result_Fill2
HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill2
HEAD NORMAL COORDINATE X AXIS(Stage1) Judge Value_Fill2
HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill2
HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill2
HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill2
HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill2
HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill2
HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill2
HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill2
HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Fill2
Head Purge Position Z Collect Result_Fill2
Machine Tact time Collect Result_Fill2
PalletID Collect Result_Fill2
Production Qty Collect Result_Fill2
Rec

### Cure 관련 변수 처리

In [42]:
# Derived features: start-minus-end difference of the Fill2 cure position along X and Z.
# NOTE(review): only train_data receives these columns in this cell; test_data is not updated here.
train_data['Cure Position sub_X'] = train_data['CURE START POSITION X Collect Result_Fill2'] - train_data['CURE END POSITION X Collect Result_Fill2']
train_data['Cure Position sub_Z'] = train_data['CURE START POSITION Z Collect Result_Fill2'] - train_data['CURE END POSITION Z Collect Result_Fill2']

In [43]:
# Target-class ratio and count for each value of the X start-minus-end difference
value_counts_ratio_count(train_data, 'Cure Position sub_X', 'target')


Cure Position sub_X별 target 비율 및 갯수

          AbNormal    Normal  AbNormal  Normal  Total
variable                                             
-780      0.060606  0.939394         8     124    132
 780      0.058008  0.941992      2342   38032  40374


In [44]:
# Target-class ratio and count for each value of the Z start-minus-end difference
value_counts_ratio_count(train_data, 'Cure Position sub_Z', 'target')


Cure Position sub_Z별 target 비율 및 갯수

          AbNormal    Normal  AbNormal   Normal  Total
variable                                              
-10       0.088235  0.911765      51.0    527.0    578
-1        0.054217  0.945783      54.0    942.0    996
 0        0.055409  0.944591    1990.0  33925.0  35915
 1        0.084549  0.915451     255.0   2761.0   3016
 11       0.000000  1.000000       0.0      1.0      1


In [45]:
# Group by 'CURE SPEED Collect Result_Fill2' and 'Cure Position sub_X' and summarise each group
summary_df = summarize_grouped_data(train_data, ['CURE SPEED Collect Result_Fill2', 'Cure Position sub_X'])

Grouped by: CURE SPEED Collect Result_Fill2, Cure Position sub_X

        group  'AdNormal' count     ratio  Total
0   (40, 780)                 3  0.081081     37
1   (45, 780)               161  0.069367   2321
2  (48, -780)                 8  0.060606    132
3   (48, 780)               114  0.045455   2508
4   (50, 780)              2008  0.057914  34672
5   (51, 780)                37  0.069811    530
6   (53, 780)                 0  0.000000     36
7   (55, 780)                19  0.070632    269
8   (75, 780)                 0  0.000000      1


In [46]:
# Group by 'CURE SPEED Collect Result_Fill2' and 'Cure Position sub_Z' and summarise each group
summary_df = summarize_grouped_data(train_data, ['CURE SPEED Collect Result_Fill2', 'Cure Position sub_Z'])

Grouped by: CURE SPEED Collect Result_Fill2, Cure Position sub_Z

        group  'AdNormal' count     ratio  Total
0     (40, 0)                 3  0.081081     37
1     (45, 0)               161  0.069367   2321
2     (48, 0)               122  0.046212   2640
3   (50, -10)                32  0.106312    301
4    (50, -1)                54  0.054217    996
5     (50, 0)              1667  0.054911  30358
6     (50, 1)               255  0.084549   3016
7    (50, 11)                 0  0.000000      1
8   (51, -10)                19  0.068592    277
9     (51, 0)                18  0.071146    253
10    (53, 0)                 0  0.000000     36
11    (55, 0)                19  0.070632    269
12    (75, 0)                 0  0.000000      1


In [13]:
# Target-class ratio and count for each Fill2 cure speed value
value_counts_ratio_count(train_data, 'CURE SPEED Collect Result_Fill2', 'target')


CURE SPEED Collect Result_Fill2별 target 비율 및 갯수

          AbNormal    Normal  AbNormal   Normal  Total
variable                                              
40        0.081081  0.918919       3.0     34.0     37
45        0.069367  0.930633     161.0   2160.0   2321
48        0.046212  0.953788     122.0   2518.0   2640
50        0.057914  0.942086    2008.0  32664.0  34672
51        0.069811  0.930189      37.0    493.0    530
53        0.000000  1.000000       0.0     36.0     36
55        0.070632  0.929368      19.0    250.0    269
75        0.000000  1.000000       0.0      1.0      1


In [19]:
# Select the '_Fill2' columns whose name contains 'CURE START POSITION'
Process_Desc_col = train_data.filter(like='_Fill2').columns
filtered_columns = [col for col in Process_Desc_col if 'CURE START POSITION' in col]

# Print the matching column names; the heading names the START columns that are
# actually selected (it previously said "CURE END POSITION")
print("<CURE START POSITION와 _Fill2 공정 관련 변수>")
for col in filtered_columns:
    print(col)

<CURE END POSITION와 _Fill2 공정 관련 변수>
CURE START POSITION X Collect Result_Fill2
CURE START POSITION Z Collect Result_Fill2


In [20]:
# Target-class ratio and count for each Fill2 cure start X position
value_counts_ratio_count(train_data, 'CURE START POSITION X Collect Result_Fill2', 'target')


CURE START POSITION X Collect Result_Fill2별 target 비율 및 갯수

          AbNormal    Normal  AbNormal  Normal  Total
variable                                             
240       0.060606  0.939394         8     124    132
1020      0.058008  0.941992      2342   38032  40374


In [21]:
# Target-class ratio and count for each Fill2 cure start Z position
value_counts_ratio_count(train_data, 'CURE START POSITION Z Collect Result_Fill2', 'target')


CURE START POSITION Z Collect Result_Fill2별 target 비율 및 갯수

          AbNormal    Normal  AbNormal  Normal  Total
variable                                             
22        0.072495  0.927505        34     435    469
23        0.107843  0.892157        22     182    204
32        0.085866  0.914134       421    4482   4903
33        0.053622  0.946378      1873   33057  34930


In [22]:
# Target-class ratio and count for each Fill2 cure standby Z position
value_counts_ratio_count(train_data, 'CURE STANDBY POSITION Z Collect Result_Fill2', 'target')


CURE STANDBY POSITION Z Collect Result_Fill2별 target 비율 및 갯수

          AbNormal    Normal  AbNormal  Normal  Total
variable                                             
22        0.072495  0.927505        34     435    469
23        0.107843  0.892157        22     182    204
32        0.085866  0.914134       421    4482   4903
33        0.053622  0.946378      1873   33057  34930


In [14]:
# Select the '_Fill2' columns whose name contains 'CURE END POSITION'
Process_Desc_col = train_data.filter(like='_Fill2').columns
filtered_columns = [col for col in Process_Desc_col if 'CURE END POSITION' in col]

# Print the matching column names under a heading
print("<CURE END POSITION와 _Fill2 공정 관련 변수>")
for col in filtered_columns:
    print(col)

<CURE END POSITION와 _Fill2 공정 관련 변수>
CURE END POSITION X Collect Result_Fill2
CURE END POSITION Z Collect Result_Fill2


In [15]:
# Target-class ratio and count for each Fill2 cure end X position
value_counts_ratio_count(train_data, 'CURE END POSITION X Collect Result_Fill2', 'target')


CURE END POSITION X Collect Result_Fill2별 target 비율 및 갯수

          AbNormal    Normal  AbNormal  Normal  Total
variable                                             
240       0.058008  0.941992      2342   38032  40374
1020      0.060606  0.939394         8     124    132


In [16]:
# Target-class ratio and count for each Fill2 cure end Z position
value_counts_ratio_count(train_data, 'CURE END POSITION Z Collect Result_Fill2', 'target')


CURE END POSITION Z Collect Result_Fill2별 target 비율 및 갯수

          AbNormal    Normal  AbNormal  Normal  Total
variable                                             
22        0.052083  0.947917         5      91     96
32        0.089215  0.910785       651    6646   7297
33        0.051158  0.948842      1694   31419  33113


In [24]:
# Group by 'CURE SPEED Collect Result_Fill2' and 'CURE START POSITION X Collect Result_Fill2' and summarise each group
summary_df = summarize_grouped_data(train_data, ['CURE SPEED Collect Result_Fill2', 'CURE START POSITION X Collect Result_Fill2'])

Grouped by: CURE SPEED Collect Result_Fill2, CURE START POSITION X Collect Result_Fill2

        group  'AdNormal' count     ratio  Total
0  (40, 1020)                 3  0.081081     37
1  (45, 1020)               161  0.069367   2321
2   (48, 240)                 8  0.060606    132
3  (48, 1020)               114  0.045455   2508
4  (50, 1020)              2008  0.057914  34672
5  (51, 1020)                37  0.069811    530
6  (53, 1020)                 0  0.000000     36
7  (55, 1020)                19  0.070632    269
8  (75, 1020)                 0  0.000000      1


In [25]:
# Group by 'CURE SPEED Collect Result_Fill2' and 'CURE START POSITION Z Collect Result_Fill2' and summarise each group
summary_df = summarize_grouped_data(train_data, ['CURE SPEED Collect Result_Fill2', 'CURE START POSITION Z Collect Result_Fill2'])

Grouped by: CURE SPEED Collect Result_Fill2, CURE START POSITION Z Collect Result_Fill2

       group  'AdNormal' count     ratio  Total
0   (40, 33)                 3  0.081081     37
1   (45, 33)               161  0.069367   2321
2   (48, 33)               122  0.046212   2640
3   (50, 22)                15  0.078125    192
4   (50, 23)                22  0.107843    204
5   (50, 32)               384  0.087332   4397
6   (50, 33)              1587  0.053114  29879
7   (51, 22)                19  0.068592    277
8   (51, 32)                18  0.071146    253
9   (53, 33)                 0  0.000000     36
10  (55, 32)                19  0.075099    253
11  (55, 33)                 0  0.000000     16
12  (75, 33)                 0  0.000000      1


In [26]:
# Group by the Fill2 cure start X and end X positions and summarise each group
summary_df = summarize_grouped_data(train_data, ['CURE START POSITION X Collect Result_Fill2', 'CURE END POSITION X Collect Result_Fill2'])

Grouped by: CURE START POSITION X Collect Result_Fill2, CURE END POSITION X Collect Result_Fill2

         group  'AdNormal' count     ratio  Total
0  (240, 1020)                 8  0.060606    132
1  (1020, 240)              2342  0.058008  40374


In [27]:
# Group by the Fill2 cure start Z and end Z positions and summarise each group
summary_df = summarize_grouped_data(train_data, ['CURE START POSITION Z Collect Result_Fill2', 'CURE END POSITION Z Collect Result_Fill2'])

Grouped by: CURE START POSITION Z Collect Result_Fill2, CURE END POSITION Z Collect Result_Fill2

      group  'AdNormal' count     ratio  Total
0  (22, 22)                 5  0.052632     95
1  (22, 32)                29  0.077540    374
2  (23, 33)                22  0.107843    204
3  (32, 32)               367  0.093934   3907
4  (32, 33)                54  0.054217    996
5  (33, 22)                 0  0.000000      1
6  (33, 32)               255  0.084549   3016
7  (33, 33)              1618  0.050700  31913


### HEAD NORMAL COORDINATE 관련 변수 처리

In [28]:
# Select the '_Fill2' columns whose name contains 'HEAD NORMAL COORDINATE'
Process_Desc_col = train_data.filter(like='_Fill2').columns
filtered_columns = [col for col in Process_Desc_col if 'HEAD NORMAL COORDINATE' in col]

# Print the matching column names under a heading
print("<HEAD NORMAL COORDINATE와 _Fill2 공정 관련 변수>")
for col in filtered_columns:
    print(col)

<HEAD NORMAL COORDINATE와 _Fill2 공정 관련 변수>
HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill2
HEAD NORMAL COORDINATE X AXIS(Stage1) Judge Value_Fill2
HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill2
HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill2
HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill2
HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill2
HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill2
HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill2
HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill2
HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Fill2


In [32]:
# Encode the Stage1 X-axis judge value as a flag: 1 when it equals 'OK', 0 for
# anything else (missing values included, since they do not compare equal to 'OK')
train_data['HEAD_NORMAL_COORDINATE_X_AXIS_stage1_ok'] = train_data['HEAD NORMAL COORDINATE X AXIS(Stage1) Judge Value_Fill2'].apply(lambda x: 1 if x == 'OK' else 0)
test_data['HEAD_NORMAL_COORDINATE_X_AXIS_stage1_ok'] = test_data['HEAD NORMAL COORDINATE X AXIS(Stage1) Judge Value_Fill2'].apply(lambda x: 1 if x == 'OK' else 0)

# Drop the original 'HEAD NORMAL COORDINATE X AXIS(Stage1) Judge Value_Fill2' column from both frames
train_data.drop(columns=['HEAD NORMAL COORDINATE X AXIS(Stage1) Judge Value_Fill2'], inplace=True)
test_data.drop(columns=['HEAD NORMAL COORDINATE X AXIS(Stage1) Judge Value_Fill2'], inplace=True)

# Check the result: target-class ratio and count for each value of the new flag
value_counts_ratio_count(train_data, 'HEAD_NORMAL_COORDINATE_X_AXIS_stage1_ok', 'target')


HEAD_NORMAL_COORDINATE_X_AXIS_stage1_ok별 target 비율 및 갯수

          AbNormal    Normal  AbNormal  Normal  Total
variable                                             
0         0.063465  0.936535      1854   27359  29213
1         0.043921  0.956079       496   10797  11293


In [33]:
# Select the '_Fill2' columns whose name contains 'HEAD NORMAL COORDINATE'
Process_Desc_col = train_data.filter(like='_Fill2').columns
filtered_columns = [col for col in Process_Desc_col if 'HEAD NORMAL COORDINATE' in col]

# Narrow the selection further to the columns that mention 'Stage1'
stage1_columns = [col for col in filtered_columns if 'Stage1' in col]

# The heading names the process that is actually filtered, _Fill2 (it previously said _Fill1)
print("\n<HEAD NORMAL COORDINATE와 _Fill2 공정 관련 변수 중 Stage1 포함 변수>")
for col in stage1_columns:
    print(col)


<HEAD NORMAL COORDINATE와 _Fill1 공정 관련 변수 중 Stage1 포함 변수>
HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill2
HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill2
HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill2


In [34]:
# Rebind train_data/test_data to the "trim_" CSV files. The DataFrame objects used
# so far, including the columns added to them earlier in this notebook, are discarded.
# NOTE(review): presumably these are pre-processed exports; confirm what they contain.
train_data = pd.read_csv("C:/Users/KimDongyoung/Desktop/git_LGaimers5/Lg_aimers5/data/trim_train_data.csv")
test_data = pd.read_csv("C:/Users/KimDongyoung/Desktop/git_LGaimers5/Lg_aimers5/data/trim_test_data.csv")

In [35]:
# Per-group summary for each Fill2 equipment / Stage1 head X, Y, Z coordinate combination
summary_df = summarize_grouped_data(
    train_data,
    [
        'Equipment_Fill2',
        'HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill2',
        'HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill2',
        'HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill2',
    ],
)

Grouped by: Equipment_Fill2, HEAD NORMAL COORDINATE X AXIS(Stage1) Collect Result_Fill2, HEAD NORMAL COORDINATE Y AXIS(Stage1) Collect Result_Fill2, HEAD NORMAL COORDINATE Z AXIS(Stage1) Collect Result_Fill2

                                        group  'AdNormal' count     ratio  \
0   (Fill2 dispenser #1, 835.5, 428.0, 243.7)              1481  0.059178   
1  (Fill2 dispenser #2, 304.8, 1324.2, 243.5)                44  0.049107   
2  (Fill2 dispenser #2, 305.0, 1324.2, 243.5)               825  0.056569   

   Total  
0  25026  
1    896  
2  14584  


In [36]:
# Per-group summary for each Fill2 equipment / Stage2 head X, Y, Z coordinate combination
summary_df = summarize_grouped_data(
    train_data,
    [
        'Equipment_Fill2',
        'HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill2',
        'HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill2',
        'HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill2',
    ],
)

Grouped by: Equipment_Fill2, HEAD NORMAL COORDINATE X AXIS(Stage2) Collect Result_Fill2, HEAD NORMAL COORDINATE Y AXIS(Stage2) Collect Result_Fill2, HEAD NORMAL COORDINATE Z AXIS(Stage2) Collect Result_Fill2

                                        group  'AdNormal' count     ratio  \
0   (Fill2 dispenser #1, 458.0, 427.9, 243.7)              1481  0.059178   
1  (Fill2 dispenser #2, 499.8, 1324.2, 243.5)               869  0.056137   

   Total  
0  25026  
1  15480  


In [37]:
# Per-group summary for each Fill2 equipment / Stage3 head X, Y, Z coordinate combination
summary_df = summarize_grouped_data(
    train_data,
    [
        'Equipment_Fill2',
        'HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill2',
        'HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill2',
        'HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Fill2',
    ],
)

Grouped by: Equipment_Fill2, HEAD NORMAL COORDINATE X AXIS(Stage3) Collect Result_Fill2, HEAD NORMAL COORDINATE Y AXIS(Stage3) Collect Result_Fill2, HEAD NORMAL COORDINATE Z AXIS(Stage3) Collect Result_Fill2

                                        group  'AdNormal' count     ratio  \
0   (Fill2 dispenser #1, 156.0, 428.0, 243.7)              1481  0.059178   
1  (Fill2 dispenser #2, 692.8, 1324.2, 243.5)                44  0.049107   
2  (Fill2 dispenser #2, 694.0, 1324.2, 243.5)               825  0.056569   

   Total  
0  25026  
1    896  
2  14584  


## Purge 변수

In [38]:
# Target-class ratio and count for each Fill2 head purge Z position
value_counts_ratio_count(train_data, 'Head Purge Position Z Collect Result_Fill2', 'target')


Head Purge Position Z Collect Result_Fill2별 target 비율 및 갯수

          AbNormal    Normal  AbNormal  Normal  Total
variable                                             
85.000    0.058709  0.941291      2083   33397  35480
114.612   0.053124  0.946876       267    4759   5026
