# Parts 분리

In [21]:
import pandas as pd
import numpy as np
import pyarrow as pa
import pyarrow.parquet as pq
import matplotlib.pyplot as plt
import ast
from scipy.stats import ttest_ind
import seaborn as sns
import time
import networkx as nx
import warnings


# SettingWithCopyWarning 무시
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

# 모든 출력을 보여주도록 설정
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)


In [22]:
# Input Parquet file, given as a relative path.
parquet_file = 'final_data.parquet'
# Open the Parquet file (read as a pyarrow Table)
table = pq.read_table(parquet_file)

# Convert the data in the Table to a pandas DataFrame
df = table.to_pandas()
# Print the (rows, columns) shape of the loaded frame
print(df.shape)

(13522573, 27)


In [23]:
# Print the column names
column_names = df.columns  # pandas Index holding the frame's column labels
print("Column Names:", column_names)

Column Names: Index(['MONTH', 'RECLAIM_PERIOD', 'SUBSIDIARY_NAME', 'CAPTIVE_CHANNEL_FLAG',
       'COMPANY_CODE', 'DIVISION_NAME', 'MODEL_CODE', 'PARTS_NO1', 'PARTS_NO2',
       'PARTS_NO3', 'PARTS_NO4', 'PARTS_NO5', 'RECEIPT_SYMPTOM', 'CENTER_CODE',
       'PREV_CENTER_CODE', 'PRODUCT_NAME', 'RECLAIM', 'WARRANTY_TYPE',
       'PREV_WARRANTY_TYPE', 'SVC_TYPE', 'PREV_SVC_TYPE', 'ENGINEER_CODE',
       'PREV_ENGINEER_CODE', 'ENGINEER_REPAIR', 'PREV_ENGINEER_SYMPTOM',
       'MULTICLAIM', 'PREV_RECEIPT_SYMPTOM'],
      dtype='object')


In [24]:
# Preview the first 10 rows of the loaded frame.
df.head(10)

Unnamed: 0,MONTH,RECLAIM_PERIOD,SUBSIDIARY_NAME,CAPTIVE_CHANNEL_FLAG,COMPANY_CODE,DIVISION_NAME,MODEL_CODE,PARTS_NO1,PARTS_NO2,PARTS_NO3,PARTS_NO4,PARTS_NO5,RECEIPT_SYMPTOM,CENTER_CODE,PREV_CENTER_CODE,PRODUCT_NAME,RECLAIM,WARRANTY_TYPE,PREV_WARRANTY_TYPE,SVC_TYPE,PREV_SVC_TYPE,ENGINEER_CODE,PREV_ENGINEER_CODE,ENGINEER_REPAIR,PREV_ENGINEER_SYMPTOM,MULTICLAIM,PREV_RECEIPT_SYMPTOM
0,1,47.0,LGEAI,Non Captive,HE,LTV,OLED65C7P,LGE_PARTS_993,LGE_PARTS_18111,LGE_PARTS_20442,,,,LGE_CEN_0,LGE_CEN_0,OLED TV,1,IW,IW,IH,IH,LGE_ENG_0,LGE_ENG_0,Part replaced-electrical (ref#),Image Retention,2,Power
1,1,10.0,LGEAI,Non Captive,HE,LTV,65UN6955ZUF,LGE_PARTS_4492,,,,,Repair Status/Issue,LGE_CEN_1,LGE_CEN_1,LED LCD TV,1,IW,IW,IH,IH,LGE_ENG_1,LGE_ENG_1,Exchange-PC Board,No indication of power and totally inoperative...,2,Audio
3,1,39.0,LGEAI,Non Captive,HE,LTV,70UP7070PUE,LGE_PARTS_9199,,,,,,LGE_CEN_3,LGE_CEN_3,LED LCD TV,1,IW,IW,IH,IH,LGE_ENG_3,LGE_ENG_3,Part replaced-electrical (ref#),No indication of power and totally inoperative...,2,Power
5,1,21.0,LGEAI,Non Captive,HE,LTV,50UN6955ZUF,LGE_PARTS_24647,LGE_PARTS_35272,,,,,LGE_CEN_3,LGE_CEN_3,LED LCD TV,1,IW,IW,IH,IH,LGE_ENG_3,LGE_ENG_3,Part replaced-electrical (ref#),No indication of power and totally inoperative...,2,Video
6,1,41.0,LGEAI,Non Captive,HE,LTV,55UN7300AUD,LGE_PARTS_5204,LGE_PARTS_21513,,,,Network,LGE_CEN_0,LGE_CEN_0,LED LCD TV,1,IW,IW,IH,IH,LGE_ENG_0,LGE_ENG_0,Part replaced-electrical (ref#),No indication of power and totally inoperative...,2,Network
7,1,69.0,LGEAI,Non Captive,HE,LTV,65UK6500AUA,LGE_PARTS_6727,LGE_PARTS_9427,,,,Video,LGE_CEN_5,LGE_CEN_3087,LED LCD TV,1,IW,IW,IH,IH,LGE_ENG_5,LGE_ENG_28700,Part replaced-electrical (ref#),No indication of power and totally inoperative...,2,Power
9,1,6.0,LGEAI,Non Captive,HE,LTV,OLED55CXPUA,LGE_PARTS_14364,,,,,Power,LGE_CEN_7,LGE_CEN_7,OLED TV,1,IW,IW,IH,IH,LGE_ENG_7,LGE_ENG_7,Part replaced-electrical (ref#),No indication of power and totally inoperative...,2,Power
10,1,22.0,LGEAI,Non Captive,HE,LTV,OLED55CXPUA,LGE_PARTS_20646,LGE_PARTS_28802,LGE_PARTS_22800,,,Power,LGE_CEN_8,LGE_CEN_811,OLED TV,1,IW,IW,IH,IH,LGE_ENG_8,LGE_ENG_1757,Part replaced-electrical (ref#),No indication of power and totally inoperative...,2,Power
11,1,35.0,LGEAI,Non Captive,HE,LTV,70UN7070PUA,LGE_PARTS_27662,LGE_PARTS_9611,,,,Power,LGE_CEN_9,LGE_CEN_9,LED LCD TV,1,IW,IW,IH,IH,LGE_ENG_9,LGE_ENG_9,Part replaced-electrical (ref#),Intermittent display,2,
12,1,44.0,LGEAI,Non Captive,HE,LTV,OLED55C1AUB,LGE_PARTS_35070,,,,,Connections,LGE_CEN_9,LGE_CEN_9,OLED TV,1,IW,IW,IH,IH,LGE_ENG_9,LGE_ENG_9,Part replaced-electrical (ref#),Intermittent display,2,Application


In [32]:
# Columns carried along unchanged as identifier variables when the part
# columns are melted into long format (every column except PARTS_NO1..5).
id_column = [
    "MONTH",
    "RECLAIM_PERIOD",
    "SUBSIDIARY_NAME",
    "CAPTIVE_CHANNEL_FLAG",
    "COMPANY_CODE",
    "DIVISION_NAME",
    "MODEL_CODE",
    "RECEIPT_SYMPTOM",
    "CENTER_CODE",
    "PREV_CENTER_CODE",
    "PRODUCT_NAME",
    "RECLAIM",
    "WARRANTY_TYPE",
    "PREV_WARRANTY_TYPE",
    "SVC_TYPE",
    "PREV_SVC_TYPE",
    "ENGINEER_CODE",
    "PREV_ENGINEER_CODE",
    "ENGINEER_REPAIR",
    "PREV_ENGINEER_SYMPTOM",
    "MULTICLAIM",
    "PREV_RECEIPT_SYMPTOM",
]

In [33]:
# The five wide-format part columns, PARTS_NO1 through PARTS_NO5.
parts_column = [f"PARTS_NO{slot}" for slot in range(1, 6)]

In [34]:
# Reshape wide -> long: each part column of a row becomes its own row, with
# the source column name in PART and the part number in VALUE.
df_melted = df.melt(
    id_vars=id_column,
    value_vars=parts_column,
    var_name='PART',
    value_name='VALUE',
)

In [35]:
# Keep only the rows whose VALUE is present (discard missing part slots).
df_melted = df_melted.loc[df_melted['VALUE'].notna()]

In [36]:
# Print the (rows, columns) shape of the long-format frame.
print(df_melted.shape)

(6650959, 24)


In [37]:
# Preview the first 10 rows of the long-format frame.
df_melted.head(10)

Unnamed: 0,MONTH,RECLAIM_PERIOD,SUBSIDIARY_NAME,CAPTIVE_CHANNEL_FLAG,COMPANY_CODE,DIVISION_NAME,MODEL_CODE,RECEIPT_SYMPTOM,CENTER_CODE,PREV_CENTER_CODE,PRODUCT_NAME,RECLAIM,WARRANTY_TYPE,PREV_WARRANTY_TYPE,SVC_TYPE,PREV_SVC_TYPE,ENGINEER_CODE,PREV_ENGINEER_CODE,ENGINEER_REPAIR,PREV_ENGINEER_SYMPTOM,MULTICLAIM,PREV_RECEIPT_SYMPTOM,PART,VALUE
0,1,47.0,LGEAI,Non Captive,HE,LTV,OLED65C7P,,LGE_CEN_0,LGE_CEN_0,OLED TV,1,IW,IW,IH,IH,LGE_ENG_0,LGE_ENG_0,Part replaced-electrical (ref#),Image Retention,2,Power,PARTS_NO1,LGE_PARTS_993
1,1,10.0,LGEAI,Non Captive,HE,LTV,65UN6955ZUF,Repair Status/Issue,LGE_CEN_1,LGE_CEN_1,LED LCD TV,1,IW,IW,IH,IH,LGE_ENG_1,LGE_ENG_1,Exchange-PC Board,No indication of power and totally inoperative...,2,Audio,PARTS_NO1,LGE_PARTS_4492
2,1,39.0,LGEAI,Non Captive,HE,LTV,70UP7070PUE,,LGE_CEN_3,LGE_CEN_3,LED LCD TV,1,IW,IW,IH,IH,LGE_ENG_3,LGE_ENG_3,Part replaced-electrical (ref#),No indication of power and totally inoperative...,2,Power,PARTS_NO1,LGE_PARTS_9199
3,1,21.0,LGEAI,Non Captive,HE,LTV,50UN6955ZUF,,LGE_CEN_3,LGE_CEN_3,LED LCD TV,1,IW,IW,IH,IH,LGE_ENG_3,LGE_ENG_3,Part replaced-electrical (ref#),No indication of power and totally inoperative...,2,Video,PARTS_NO1,LGE_PARTS_24647
4,1,41.0,LGEAI,Non Captive,HE,LTV,55UN7300AUD,Network,LGE_CEN_0,LGE_CEN_0,LED LCD TV,1,IW,IW,IH,IH,LGE_ENG_0,LGE_ENG_0,Part replaced-electrical (ref#),No indication of power and totally inoperative...,2,Network,PARTS_NO1,LGE_PARTS_5204
5,1,69.0,LGEAI,Non Captive,HE,LTV,65UK6500AUA,Video,LGE_CEN_5,LGE_CEN_3087,LED LCD TV,1,IW,IW,IH,IH,LGE_ENG_5,LGE_ENG_28700,Part replaced-electrical (ref#),No indication of power and totally inoperative...,2,Power,PARTS_NO1,LGE_PARTS_6727
6,1,6.0,LGEAI,Non Captive,HE,LTV,OLED55CXPUA,Power,LGE_CEN_7,LGE_CEN_7,OLED TV,1,IW,IW,IH,IH,LGE_ENG_7,LGE_ENG_7,Part replaced-electrical (ref#),No indication of power and totally inoperative...,2,Power,PARTS_NO1,LGE_PARTS_14364
7,1,22.0,LGEAI,Non Captive,HE,LTV,OLED55CXPUA,Power,LGE_CEN_8,LGE_CEN_811,OLED TV,1,IW,IW,IH,IH,LGE_ENG_8,LGE_ENG_1757,Part replaced-electrical (ref#),No indication of power and totally inoperative...,2,Power,PARTS_NO1,LGE_PARTS_20646
8,1,35.0,LGEAI,Non Captive,HE,LTV,70UN7070PUA,Power,LGE_CEN_9,LGE_CEN_9,LED LCD TV,1,IW,IW,IH,IH,LGE_ENG_9,LGE_ENG_9,Part replaced-electrical (ref#),Intermittent display,2,,PARTS_NO1,LGE_PARTS_27662
9,1,44.0,LGEAI,Non Captive,HE,LTV,OLED55C1AUB,Connections,LGE_CEN_9,LGE_CEN_9,OLED TV,1,IW,IW,IH,IH,LGE_ENG_9,LGE_ENG_9,Part replaced-electrical (ref#),Intermittent display,2,Application,PARTS_NO1,LGE_PARTS_35070


In [38]:
# Discard the PART column (the name of the source part column).
df_result = df_melted.drop('PART', axis='columns')

In [40]:
# Expose the melted part number under the name PARTS_NO1.
# NOTE(review): after the melt this column holds values taken from every
# column listed in parts_column, not only the original PARTS_NO1.
df_result = df_result.rename({'VALUE': 'PARTS_NO1'}, axis='columns')

In [42]:
# Preview the first 10 rows of the final frame.
df_result.head(10)

Unnamed: 0,MONTH,RECLAIM_PERIOD,SUBSIDIARY_NAME,CAPTIVE_CHANNEL_FLAG,COMPANY_CODE,DIVISION_NAME,MODEL_CODE,RECEIPT_SYMPTOM,CENTER_CODE,PREV_CENTER_CODE,PRODUCT_NAME,RECLAIM,WARRANTY_TYPE,PREV_WARRANTY_TYPE,SVC_TYPE,PREV_SVC_TYPE,ENGINEER_CODE,PREV_ENGINEER_CODE,ENGINEER_REPAIR,PREV_ENGINEER_SYMPTOM,MULTICLAIM,PREV_RECEIPT_SYMPTOM,PARTS_NO1
0,1,47.0,LGEAI,Non Captive,HE,LTV,OLED65C7P,,LGE_CEN_0,LGE_CEN_0,OLED TV,1,IW,IW,IH,IH,LGE_ENG_0,LGE_ENG_0,Part replaced-electrical (ref#),Image Retention,2,Power,LGE_PARTS_993
1,1,10.0,LGEAI,Non Captive,HE,LTV,65UN6955ZUF,Repair Status/Issue,LGE_CEN_1,LGE_CEN_1,LED LCD TV,1,IW,IW,IH,IH,LGE_ENG_1,LGE_ENG_1,Exchange-PC Board,No indication of power and totally inoperative...,2,Audio,LGE_PARTS_4492
2,1,39.0,LGEAI,Non Captive,HE,LTV,70UP7070PUE,,LGE_CEN_3,LGE_CEN_3,LED LCD TV,1,IW,IW,IH,IH,LGE_ENG_3,LGE_ENG_3,Part replaced-electrical (ref#),No indication of power and totally inoperative...,2,Power,LGE_PARTS_9199
3,1,21.0,LGEAI,Non Captive,HE,LTV,50UN6955ZUF,,LGE_CEN_3,LGE_CEN_3,LED LCD TV,1,IW,IW,IH,IH,LGE_ENG_3,LGE_ENG_3,Part replaced-electrical (ref#),No indication of power and totally inoperative...,2,Video,LGE_PARTS_24647
4,1,41.0,LGEAI,Non Captive,HE,LTV,55UN7300AUD,Network,LGE_CEN_0,LGE_CEN_0,LED LCD TV,1,IW,IW,IH,IH,LGE_ENG_0,LGE_ENG_0,Part replaced-electrical (ref#),No indication of power and totally inoperative...,2,Network,LGE_PARTS_5204
5,1,69.0,LGEAI,Non Captive,HE,LTV,65UK6500AUA,Video,LGE_CEN_5,LGE_CEN_3087,LED LCD TV,1,IW,IW,IH,IH,LGE_ENG_5,LGE_ENG_28700,Part replaced-electrical (ref#),No indication of power and totally inoperative...,2,Power,LGE_PARTS_6727
6,1,6.0,LGEAI,Non Captive,HE,LTV,OLED55CXPUA,Power,LGE_CEN_7,LGE_CEN_7,OLED TV,1,IW,IW,IH,IH,LGE_ENG_7,LGE_ENG_7,Part replaced-electrical (ref#),No indication of power and totally inoperative...,2,Power,LGE_PARTS_14364
7,1,22.0,LGEAI,Non Captive,HE,LTV,OLED55CXPUA,Power,LGE_CEN_8,LGE_CEN_811,OLED TV,1,IW,IW,IH,IH,LGE_ENG_8,LGE_ENG_1757,Part replaced-electrical (ref#),No indication of power and totally inoperative...,2,Power,LGE_PARTS_20646
8,1,35.0,LGEAI,Non Captive,HE,LTV,70UN7070PUA,Power,LGE_CEN_9,LGE_CEN_9,LED LCD TV,1,IW,IW,IH,IH,LGE_ENG_9,LGE_ENG_9,Part replaced-electrical (ref#),Intermittent display,2,,LGE_PARTS_27662
9,1,44.0,LGEAI,Non Captive,HE,LTV,OLED55C1AUB,Connections,LGE_CEN_9,LGE_CEN_9,OLED TV,1,IW,IW,IH,IH,LGE_ENG_9,LGE_ENG_9,Part replaced-electrical (ref#),Intermittent display,2,Application,LGE_PARTS_35070


In [43]:
# Write the result to 'stage2_data.parquet'; index=False leaves the DataFrame index out of the file.
df_result.to_parquet('stage2_data.parquet', index=False)