In [1]:
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")

In [40]:
class MeltingData:
    """Reshape a wide, attribute-per-column CSV export into a long table.

    The code reads the following columns from the input file:

    * '속성 그룹 코드' -- row tag; '01_속성명' marks attribute-name header rows,
      '03_DATA' marks data rows.
    * 'SR No' -- key used for de-duplication and for the final merge.
    * 'C|C|T' -- category key that links a data row to its header row.
    * '속성1' and every column to its right -- the attribute columns.
    * '공정' -- carried along as part of the default common columns.

    Every ``stepN`` method stores its result on the instance and returns
    ``self`` so the steps can be chained; ``execute`` runs all of them.
    """

    _GROUP_COL = '속성 그룹 코드'
    _DATA_CODE = '03_DATA'
    _HEADER_CODE = '01_속성명'
    _KEY_COL = 'SR No'
    _CCT_COL = 'C|C|T'
    _FIRST_ATTR_COL = '속성1'
    _ORDINAL_COL = '속성순번'
    _VALUE_COL = '속성값'
    _NAME_COL = '속성명'
    _DEFAULT_COMMON_COLS = ('SR No', '공정', 'C|C|T')

    def __init__(self, data_path):
        """Remember the CSV path; nothing is read until ``step1``/``execute``."""
        self.data_path = data_path

    def step1(self):
        """Load the CSV into ``self.df``, trying cp949 first and then utf8.

        Only decoding failures trigger the utf8 retry; other errors (for
        example a missing file) propagate instead of being masked by a second,
        unrelated read attempt.
        """
        try:
            self.df = pd.read_csv(self.data_path, encoding='cp949')
        except UnicodeError:
            self.df = pd.read_csv(self.data_path, encoding='utf8')
        return self

    def step2(self):
        """Keep only the data rows ('03_DATA') in ``self.df_filtered``."""
        self.df_filtered = self.df[self.df[self._GROUP_COL].isin([self._DATA_CODE])]
        return self

    def step3(self, col_list=None, key=None):
        """Extract the columns shared by all attributes, one row per key.

        Args:
            col_list: columns to keep. ``None`` selects the defaults
                'SR No', '공정', 'C|C|T' and de-duplicates on 'SR No'
                (``key`` is ignored in that case, as before).
            key: column used for de-duplication when ``col_list`` is given.
                Falls back to 'SR No' when omitted, instead of failing on
                ``subset=[None]``.

        The result is stored in ``self.df_common``. ``step5`` merges on
        'SR No', so a custom ``col_list`` must contain that column.
        """
        if col_list is None:
            columns, dedup_key = list(self._DEFAULT_COMMON_COLS), self._KEY_COL
        else:
            columns = list(col_list)
            dedup_key = self._KEY_COL if key is None else key

        # Non-inplace drop_duplicates: avoids mutating a slice of df_filtered.
        self.df_common = self.df_filtered[columns].drop_duplicates(
            subset=[dedup_key], keep='first'
        )
        return self

    def step4(self):
        """Melt the attribute columns into (SR No, 속성순번, 속성값) rows.

        ``self.df_attrs`` holds the full melt; ``self.df_attrs_2`` is the same
        frame with every row containing a missing value dropped.
        """
        self.attr_1_col_no = self.df_filtered.columns.get_loc(self._FIRST_ATTR_COL)
        attr_cols = self.df_filtered.columns[self.attr_1_col_no:].to_list()
        wide = self.df_filtered[[self._KEY_COL] + attr_cols]

        self.df_attrs = pd.melt(
            wide,
            id_vars=[self._KEY_COL],
            value_vars=attr_cols,
            var_name=self._ORDINAL_COL,
            value_name=self._VALUE_COL,
        )
        self.df_attrs_2 = self.df_attrs.dropna()
        return self

    def step5(self):
        """Left-merge the melted attributes with the common columns on 'SR No'."""
        self.df_indiv = pd.merge(self.df_attrs_2, self.df_common, on=self._KEY_COL, how='left')
        return self

    def step6(self):
        """Build the lookup from 'C|C|T' to its attribute-name header row.

        The header rows ('01_속성명') are taken from the unfiltered ``self.df``
        because ``step2`` removes them from ``self.df_filtered``. Results:

        * ``self.df_attribute_header`` -- header rows, 'C|C|T' + attribute columns
        * ``self.header_list`` -- one dict per header row
        * ``self.dict_idx`` -- 'C|C|T' value -> position in ``header_list``
          (when a 'C|C|T' value repeats, the last header row wins)
        """
        first_attr = self.df.columns.get_loc(self._FIRST_ATTR_COL)
        attr_cols = self.df.columns[first_attr:].to_list()
        header_rows = self.df[self.df[self._GROUP_COL].isin([self._HEADER_CODE])]

        self.df_attribute_header = header_rows[[self._CCT_COL] + attr_cols]
        self.header_list = self.df_attribute_header.to_dict(orient='records')

        self.idx_list = list(range(len(self.header_list)))
        self.cct_list = [header[self._CCT_COL] for header in self.header_list]
        self.dict_idx = dict(zip(self.cct_list, self.idx_list))
        return self

    def step7(self):
        """Add the '속성명' column: the attribute name for each melted row.

        Each row's name is looked up by its 'C|C|T' (which header row) and its
        '속성순번' (which attribute column). Rows whose 'C|C|T' has no header
        row get ``None``. Requires 'C|C|T' to be present in ``self.df_indiv``.
        """
        def attribute_name(cct, ordinal):
            idx = self.dict_idx.get(cct)
            if idx is None:
                return None
            return self.header_list[idx].get(ordinal)

        # A plain list (rather than DataFrame.apply) also works on an empty frame.
        self.df_indiv[self._NAME_COL] = [
            attribute_name(cct, ordinal)
            for cct, ordinal in zip(self.df_indiv[self._CCT_COL], self.df_indiv[self._ORDINAL_COL])
        ]
        return self

    def execute(self):
        """Run step1 through step7 in order and return ``self.df_indiv``."""
        (
            self.step1()
            .step2()
            .step3()
            .step4()
            .step5()
            .step6()
            .step7()
        )
        return self.df_indiv

    def help(self):
        """Print a one-line description of every public step."""
        summaries = (
            'step1() : load data : 데이터 불러오기',
            'step2() : filtering data : 데이터 필터링',
            'step3() : extract the common part in dataframe : 데이터프레임의 공통 속성 부분 추출',
            'step4() : make attribute dataframe data : 속성값 데이터프레임 생성',
            'step5() : merge common dataframe and attribute dataframe : 공통 데이터프레임과 개별속성 데이터프레임 병합',
            'step6() : make dictionary : 딕셔너리 생성',
            'step7() : change_attribute_name : 속성명 변경',
            'execute() : run all steps',
        )
        for line in summaries:
            print(line)

    def show_attributes(self):
        """Print the names of the attributes currently set on this instance."""
        print("Instance attributes:")
        for attr in vars(self):
            print(f"{attr}")

In [41]:
# Step 0: location of the input CSV ('2101.csv' inside the test-file folder).
data_path = os.path.join(
    "C:\\Users\\ASUS\\Documents\\00_GS_DT\\Data_Insight\\Support Process Management\\test_file",
    '2101.csv',
)

In [42]:
# Create the pipeline object; the constructor only stores the path.
melting_data = MeltingData(data_path)

In [43]:
# Print the one-line summary of each pipeline step.
melting_data.help()

step1() : load data : 데이터 불러오기
step2() : filtering data : 데이터 필터링
step3() : extract the common part in dataframe : 데이터프레임의 공통 속성 부분 추출
step4() : make attribute dataframe data : 속성값 데이터프레임 생성
step5() : merge common dataframe and attribute dataframe : 공통 데이터프레임과 개별속성 데이터프레임 병합
step6() : make dictionary : 딕셔너리 생성
step7() : change_attribute_name : 속성명 변경
execute() : run all steps


In [46]:
# Run the pipeline up to the merge; every step returns the instance, so the calls chain.
melting_data.step1().step2().step3().step4().step5()
# List the attributes the steps above created on the instance.
melting_data.show_attributes()

Instance attributes:
data_path
df
df_filtered
df_common
attr_1_col_no
df_attrs
df_attrs_2
df_indiv


In [47]:
# Take the merged long-format frame produced by step5 and display it.
df_indiv = melting_data.df_indiv
df_indiv

Unnamed: 0,SR No,속성순번,속성값,공정,C|C|T
0,DOF522002061,속성1,3810mm /2740mm,522,FIXED EQUIPMENT|VESSEL|COLUMN
1,DOF522004113,속성1,1680mm / 3048mm,522,FIXED EQUIPMENT|VESSEL|COLUMN
2,DOF522005601,속성1,1220|mm,522,FIXED EQUIPMENT|VESSEL|COLUMN
3,DOF522000246,속성1,1900|mm,522,FIXED EQUIPMENT|VESSEL|HORIZONTAL
4,DOF522000344,속성1,2300|mm,522,FIXED EQUIPMENT|VESSEL|HORIZONTAL
...,...,...,...,...,...
53870,HH86082,속성55,50|degC,522,FIXED EQUIPMENT|AIR COOLER|PLUG
53871,DOF522001061,속성55,40700|kcal/.hr.m^2,522,FIXED EQUIPMENT|FIRED HEATER|CABIN/BOX TYPE
53872,DOF522003260,속성55,40700|kcal/.hr.m^2,522,FIXED EQUIPMENT|FIRED HEATER|CABIN/BOX TYPE
53873,DOF522004381,속성55,27100|kcal/.hr.m^2,522,FIXED EQUIPMENT|FIRED HEATER|CABIN/BOX TYPE
