In [1]:
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")

In [28]:
class MeltingData:
    """Reshape a wide attribute sheet into a long table, one attribute value per row.

    The input table must contain:

    * a '속성 그룹 코드' column, where '01_속성명' marks rows that hold attribute
      names and '03_DATA' marks rows that hold attribute values;
    * the columns 'SR No', '공정' and 'C|C|T';
    * a run of attribute columns that starts at '속성1' and extends to the
      last column of the table.

    Each step stores its result on the instance and returns ``self`` so the
    steps can be chained. Steps read attributes written by earlier steps, so
    they must be run in order (or simply call :meth:`execute`).
    """

    # Column that classifies each row, and the two row kinds the pipeline uses.
    _GROUP_COL = '속성 그룹 코드'
    _HEADER_CODE = '01_속성명'
    _DATA_CODE = '03_DATA'
    # File extensions that are read with pandas.read_excel instead of read_csv.
    _EXCEL_EXTENSIONS = ('.xls', '.xlsx', '.xlsm')

    def __init__(self, data_path):
        """Remember where the input file lives; nothing is read until step1().

        Args:
            data_path: Path of the CSV (cp949-encoded) or Excel file to load.
        """
        self.data_path = data_path

    def step1(self):
        """Load the input file into ``self.df``.

        Excel workbooks are recognised by file extension. Anything else is read
        as a cp949-encoded CSV; if that cannot be decoded or parsed, the file
        is read as an Excel workbook instead.

        Returns:
            self

        Raises:
            FileNotFoundError: If ``data_path`` does not exist.
        """
        extension = os.path.splitext(str(self.data_path))[1].lower()
        if extension in self._EXCEL_EXTENSIONS:
            # read_excel takes no ``encoding`` argument; passing one raises TypeError.
            self.df = pd.read_excel(self.data_path)
            return self

        try:
            self.df = pd.read_csv(self.data_path, encoding='cp949')
        except (UnicodeDecodeError, pd.errors.ParserError):
            # Not a cp949 CSV: keep the original fallback of treating the file
            # as a workbook, but let unrelated errors (e.g. a missing file) surface.
            self.df = pd.read_excel(self.data_path)
        return self

    def step2(self):
        """Keep only attribute-name rows and data rows in ``self.df_filtered``.

        Returns:
            self
        """
        wanted = self.df[self._GROUP_COL].isin([self._HEADER_CODE, self._DATA_CODE])
        self.df_filtered = self.df[wanted]
        return self

    def step3(self):
        """Extract the columns shared by every data row into ``self.df_common``.

        The result holds 'SR No', '공정' and 'C|C|T' for the '03_DATA' rows.

        Returns:
            self
        """
        is_data = self.df_filtered[self._GROUP_COL] == self._DATA_CODE
        self.df_common = self.df_filtered.loc[is_data, ['SR No', '공정', 'C|C|T']]
        return self

    def step4(self):
        """Extract the attribute-name rows into ``self.df_attribute_header``.

        Also records ``self.attr_1_col_no``, the position of the '속성1'
        column; every column from there to the end is treated as an attribute.

        Returns:
            self
        """
        self.attr_1_col_no = self.df_filtered.columns.get_loc('속성1')
        attribute_columns = self.df_filtered.columns[self.attr_1_col_no:].to_list()
        is_header = self.df_filtered[self._GROUP_COL] == self._HEADER_CODE
        self.df_attribute_header = self.df_filtered.loc[
            is_header, ['C|C|T'] + attribute_columns
        ]
        return self

    def step5(self):
        """Turn every attribute-name row into a dict, stored in ``self.header_list``.

        Each dict maps 'C|C|T' to the row's class string and every attribute
        column label ('속성1', ...) to the attribute name used by that class.

        Returns:
            self
        """
        self.header_list = self.df_attribute_header.to_dict(orient='records')
        return self

    def step6(self):
        """Melt the attribute columns of the data rows into long format.

        ``self.df_attrs`` holds one row per ('SR No', attribute column) pair
        with the columns 'SR No', '속성명' and '속성값'; ``self.df_attrs_2``
        is the same table with rows containing missing values removed.

        Returns:
            self
        """
        attribute_columns = self.df_filtered.columns[self.attr_1_col_no:].to_list()
        is_data = self.df_filtered[self._GROUP_COL] == self._DATA_CODE
        wide = self.df_filtered.loc[is_data, ['SR No'] + attribute_columns]

        self.df_attrs = pd.melt(
            wide,
            id_vars=['SR No'],
            value_vars=attribute_columns,
            var_name='속성명',
            value_name='속성값',
        )
        self.df_attrs_2 = self.df_attrs.dropna()
        return self

    def step7(self):
        """Attach the common columns to every melted row, giving ``self.df_indiv``.

        Left-joins ``self.df_attrs_2`` with ``self.df_common`` on 'SR No'.

        Returns:
            self
        """
        self.df_indiv = pd.merge(self.df_attrs_2, self.df_common, on='SR No', how='left')
        return self

    def step8(self):
        """Index the header dicts by their 'C|C|T' value.

        Sets ``self.idx_list`` (positions in ``header_list``), ``self.cct_list``
        (the matching 'C|C|T' values) and ``self.dict_idx`` (C|C|T -> position).
        When a 'C|C|T' value appears in several header rows, the last one wins.

        Returns:
            self
        """
        self.idx_list = list(range(len(self.header_list)))
        self.cct_list = [header['C|C|T'] for header in self.header_list]
        self.dict_idx = dict(zip(self.cct_list, self.idx_list))
        return self

    def step9(self):
        """Replace the column labels in '속성명' with the real attribute names.

        For every row of ``self.df_indiv`` the header dict belonging to the
        row's 'C|C|T' is looked up and the label ('속성1', ...) is swapped for
        the name stored under that label. The column is overwritten in place,
        so this step is meant to run once after step7/step8.

        Returns:
            self

        Raises:
            KeyError: If a row's 'C|C|T' has no attribute-name row, or the
                header dict has no entry for the row's label.
        """
        header_list = self.header_list
        dict_idx = self.dict_idx
        # A plain zipped lookup avoids building a Series per row the way
        # DataFrame.apply(axis=1) does, and yields an empty list for an empty frame.
        self.df_indiv['속성명'] = [
            header_list[dict_idx[cct]][label]
            for label, cct in zip(self.df_indiv['속성명'], self.df_indiv['C|C|T'])
        ]
        return self

    def execute(self):
        """Run step1 through step9 in order and return the resulting ``df_indiv``."""
        pipeline = (
            self.step1, self.step2, self.step3,
            self.step4, self.step5, self.step6,
            self.step7, self.step8, self.step9,
        )
        for step in pipeline:
            step()
        return self.df_indiv

    def help(self):
        """Print a one-line description of every step and of execute()."""
        descriptions = (
            'step1() : load data : 데이터 불러오기',
            'step2() : filtering data : 데이터 필터링',
            'step3() : extract the common part in dataframe : 데이터프레임의 공통 속성 부분 추출',
            'step4() : extract the attribute part header in dataframe : 데이터프레임에서 속성값 부분 추출',
            'step5() : make dictionary for attribute heeder : 속성명과 순번에 대한 딕셔너리 생성',
            'step6() : make attribute dataframe data : 속성값 데이터프레임 생성',
            'step7() : merge common dataframe and attribute dataframe : 공통 데이터프레임과 개별속성 데이터프레임 병합',
            'step8() : make dictionary : 딕셔너리 생성',
            'step9() : change_attribute_name : 속성명 변경',
            'execute() : run all steps',
        )
        for description in descriptions:
            print(description)

In [14]:
## step 0 : define the path of the data
# The input file '2101.csv' is resolved against the current working directory,
# i.e. the directory the kernel was started in.
data_path = os.path.join(os.getcwd(), '2101.csv')

In [29]:
# Build the pipeline object; the constructor only stores the path, nothing is read yet.
melt_data = MeltingData(data_path)

In [30]:
# Print the one-line description of each pipeline step.
melt_data.help()

step1() : load data : 데이터 불러오기
step2() : filtering data : 데이터 필터링
step3() : extract the common part in dataframe : 데이터프레임의 공통 속성 부분 추출
step4() : extract the attribute part header in dataframe : 데이터프레임에서 속성값 부분 추출
step5() : make dictionary for attribute heeder : 속성명과 순번에 대한 딕셔너리 생성
step6() : make attribute dataframe data : 속성값 데이터프레임 생성
step7() : merge common dataframe and attribute dataframe : 공통 데이터프레임과 개별속성 데이터프레임 병합
step8() : make dictionary : 딕셔너리 생성
step9() : change_attribute_name : 속성명 변경
execute() : run all steps


In [31]:
# Run step1..step9 in order; the returned long-format frame is the cell output.
melt_data.execute()

100%|██████████| 142/142 [00:00<00:00, 615.69it/s]


Unnamed: 0,SR No,속성명,속성값,공정,C|C|T
0,DOF522002061,HEADER SIZE ID,3810mm /2740mm,522,FIXED EQUIPMENT|VESSEL|COLUMN
1,DOF522004113,HEADER SIZE ID,1680mm / 3048mm,522,FIXED EQUIPMENT|VESSEL|COLUMN
2,DOF522005601,HEADER SIZE ID,1220|mm,522,FIXED EQUIPMENT|VESSEL|COLUMN
3,DOF522000246,HEADER SIZE ID,1900|mm,522,FIXED EQUIPMENT|VESSEL|HORIZONTAL
4,DOF522000344,HEADER SIZE ID,2300|mm,522,FIXED EQUIPMENT|VESSEL|HORIZONTAL
...,...,...,...,...,...
53870,HH86082,OPERATING CONDITIONS PERFORMANCE DATA TUBE SID...,50|degC,522,FIXED EQUIPMENT|AIR COOLER|PLUG
53871,DOF522001061,PROCESS DESIGN CONDITIONS OPERATING CASE AVERA...,40700|kcal/.hr.m^2,522,FIXED EQUIPMENT|FIRED HEATER|CABIN/BOX TYPE
53872,DOF522003260,PROCESS DESIGN CONDITIONS OPERATING CASE AVERA...,40700|kcal/.hr.m^2,522,FIXED EQUIPMENT|FIRED HEATER|CABIN/BOX TYPE
53873,DOF522004381,PROCESS DESIGN CONDITIONS OPERATING CASE AVERA...,27100|kcal/.hr.m^2,522,FIXED EQUIPMENT|FIRED HEATER|CABIN/BOX TYPE


In [3]:
## step 1 : load data
# The CSV is read with the cp949 (Korean) encoding.
df = pd.read_csv(data_path, encoding='cp949')
# (uncomment `df.head()` here to inspect the raw table)

In [4]:
## step 2 : filtering data
# Keep the attribute-name rows ('01_속성명') and the value rows ('03_DATA').
keep_row = df['속성 그룹 코드'].isin(['01_속성명', '03_DATA'])
df_filtered = df[keep_row]

## step 3 : extract the common part in dataframe
# The columns shared by every value row: serial number, process and class string.
is_data_row = df_filtered['속성 그룹 코드'].isin(['03_DATA'])
df_common = df_filtered.loc[is_data_row, ['SR No', '공정', 'C|C|T']]

In [5]:
## step 4 : extract the attribute part header in dataframe
# Every column from '속성1' to the last column is treated as an attribute column.
attr_1_col_no = df_filtered.columns.get_loc('속성1')
attribute_columns = df_filtered.columns[attr_1_col_no:].to_list()
is_header_row = df_filtered['속성 그룹 코드'].isin(['01_속성명'])
df_attribute_header = df_filtered.loc[is_header_row, ['C|C|T'] + attribute_columns]

## step 5: make dictionary for attribute header
# One dict per attribute-name row: 'C|C|T' plus column label -> attribute name.
header_list = [
    df_attribute_header.iloc[row_no].to_dict()
    for row_no in tqdm(range(len(df_attribute_header)))
]

100%|██████████| 142/142 [00:00<00:00, 606.50it/s]


In [6]:
## step 6: make attribute dataframe data
# Value rows only, reduced to the serial number plus the attribute columns.
data_rows = df_filtered[df_filtered['속성 그룹 코드'].isin(['03_DATA'])]
value_columns = data_rows.columns[attr_1_col_no:].to_list()

# melt: one row per (SR No, attribute column) pair
df_attrs = pd.melt(
    data_rows[['SR No'] + value_columns],
    id_vars=['SR No'],
    value_vars=value_columns,
    var_name='속성명',
    value_name='속성값',
    col_level=None,
    ignore_index=True,
)
# Drop the pairs that carry no value.
df_attrs_2 = df_attrs.dropna()
df_attrs_2

Unnamed: 0,SR No,속성명,속성값
0,DOF522002061,속성1,3810mm /2740mm
1,DOF522004113,속성1,1680mm / 3048mm
2,DOF522005601,속성1,1220|mm
3,DOF522000246,속성1,1900|mm
4,DOF522000344,속성1,2300|mm
...,...,...,...
345801,HH86082,속성55,50|degC
345802,DOF522001061,속성55,40700|kcal/.hr.m^2
345803,DOF522003260,속성55,40700|kcal/.hr.m^2
345804,DOF522004381,속성55,27100|kcal/.hr.m^2


In [7]:
## step 7 : merge common dataframe and attribute dataframe
# Left join keeps every melted row and adds its '공정' and 'C|C|T' columns.
df_indiv = df_attrs_2.merge(df_common, on='SR No', how='left')
df_indiv

Unnamed: 0,SR No,속성명,속성값,공정,C|C|T
0,DOF522002061,속성1,3810mm /2740mm,522,FIXED EQUIPMENT|VESSEL|COLUMN
1,DOF522004113,속성1,1680mm / 3048mm,522,FIXED EQUIPMENT|VESSEL|COLUMN
2,DOF522005601,속성1,1220|mm,522,FIXED EQUIPMENT|VESSEL|COLUMN
3,DOF522000246,속성1,1900|mm,522,FIXED EQUIPMENT|VESSEL|HORIZONTAL
4,DOF522000344,속성1,2300|mm,522,FIXED EQUIPMENT|VESSEL|HORIZONTAL
...,...,...,...,...,...
53870,HH86082,속성55,50|degC,522,FIXED EQUIPMENT|AIR COOLER|PLUG
53871,DOF522001061,속성55,40700|kcal/.hr.m^2,522,FIXED EQUIPMENT|FIRED HEATER|CABIN/BOX TYPE
53872,DOF522003260,속성55,40700|kcal/.hr.m^2,522,FIXED EQUIPMENT|FIRED HEATER|CABIN/BOX TYPE
53873,DOF522004381,속성55,27100|kcal/.hr.m^2,522,FIXED EQUIPMENT|FIRED HEATER|CABIN/BOX TYPE


In [8]:
## step 8 : make dictionary
# Map each 'C|C|T' value to the position of its header dict in header_list.
idx_list = list(range(len(header_list)))
cct_list = [header['C|C|T'] for header in header_list]

dict_idx = dict(zip(cct_list, idx_list))

In [9]:
# Inspect the first header dict: 'C|C|T' plus one entry per attribute column.
print(header_list[0])

{'C|C|T': 'FIXED EQUIPMENT|VESSEL|COLUMN', '속성1': 'HEADER SIZE ID', '속성2': 'HEADER LENGTH', '속성3': 'HEADER SCOPE OF INTERNAL', '속성4': 'OPERATING CONDITION LOCATION TRAYNUMBER TOP', '속성5': 'OPERATING CONDITION LOCATION TRAYNUMBER INTER', '속성6': 'OPERATING CONDITION LOCATION TRAYNUMBER BOTTOM', '속성7': 'OPERATING CONDITION LOCATION OPERATING PRESSURE TOP NORMAL', '속성8': 'OPERATING CONDITION LOCATION OPERATING PRESSURE INTER NORMAL', '속성9': 'OPERATING CONDITION LOCATION OPERATING PRESSURE BOTTOM NORMAL', '속성10': 'OPERATING CONDITION LOCATION OPERATING PRESSURE TOP MAXIMUM', '속성11': 'OPERATING CONDITION LOCATION OPERATING PRESSURE INTER MAXIMUM', '속성12': 'OPERATING CONDITION LOCATION OPERATING PRESSURE BOTTOM MAXIMUM', '속성13': 'OPERATING CONDITION LOCATION OPERATING PRESSURE TOP MINIMUM', '속성14': 'OPERATING CONDITION LOCATION OPERATING PRESSURE INTER MINIMUM', '속성15': 'OPERATING CONDITION LOCATION OPERATING PRESSURE BOTTOM MINIMUM', '속성16': 'OPERATING CONDITION LOCATION OPERATING TEMPERATUR

In [10]:
def change_attribute_name(value_name, cct, header_list) :
    """Return the attribute name stored for a column label within one class.

    Reads the notebook-level ``dict_idx`` mapping (C|C|T -> position in
    ``header_list``), so that mapping must be defined before this is called.

    Args:
        value_name: Column label to translate, e.g. '속성1'.
        cct: The row's 'C|C|T' value, used to pick the header dict.
        header_list: List of header dicts, one per attribute-name row.

    Returns:
        The value stored under ``value_name`` in the header dict for ``cct``.

    Raises:
        KeyError: If ``cct`` is not in ``dict_idx`` or ``value_name`` is not
            a key of the selected header dict.
    """
    return header_list[dict_idx[cct]][value_name]

In [11]:
# Swap each column label ('속성1', ...) for the attribute name of the row's class.
# The column is overwritten in place, so re-run step 7 before re-running this cell.
resolved_names = df_indiv.apply(
    lambda row: change_attribute_name(row['속성명'], row['C|C|T'], header_list),
    axis=1,
)
df_indiv['속성명'] = resolved_names
df_indiv

Unnamed: 0,SR No,속성명,속성값,공정,C|C|T
0,DOF522002061,HEADER SIZE ID,3810mm /2740mm,522,FIXED EQUIPMENT|VESSEL|COLUMN
1,DOF522004113,HEADER SIZE ID,1680mm / 3048mm,522,FIXED EQUIPMENT|VESSEL|COLUMN
2,DOF522005601,HEADER SIZE ID,1220|mm,522,FIXED EQUIPMENT|VESSEL|COLUMN
3,DOF522000246,HEADER SIZE ID,1900|mm,522,FIXED EQUIPMENT|VESSEL|HORIZONTAL
4,DOF522000344,HEADER SIZE ID,2300|mm,522,FIXED EQUIPMENT|VESSEL|HORIZONTAL
...,...,...,...,...,...
53870,HH86082,OPERATING CONDITIONS PERFORMANCE DATA TUBE SID...,50|degC,522,FIXED EQUIPMENT|AIR COOLER|PLUG
53871,DOF522001061,PROCESS DESIGN CONDITIONS OPERATING CASE AVERA...,40700|kcal/.hr.m^2,522,FIXED EQUIPMENT|FIRED HEATER|CABIN/BOX TYPE
53872,DOF522003260,PROCESS DESIGN CONDITIONS OPERATING CASE AVERA...,40700|kcal/.hr.m^2,522,FIXED EQUIPMENT|FIRED HEATER|CABIN/BOX TYPE
53873,DOF522004381,PROCESS DESIGN CONDITIONS OPERATING CASE AVERA...,27100|kcal/.hr.m^2,522,FIXED EQUIPMENT|FIRED HEATER|CABIN/BOX TYPE


In [12]:
# Final long-format result: one row per (SR No, attribute name, attribute value).
df_indiv

Unnamed: 0,SR No,속성명,속성값,공정,C|C|T
0,DOF522002061,HEADER SIZE ID,3810mm /2740mm,522,FIXED EQUIPMENT|VESSEL|COLUMN
1,DOF522004113,HEADER SIZE ID,1680mm / 3048mm,522,FIXED EQUIPMENT|VESSEL|COLUMN
2,DOF522005601,HEADER SIZE ID,1220|mm,522,FIXED EQUIPMENT|VESSEL|COLUMN
3,DOF522000246,HEADER SIZE ID,1900|mm,522,FIXED EQUIPMENT|VESSEL|HORIZONTAL
4,DOF522000344,HEADER SIZE ID,2300|mm,522,FIXED EQUIPMENT|VESSEL|HORIZONTAL
...,...,...,...,...,...
53870,HH86082,OPERATING CONDITIONS PERFORMANCE DATA TUBE SID...,50|degC,522,FIXED EQUIPMENT|AIR COOLER|PLUG
53871,DOF522001061,PROCESS DESIGN CONDITIONS OPERATING CASE AVERA...,40700|kcal/.hr.m^2,522,FIXED EQUIPMENT|FIRED HEATER|CABIN/BOX TYPE
53872,DOF522003260,PROCESS DESIGN CONDITIONS OPERATING CASE AVERA...,40700|kcal/.hr.m^2,522,FIXED EQUIPMENT|FIRED HEATER|CABIN/BOX TYPE
53873,DOF522004381,PROCESS DESIGN CONDITIONS OPERATING CASE AVERA...,27100|kcal/.hr.m^2,522,FIXED EQUIPMENT|FIRED HEATER|CABIN/BOX TYPE
