1. ocr 작업(필요시)을 통한 텍스트 인식<br>
2. 텍스트 및 표 추출
3. <br>
    1. 텍스트를 문장 또는 청크 단위로 분할<br>
    2. 맥락 유지를 위한 청크 중복<br>
    3. 불필요한 정보(해더, 푸터) 삭제<br>
4. <br>
    1. 표 처리. 별도 형식으로 저장(csv, json)<br>
    2. 표 처리. 표와 문장을 결합하여 청크로 저장<br>
5. 최종 청크 생성 : json 형태로 저장

In [1]:
import os
import json
import re

import numpy as np
import pandas as pd
from transformers import GPT2Tokenizer
from PyPDF2 import PdfReader
import pdfplumber

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
pdf_folder_path = os.path.join(os.getcwd(), 'pdf')
pdf_files = os.listdir(pdf_folder_path)
pdf_files

['61105-60-01-PP-501002_UPDATED PMS FOR PLANT AREA_241220_2_우림.pdf',
 'SGC-3100-LEZ-002 PIPING MATERIAL SPECIFICATION_Rev.B_수정중_이테크.pdf']

In [3]:
def get_pdf_text(pdf_path):
    with pdfplumber.open(pdf_path) as pdf:
        text = ''
        for page in pdf.pages:
            text += page.extract_text()
    return text

In [4]:
def get_df_from_nested_list(nested_list, header_row=0, data_row=1):
    headers = nested_list[header_row]
    rows = nested_list[data_row:]
    df = pd.DataFrame(rows, columns=headers)
    return df

In [5]:
def get_pdf_table(pdf_path) :
    with pdfplumber.open(pdf_path) as pdf:
        tables = []
        for page in pdf.pages:
            tables += page.extract_tables()
    return tables

In [6]:
# 이태크로 테스트
pdf_path = pdf_files[1]

#### **텍스트 추출**

In [7]:
# 1. 텍스트 추출
pdf_text = get_pdf_text(os.path.join(pdf_folder_path, pdf_path))

In [8]:
# 2. 테이블 추출
pdf_tables = get_pdf_table(os.path.join(pdf_folder_path, pdf_path))

In [9]:
# 2.1 테이블을 데이터프레임으로 변환
pdf_dfs = []
for i in range(len(pdf_tables)):
    try:
        df = get_df_from_nested_list(pdf_tables[i])
        pdf_dfs.append(df)
    except:
        print('테이블 추출 실패', i)

In [10]:
print(len(pdf_dfs))

97


#### **텍스트 전처리**

In [11]:
pdf_lines = pdf_text.split('\n')

In [12]:
pdf_lines

['Job No. MSPP',
 'PIPING MATERIAL',
 'Doc. No. SGC-3100-LEZ-002',
 'SPECIFICATION Rev. No. B',
 'Date 2024. 12. 16',
 'PIPING MATERIAL SPECIFICATION',
 'Discipline : Piping Team',
 'Project Title : Moorim Steam Piping Project',
 'Location : Ulsan, Korea',
 'Client : MOORIM P&P',
 'B 2024.12.16 Issue For Approval J.G.KIM G.T.LEE S.M.PARK K.C.ROH Y.J.HWANG',
 'A 2024.11.28 Issue For Approval J.G.KIM G.T.LEE S.M.PARK K.C.ROH Y.J.HWANG',
 'Rev.',
 'Date Description Prep’d Revw’d Chec’d Chec’d Appr’d Remark',
 'No.',
 '1 / 46Job No. MSPP',
 'PIPING MATERIAL',
 'Doc. No. SGC-3100-LEZ-002',
 'SPECIFICATION Rev. No. B',
 'Date 2024. 12. 16',
 'REVISION LOG',
 'REV. NO. REV. DATE REVISED PAGE REVISION DESCRIPTION',
 '- Minimum Wall Thickness 기준 변경(#3RB 기준 준수)',
 'B 2024.12.16 6 DN 50 and Smaller : Sch.40 → Sch.80',
 'DN 300 and Larger : Sch.20 → STD',
 '37 - P & ID 및 Line Condition 설계에 따른, 관련 내용 수정',
 '- Minimum Wall Thickness 기준 변경에 따른 Schedule 수정',
 '38 ~ 46',
 '- Line Condition 변경에 따른, 압력 /

In [13]:
"""header 제거
'Job No. MSPP',
'PIPING MATERIAL',
'Doc. No. SGC-3100-LEZ-002',
'SPECIFICATION Rev. No. B',
'Date 2024. 12. 16',
'PIPING MATERIAL SPECIFICATION',
"""

pdf_headers = [
    'Job No. MSPP',
    'PIPING MATERIAL',
    'Doc. No. SGC-3100-LEZ-002',
    'SPECIFICATION Rev. No. B',
    'Date 2024. 12. 16',
    'PIPING MATERIAL SPECIFICATION',]

"""footer 제거
숫자[1-2자리] / 46Job No. MSPP
"""
footer_pattern = r'\b\d{1,2} / 46Job No\. MSPP\b'

In [14]:
# header 제거
preprocessing_lines = []
for line in pdf_lines :
    if line in pdf_headers :
        pass
    else :
        preprocessing_lines.append(line)

In [15]:
# footer 제거
preprocessing_lines_2 = []
for line in preprocessing_lines :
    matches = re.findall(footer_pattern, line)
    if not matches :
        preprocessing_lines_2.append(line)

In [16]:
preprocessing_lines_2
preprocessing_lines_2 = ['PIPING MATERIAL SPECIFICATION'] + preprocessing_lines_2
preprocessing_lines_2

['PIPING MATERIAL SPECIFICATION',
 'Discipline : Piping Team',
 'Project Title : Moorim Steam Piping Project',
 'Location : Ulsan, Korea',
 'Client : MOORIM P&P',
 'B 2024.12.16 Issue For Approval J.G.KIM G.T.LEE S.M.PARK K.C.ROH Y.J.HWANG',
 'A 2024.11.28 Issue For Approval J.G.KIM G.T.LEE S.M.PARK K.C.ROH Y.J.HWANG',
 'Rev.',
 'Date Description Prep’d Revw’d Chec’d Chec’d Appr’d Remark',
 'No.',
 'REVISION LOG',
 'REV. NO. REV. DATE REVISED PAGE REVISION DESCRIPTION',
 '- Minimum Wall Thickness 기준 변경(#3RB 기준 준수)',
 'B 2024.12.16 6 DN 50 and Smaller : Sch.40 → Sch.80',
 'DN 300 and Larger : Sch.20 → STD',
 '37 - P & ID 및 Line Condition 설계에 따른, 관련 내용 수정',
 '- Minimum Wall Thickness 기준 변경에 따른 Schedule 수정',
 '38 ~ 46',
 '- Line Condition 변경에 따른, 압력 / 온도 수정',
 'TABLE OF CONTENTS',
 '1. GENERAL',
 '2. CODE & STANDARDS',
 '3. GENERAL REQUIREMENT',
 '4. CONNECTIONS',
 '5. INSTRUMENT CONNECTION DETAILS',
 '6. SAMPLE NOZZLE INSTALLATION DETAIL',
 '7. ABBREVIATIONS',
 '8. MATERIAL CLASS IDENTIF

#### **해더 테이블 제거**

In [17]:
for table in pdf_tables :
    if ['', 'PIPING MATERIAL\nSPECIFICATION', 'Job No. MSPP'] in table :
        pdf_tables.remove(table)

In [18]:
pdf_dfs = []
for i in range(len(pdf_tables)):
    try:
        df = get_df_from_nested_list(pdf_tables[i])
        pdf_dfs.append(df)
    except:
        print('테이블 추출 실패', i)

In [19]:
len(pdf_dfs)

58

#### **header를 제외한 나머지 58개 테이블에 대한 상세한 전처리**

In [20]:
preprocessing_dfs = []

In [21]:
"마지막 열이 해더로 가고 빈 값 삭제"
pdf_dfs[0]

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
0,,,,,,,,,
1,,,,,,,,,
2,B,2024.12.16,Issue For Approval,J.G.KIM,G.T.LEE,S.M.PARK,K.C.ROH,Y.J.HWANG,
3,A,2024.11.28,Issue For Approval,J.G.KIM,G.T.LEE,S.M.PARK,K.C.ROH,Y.J.HWANG,
4,Rev.\nNo.,Date,Description,Prep’d,Revw’d,Chec’d,Chec’d,Appr’d,Remark


In [22]:
table_name = 'revision approval'
df = pdf_dfs[0]
df.columns = df.iloc[-1]
df = df.drop(df.index[-1])
df = df.drop(df.index[:2])
df.reset_index(drop=True, inplace=True)
dict_name_df = {table_name: df}
preprocessing_dfs.append(dict_name_df)
df

4,Rev.\nNo.,Date,Description,Prep’d,Revw’d,Chec’d,Chec’d.1,Appr’d,Remark
0,B,2024.12.16,Issue For Approval,J.G.KIM,G.T.LEE,S.M.PARK,K.C.ROH,Y.J.HWANG,
1,A,2024.11.28,Issue For Approval,J.G.KIM,G.T.LEE,S.M.PARK,K.C.ROH,Y.J.HWANG,


In [23]:
# table name = revision history
print(pdf_dfs[1].head())
pdf_dfs[1].tail()

  REV. NO.   REV. DATE REVISED PAGE  \
0        B  2024.12.16            6   
1                                37   
2                           38 ~ 46   
3                                     
4                                     

                                REVISION DESCRIPTION  
0  - Minimum Wall Thickness 기준 변경(#3RB 기준 준수)\nDN...  
1         - P & ID 및 Line Condition 설계에 따른, 관련 내용 수정  
2  - Minimum Wall Thickness 기준 변경에 따른 Schedule 수정...  
3                                                     
4                                                     


Unnamed: 0,REV. NO.,REV. DATE,REVISED PAGE,REVISION DESCRIPTION
19,,,,
20,,,,
21,,,,
22,,,,
23,,,,


In [24]:
table_name = 'revision history'
df = pdf_dfs[1]
df.replace('', np.nan, inplace=True)
df = df.dropna(axis=0, how='all')
df
dict_name_df = {table_name: df}
preprocessing_dfs.append(dict_name_df)
print(len(preprocessing_dfs))
df

2


Unnamed: 0,REV. NO.,REV. DATE,REVISED PAGE,REVISION DESCRIPTION
0,B,2024.12.16,6,- Minimum Wall Thickness 기준 변경(#3RB 기준 준수)\nDN...
1,,,37,"- P & ID 및 Line Condition 설계에 따른, 관련 내용 수정"
2,,,38 ~ 46,- Minimum Wall Thickness 기준 변경에 따른 Schedule 수정...


In [25]:
pdf_dfs[2]

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4


In [26]:
pdf_dfs[3]

Unnamed: 0,ASME,American Society of Mechanical Engineers
0,ASME B31.1,Power piping
1,ASME B31.3,Process piping
2,ASTM,American Society of Testing and Material
3,PFI,Pipe Fabrication Institute
4,API,American Petroleum Institute
5,MSS,Manufacturers Standardization Society of the V...
6,ANSI,American National Standards Institute
7,AWWA,American Water Works Association
8,AISC,American Institute of Steel Construction
9,AWS,American Welding Society


In [27]:
table_name = "CODE & STANDARDS"
df = pdf_dfs[3]
dict_name_df = {table_name: df}
preprocessing_dfs.append(dict_name_df)
print(len(preprocessing_dfs))

3


In [28]:
pdf_dfs[4]

Unnamed: 0, Welded and seamless steel pipe,: ASME B36.10M:
0, Stainless steel pipe,: ASME B36.19M
1, Other materials,: Relevant code & Standard


In [29]:
pdf_dfs[5]

Unnamed: 0,Pipe Size,Minimum Wall Thickness,None
0,,Carbon & Alloy steel,Stainless Steel
1,DN 50 and Smaller,Sch.80,Sch.40s
2,DN 65 Through 250,Sch.40 / STD,Sch.10s
3,DN 300 and Larger,STD,Sch.10s


In [30]:
table_name = "PIPE SIZE AND WALL THICKNESS BY MATERIAL"
df = pdf_dfs[5]
new_header = ['Pipe_Size', 'Minimum_Wall_Thickness_Carbon_and_Alloy_steel', 'Minimum_Wall_Thickness_Stainless_Steel']
df.columns = new_header
df = df.drop(df.index[0])
dict_name_df = {table_name: df}
preprocessing_dfs.append(dict_name_df)
print(len(preprocessing_dfs))
df

4


Unnamed: 0,Pipe_Size,Minimum_Wall_Thickness_Carbon_and_Alloy_steel,Minimum_Wall_Thickness_Stainless_Steel
1,DN 50 and Smaller,Sch.80,Sch.40s
2,DN 65 Through 250,Sch.40 / STD,Sch.10s
3,DN 300 and Larger,STD,Sch.10s


In [31]:
pdf_dfs[6]

Unnamed: 0, DN 50 and smaller,: Socket weld/Screwed /Flanged
0, DN 65 and larger,: Butt weld / Flanged


In [32]:
pdf_dfs[6]

Unnamed: 0, DN 50 and smaller,: Socket weld/Screwed /Flanged
0, DN 65 and larger,: Butt weld / Flanged


In [33]:
pdf_dfs[7:10]

[    Factory-made wrought steel buttwelding fittings        : ASME B16.9
 0        Forged fitting socket-welding and threaded       : ASME B16.11
 1                                  Buttwelding ends       : ASME B16.25
 2                   Malleable iron threaded fitting        : ASME B16.3
 3   Wrought steel buttwelding short radius elbow...       : ASME B16.28
 4                              Non metallic fitting  : Maker’s standard,
       ASME Class 150 to 2500 (DN600 and under)            : ASME B16.5
 0         ASME Class 150 to 900 (DN650 & over)  : ASME B16.47 Series A
 1                ASME Cast Iron Class 125, 250            : ASME B16.1
 2   AWWA Class D (DN650 to 3000) for CW system             : AWWA C207,
              Spiral wound gaskets  : ASME B16.20
 0   Ring joint gasket and grooves  : ASME B16.20
 1       Non-metallic flat gaskets  : ASME B16.21]

In [34]:
pdf_dfs[10:12]

[   Square and Hex Bolts and Screws (Inch Series)  : ASME B18.2.1
 0              Square and Hex Nuts (Inch Series)  : ASME B18.2.2,
 Empty DataFrame
 Columns: [ Continuous and Double End Studs (Inch Series), : ASME B18.31.2]
 Index: []]

In [35]:
pdf_dfs[12]

Unnamed: 0,ASME\nClass,Gate,Globe,Ball,Butterfly
0,150#\n300#\n600#\n900#\n1500#\n2500#,DN300 & larger\nDN300 & larger\nDN250 & larger...,DN300 & larger\nDN300 & larger\nDN200 & larger...,DN250 & larger\nDN200 & larger\nDN200 & larger...,DN300 & larger\nDN300 & larger


In [36]:
table_name = "Valve Size and Rating for Hand-gear operation"
df_header = ['ASME_Class', 'Gate', 'Globe', 'Ball', 'Butterfly']
value_class = pdf_dfs[12]['ASME\nClass'].iloc[0].split('\n')
value_gate = pdf_dfs[12]['Gate'].iloc[0].split('\n')
value_globe = pdf_dfs[12]['Globe'].iloc[0].split('\n')
value_ball = pdf_dfs[12]['Ball'].iloc[0].split('\n')
value_butterfly = pdf_dfs[12]['Butterfly'].iloc[0].split('\n')

row_0 = [value_class[0], value_gate[0], value_globe[0], value_ball[0], value_butterfly[0]]
row_1 = [value_class[1], value_gate[1], value_globe[1], value_ball[1], value_butterfly[1]]
row_2 = [value_class[2], value_gate[2], value_globe[2], value_ball[2]]
row_3 = [value_class[3], value_gate[3], value_globe[3], value_ball[3]]
row_4 = [value_class[4], value_gate[4], value_globe[4]]
row_5 = [value_class[5], value_gate[5], value_globe[5]]

df = pd.DataFrame([row_0, row_1, row_2, row_3, row_4, row_5], columns=df_header)
dict_name_df = {table_name: df}
preprocessing_dfs.append(dict_name_df)
print(len(preprocessing_dfs))
df

5


Unnamed: 0,ASME_Class,Gate,Globe,Ball,Butterfly
0,150#,DN300 & larger,DN300 & larger,DN250 & larger,DN300 & larger
1,300#,DN300 & larger,DN300 & larger,DN200 & larger,DN300 & larger
2,600#,DN250 & larger,DN200 & larger,DN200 & larger,
3,900#,DN250 & larger,DN200 & larger,DN200 & larger,
4,1500#,DN150 & larger,DN150 & larger,,
5,2500#,DN150 & larger,DN80 & larger,,


In [37]:
pdf_dfs[13]

Unnamed: 0,Valve Class,DN 65 and Larger,DN 50 and Smaller
0,ASME 300 Class and Below,Bolted Bonnet,Bolted Bonnet
1,ASME 600 Class,Pressure Seal Type,Bolted Bonnet
2,ASME 900 Class and Higher,Pressure Seal Type,Welded Bonnet


In [38]:
table_name = "Valve Bonnet or Cover"
df = pdf_dfs[13]
dict_name_df = {table_name: df}
preprocessing_dfs.append(dict_name_df)
print(len(preprocessing_dfs))
df

6


Unnamed: 0,Valve Class,DN 65 and Larger,DN 50 and Smaller
0,ASME 300 Class and Below,Bolted Bonnet,Bolted Bonnet
1,ASME 600 Class,Pressure Seal Type,Bolted Bonnet
2,ASME 900 Class and Higher,Pressure Seal Type,Welded Bonnet


In [39]:
pdf_dfs[14]

Unnamed: 0, DN 50 and Smaller,: Solid Wedge
0, DN 65 and Larger,: Flexible Wedge


In [40]:
pdf_dfs[15:21]

[   DN 50 and Smaller  : Plug type
 0   DN 65 and Larger  : Cone type,
    DN 50 and Smaller  \
 0   DN 65 and Larger   
 
   : Lift type for ASME CL.600 and under,\nY-Lift with spring type for ASME CL.900 and above  
 0  : Swing type for ASME CL.600 and under,\nTilti...                                         ,
    Face-to-Face and End-to-End dimensions of valves  : ASME B16.10
 0                                  Buttwelding Ends  : ASME B16.25
 1          Valves-Flanged, Threaded and Welding End  : ASME B16.34
 2             Fire Test for Soft-Seated Ball Valves      : API 607,
    Threaded end pipe        : Use threaded coupling
 0     Plain end pipe  : Use socket welding coupling
 1   Beveled end pipe                     : Buttweld,
                       Grinding work with power tool
 0                                      Welding work
 1   Surface preparation with power tool (SSPC-SP 3)
 2   Painting with metallic zinc rich epoxy prime...,
                         

In [41]:
pdf_dfs[21]

Unnamed: 0,Unnamed: 1,Main Line Size,Valve Size
0,General Piping,DN50A & Smaller,DN20 (Except for DN15 Line)
1,,DN65 ~ DN250,DN25
2,,DN300 & Larger,DN25
3,Lined Piping,DN65 & Larger,DN50


In [42]:
table_name = "Vent and Drain Connection"
df = pdf_dfs[21]
df.columns = ['General/Lined', 'Main_Line_Size', 'Valve_Size']
df.loc[1, 'General/Lined'] = 'General Piping'
df.loc[2, 'General/Lined'] = 'General Piping'
dict_name_df = {table_name: df}
preprocessing_dfs.append(dict_name_df)
print(len(preprocessing_dfs))
df

7


Unnamed: 0,General/Lined,Main_Line_Size,Valve_Size
0,General Piping,DN50A & Smaller,DN20 (Except for DN15 Line)
1,General Piping,DN65 ~ DN250,DN25
2,General Piping,DN300 & Larger,DN25
3,Lined Piping,DN65 & Larger,DN50


In [43]:
pdf_dfs[22]

Unnamed: 0,1. Detail specification of valves shall be in compliance with the applicable Piping Material\nSpecification.
0,2. Double valve shall be used on all piping cl...
1,3. The design drawings will indicate where val...


In [44]:
pdf_dfs[23]

Unnamed: 0,4. Valves smaller than DN25 shall be used only where the size of main line is DN20 &\nsmaller.
0,5. Connections for lined piping shall be in ac...


In [45]:
pdf_dfs[24]

Unnamed: 0,Main Pipe Line Class,Pressure Instrument Connection,Temperature Instrument Connection
0,2500# and Higher,"DN20 Socket welding Half coupling\n(Note.3,4)",- Pipe wall thickness greater than\n19.05mm(No...
1,1500# and 900#,"DN20 Socket welding Half coupling\n(Note.3,4)",
2,600# and Lower,"DN20 Socket welding Half coupling\n(Note.3,4)","- Threaded half-coupling(Note.3,4)\n- DN80 & s..."
3,Lined Pipe,See Para 5.9 “Lined piping connection(for rubb...,


In [46]:
table_name = 'Pressure & Temperature Instrument Connection'
df = pdf_dfs[24]
dict_name_df = {table_name: df}
preprocessing_dfs.append(dict_name_df)
print(len(preprocessing_dfs))
df

8


Unnamed: 0,Main Pipe Line Class,Pressure Instrument Connection,Temperature Instrument Connection
0,2500# and Higher,"DN20 Socket welding Half coupling\n(Note.3,4)",- Pipe wall thickness greater than\n19.05mm(No...
1,1500# and 900#,"DN20 Socket welding Half coupling\n(Note.3,4)",
2,600# and Lower,"DN20 Socket welding Half coupling\n(Note.3,4)","- Threaded half-coupling(Note.3,4)\n- DN80 & s..."
3,Lined Pipe,See Para 5.9 “Lined piping connection(for rubb...,


In [47]:
pdf_dfs[25]

Unnamed: 0,Pipe material specification,None,Fitting,Thermowell,None.1
0,,,,Bar,Forging
1,Carbon\nSteel,"A53 Gr.B,\nA106 Gr.B,\nA106 Gr.C,\nA672 Gr.C60...","A105,\nA234 Gr.WPB,\nKS B1542/B1533\nPH420","A675 Gr.70,\nA479 Gr.316/316L\n(Threaded Joint...","A105,\nA182 Gr.F316/316L\n(Threaded Joint\nOnly)"
2,Low Alloy\nSteel,"A335 Gr.P11,\nA335 Gr.P22,","A182 Gr.F11,\nA182 Gr.F22",Not Applicable,"A182 Gr.F11,\nA182 Gr.F22,"
3,Stainless\nSteel,A312 Gr.TP304/304L\nA312 Gr.TP316/316L\nA358 G...,A182 Gr.F304/304L\nA182 Gr.F316/316L,A479 Gr.316/316L,"A182 Gr.F316/316L,"


In [48]:
table_name = 'Thermowell materila selection chart'
df = pdf_dfs[25]
df.columns = ['Pipe_material_specification', 'Pipe_material_specification_detail', 'Fitting', 'Thermowell_Bar', 'Thermowell_Bar_Forging']
df = df.drop(df.index[0])
dict_name_df = {table_name: df}
preprocessing_dfs.append(dict_name_df)
print(len(preprocessing_dfs))
df

9


Unnamed: 0,Pipe_material_specification,Pipe_material_specification_detail,Fitting,Thermowell_Bar,Thermowell_Bar_Forging
1,Carbon\nSteel,"A53 Gr.B,\nA106 Gr.B,\nA106 Gr.C,\nA672 Gr.C60...","A105,\nA234 Gr.WPB,\nKS B1542/B1533\nPH420","A675 Gr.70,\nA479 Gr.316/316L\n(Threaded Joint...","A105,\nA182 Gr.F316/316L\n(Threaded Joint\nOnly)"
2,Low Alloy\nSteel,"A335 Gr.P11,\nA335 Gr.P22,","A182 Gr.F11,\nA182 Gr.F22",Not Applicable,"A182 Gr.F11,\nA182 Gr.F22,"
3,Stainless\nSteel,A312 Gr.TP304/304L\nA312 Gr.TP316/316L\nA358 G...,A182 Gr.F304/304L\nA182 Gr.F316/316L,A479 Gr.316/316L,"A182 Gr.F316/316L,"


In [49]:
pdf_dfs[26]

Unnamed: 0,1. Sharp corners shall be removed before lining.
0,2. The standard shall be submitted to purchase...


In [50]:
pdf_dfs[27]

Unnamed: 0,"1. For sample nozzle detail, refer to sample nozzle data sheets."
0,2. Nozzle should be installed in lower half of...
1,3. All dimensions are in millimeters.


In [51]:
pdf_dfs[28]

Unnamed: 0,CODE,ABBREVIATION,DESCRIPTION
0,A,A/G\nANGL GLB\nAS,Above Ground\nAngle Globe Type\nAlloy Steel
1,B,BB\nBE\nBHN\nBW\nB-FLY\nBALL\nB&S,Bolted Bonnet\nBeveled Ends\nBrinelled Hardnes...
2,C,CL\nCPVC\nCS,Class\nChlorinate Polyvinyle Chloride\nCarbon ...
3,D,DWG,Drawing
4,E,EFW\nERW,Electric Fusion Welding\nElectric Resistance W...
5,F,FB\nFF\nFLG\nFLGD\nF to F\nFLX DSC\nFP\nF.V,Full Bore Type\nFull Face (Flat Face)\nFlange\...
6,G,GALV\nGR\nGR OP\nGRP\nGRE,Galvanized\nGrade\nGear Operator\nGlassfibre R...
7,H,HB\nHD\nHEX. NUT\nHF\nHOR,Brinell Hardness Number symbol per ASTM E10\n(...


In [52]:
pdf_dfs[29]

Unnamed: 0,CODE,ABBREVIATION,DESCRIPTION
0,I,ID\nISNS\nISRS\nINTM,Inside Diameter\nInside Screw and Non-Rising S...
1,L,LIFT\nLJ\nL. DSC\nLVR OP\nLR,Lift type\nLapped (Loose) Joint\nLoose Disc\nL...
2,M,MAT’L\nMAX\nMIN\nM. BOLT\nM & F\nMFR\nMJ\nMTL ST,Material\nMaximum\nMinimum\nMachine Bolt\nLarg...
3,N,NB\nNO\nNOM,Non-Bonnet\nNumber\nNominal
4,O,OD\nOSND\nOS & Y,Outside Diameter\nOutside Screw Non-Bonnet\nOu...
5,P,PSB\nPSC\nPE\nPL\nPLG DSC\nPSTN\nPPL\nPVC,Pressure Seal Bonnet\nPressure Seal Cap/Cover\...
6,R,R\nRF\nRL\nRP\nRTJ\nRTFE,Radius\nRaised Face\nRubber Lined\nReduction P...
7,S,SB,Screwed Bonnet


In [53]:
pdf_dfs[30]

Unnamed: 0,CODE,ABBREVIATION,DESCRIPTION
0,,SC\nS. BOLT\nS. CHECK\nSCR’D\nSCH\nSMLS\nSO\nS...,Screwed Cap/Cover\nStud Bolt\nStop Check Valve...
1,T,T or THK\nt & g\nT & G\nTE\nTFE\nTFE SLV\nTFE ...,"Thickness (Inch, mm)\nSmall Tongue and Groove ..."
2,U,UB\nUC\nU/G(UG),Union Bonnet\nUnion Cap/Cover\nUnder Ground
3,V,VRT,Vertical Installation
4,W,W\nWC\nWN\nWB\nW. S GATE,Welded Product\nWelded Cap/Cover\nWelded Neck\...
5,Y,Y TYPE,Y-Type / Y-Pattern


In [54]:
df1 = pdf_dfs[28]
df2 = pdf_dfs[29]
df3 = pdf_dfs[30]

table_name = 'Abbreviations for this specification'
df = pd.concat([df1, df2, df3], ignore_index=True)
df.loc[16, 'CODE'] = 'S'

new_rows = []

for _, row in df.iterrows():
    code = row['CODE']
    abbreviation = row['ABBREVIATION'].split('\n')
    description = row['DESCRIPTION'].split('\n')

    for abbr, desc in zip(abbreviation, description):
        new_rows.append({'CODE' : code, 'ABBREVIATION' : abbr, 'DESCRIPTION' : desc})
new_df = pd.DataFrame(new_rows)

dict_name_df = {table_name: new_df}
preprocessing_dfs.append(dict_name_df)
print(len(preprocessing_dfs))
new_df

10


Unnamed: 0,CODE,ABBREVIATION,DESCRIPTION
0,A,A/G,Above Ground
1,A,ANGL GLB,Angle Globe Type
2,A,AS,Alloy Steel
3,B,BB,Bolted Bonnet
4,B,BE,Beveled Ends
...,...,...,...
103,W,WC,Welded Cap/Cover
104,W,WN,Welded Neck
105,W,WB,Welded Bonnet
106,W,W. S GATE,Water Seal Gate Valve


In [55]:
pdf_dfs[31]

Unnamed: 0,① Class / Rating
0,② Material
1,③ Material Grade Sequence Number


In [56]:
pdf_dfs[32]

Unnamed: 0,2500,:,2500#,ASME B16.5
0,1500,:,1500#,ASME B16.5
1,900,:,900#,ASME B16.5
2,600,:,600#,ASME B16.5
3,300,:,300#,ASME B16.5
4,150,:,150#,ASME B16.5/ B16.47/AWWA C207


In [57]:
pdf_dfs[33]

Unnamed: 0,A,: Alloy Steel
0,C,: Carbon Steel
1,S,: Stainless Steel
2,D,: DUPLEX S.S (S32205)
3,H,: HDPE
4,F,: PVC / UPVC / CPVC
5,G,: GRP
6,N,: Carbon Steel – Galvanized
7,Q\nR\nZ,: Carbon Steel – PE Coated\n: Carbon Steel – P...


In [58]:
pdf_dfs[34].head()

Unnamed: 0,SHORT CODE,DESIGNATION
0,P,Pipe
1,,
2,NPP(3/4/6),Nipple PBE (? inch) Long
3,NTT(3/4/6),Nipple TBE (? inch) Long
4,NPT(3/4/6),Nipple TOE (? inch) Long


In [59]:
pdf_dfs[35].head()

Unnamed: 0,SHORT CODE,DESIGNATION
0,RC,Reducer Concentric.
1,RE,Reducer Eccentric.
2,C,Cap
3,FC,Full Coupling
4,HC,Half Coupling


In [60]:
pdf_dfs[36].head()

Unnamed: 0,SHORT CODE,DESIGNATION
0,FF,Flange for Flat Face Type
1,FJ,Flange with Jack Screw
2,FJ1,Flange with Jack Screw for\nHigher Rating
3,FR,Flange Reducing
4,FSB,Figure-8 Blank or Paddle\nSpacer & Blank


In [61]:
pdf_dfs[37].head()

Unnamed: 0,SHORT CODE,DESIGNATION
0,CW,Check Valve Wafer Type
1,CH,Check Valve
2,GA,Gate Valve
3,GAF,Gate Valve Flanged Ends
4,GAX,Gate Valve SW x TE


In [62]:
pdf_dfs[38].head()

Unnamed: 0,SHORT CODE,DESIGNATION
0,SRC,Strainer Cone Type(Temp.)
1,SRT,Strainer T-Type
2,SRY,Strainer Y-Type
3,SRS,Strainer Special Type
4,EXP,Expansion Joint


In [63]:
pdf_dfs[39].tail()

Unnamed: 0,SHORT CODE,DESIGNATION
10,TCR,Tube Concentric Reducer
11,TER,Tube Eccentric Reducer
12,TFC,Female Tube CONN
13,TMC,Male Tube CONN
14,TBU,Tube Union


In [64]:
df1 = pdf_dfs[35]
df2 = pdf_dfs[36]
df3 = pdf_dfs[37]
df4 = pdf_dfs[38]
df5 = pdf_dfs[39]

table_name = 'Short Code List'
df = pd.concat([df1, df2, df3, df4, df5], ignore_index=True)
dict_name_df = {table_name: df}
preprocessing_dfs.append(dict_name_df)
print(len(preprocessing_dfs))
df

11


Unnamed: 0,SHORT CODE,DESIGNATION
0,RC,Reducer Concentric.
1,RE,Reducer Eccentric.
2,C,Cap
3,FC,Full Coupling
4,HC,Half Coupling
...,...,...
99,TCR,Tube Concentric Reducer
100,TER,Tube Eccentric Reducer
101,TFC,Female Tube CONN
102,TMC,Male Tube CONN


In [65]:
pdf_dfs[40]

Unnamed: 0,EZIS\nNUR,15,F,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,...,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21
0,,20,F,F,,,,,,,...,,,,,,,,,,
1,,25,F,F,F,,,,,,...,,,,,,,,,,
2,,50,F,F,F,F,,,,,...,,,,,,,,,,
3,,65,S,S,S,*,B,,,,...,,,,,,,,,,
4,,80,S,S,S,S,B,B,,,...,,,,,,,,,,
5,,100,S,S,S,S,B,B,B,,...,,,,,,,,,,
6,,150,S,S,S,S,B,B,B,B,...,,,,,,,,,,
7,,200,S,S,S,S,W,W,B,B,...,,,,,,,,,,
8,,250,S,S,S,S,W,W,B,B,...,B,,,,,,,,,
9,,300,S,S,S,S,W,W,W,B,...,B,B,,,,,,,,


In [84]:
df = pdf_dfs[40]
table_name = 'Branch Table : Steam, Condensate, Water, Utility for 150# over'
df_transposed = df.T
df_transposed_reset = df_transposed.reset_index()
df2 = df_transposed_reset.T
df2.drop(columns=0, inplace=True)
df2.drop(columns=[19, 20], inplace=True)
df2.columns = ['Run_size', 'branch_size_15', 'branch_size_20', 'branch_size_25', 'branch_size_50', 'branch_size_65', 'branch_size_80', 'branch_size_100', 'branch_size_150', 'branch_size_200', 'branch_size_250', 'branch_size_300', 'branch_size_350', 'branch_size_400', 'branch_size_450', 'branch_size_500', 'branch_size_550', 'branch_size_600']
df2.drop(index=[17, 18], inplace=True)
df2.loc[16, 'branch_size_350'] = 'The criteria is ASME B 16.9 for TEE size range'
df2.loc[16, 'branch_size_400'] = 'The criteria is ASME B 16.9 for TEE size range'
df2.loc[16, 'branch_size_450'] = 'The criteria is ASME B 16.9 for TEE size range'
df2.loc[16, 'branch_size_500'] = 'The criteria is ASME B 16.9 for TEE size range'
df2.loc[16, 'branch_size_550'] = 'The criteria is ASME B 16.9 for TEE size range'
df2.loc[16, 'branch_size_600'] = 'The criteria is ASME B 16.9 for TEE size range'
df2.reset_index(drop=True, inplace=True)
dict_name_df = {table_name: df2}
preprocessing_dfs.append(dict_name_df)
print(len(preprocessing_dfs))
df2

12


Unnamed: 0,Run_size,branch_size_15,branch_size_20,branch_size_25,branch_size_50,branch_size_65,branch_size_80,branch_size_100,branch_size_150,branch_size_200,branch_size_250,branch_size_300,branch_size_350,branch_size_400,branch_size_450,branch_size_500,branch_size_550,branch_size_600
0,15,F,,,,,,,,,,,,,,,,
1,20,F,F,,,,,,,,,,,,,,,
2,25,F,F,F,,,,,,,,,,,,,,
3,50,F,F,F,F,,,,,,,,,,,,,
4,65,S,S,S,*,B,,,,,,,,,,,,
5,80,S,S,S,S,B,B,,,,,,,,,,,
6,100,S,S,S,S,B,B,B,,,,,,,,,,
7,150,S,S,S,S,B,B,B,B,,,,,,,,,
8,200,S,S,S,S,W,W,B,B,B,,,,,,,,
9,250,S,S,S,S,W,W,B,B,B,B,,,,,,,


In [87]:
pdf_dfs[41]

Unnamed: 0,Piping\nMaterial\nClass,Basic\nMaterial,Class\nRating,Appli-\ncable\nCode,C.A\n(mm),Line\nService,Design\nCondition,None,Service,Branch,Remark
0,,,,,,,Press\n(barg),Temp\n(℃),,,
1,150C2A,A106-B\nSMLS,150.0,ASME\nB31.1,1.6,LSC\nHSC,5.0\n9.7\n9.7\n5.0\n5.0,130.0\n130.0\n130.0\n270.0\n230.0,Condensate Tank to Pump Suction\nCondensate Re...,BR-1,
2,300C2A,A106-B\nSMLS,300.0,ASME\nB31.1,1.6,HPS,20.0\n20.0\n20.0\n20.0,370.0\n370.0\n300.0\n300.0,MP Stem (to S-Oil)\nHPS Reducing Valve Downstr...,BR-1,
3,600C2A,A106-B\nSMLS,600.0,ASME\nB31.1,1.6,HPS2\nLFW,41.0\n41.0\n80.0\n55.0,390.0\n390.0\n163.0\n163.0,TBN Extraction Steam (from #RB3)\nReducing Val...,BR-1,
4,300A2H,A335-P22\nSMLS,300.0,ASME\nB31.1,0.25,HPS\nBSC,20.0\n5.0,370.0\n310.0,HPS Reducing Valve Downstream\n(Transition Pie...,BR-1,
5,600A2H,A335-P22\nSMLS,600.0,ASME\nB31.1,0.25,HHS3\nBSC,43.2\n50.0\n5.0,413.0\n480.0\n330.0,HP Steam (to S-Oil)\nMain Steam Drain to #3RB ...,BR-1,
6,900A1A,A335-P22\nSMLS,900.0,ASME\nB31.1,0.25,HHS,75.0\n75.0,470.0\n470.0,Main Steam (from #PB1 Boiler)\nReducing Valve ...,BR-1,
7,1500A2H,A335-P22\nSMLS,1500.0,ASME\nB31.1,0.25,HHS2,101.0\n101.0\n101.0,505.0\n505.0\n505.0,Main Steam (from #RB3 Boiler)\nMain Steam Drai...,BR-1,
8,150S1A,A312-\nTP304\nSMLS,150.0,ASME\nB31.1,0.0,DMW\nINA\nLSC,5.0\n15.0\n15.0\n15.0\n15.0\n10.0\n5.0\n15.0\n...,70.0\n70.0\n70.0\n70.0\n70.0\n70.0\n70.0\n70.0...,Demi. Water Tank to Pump Suction\nDemi. Water ...,BR-1,
9,300S1A,A312-\nTP304\nSMLS,300.0,ASME\nB31.1,0.0,HPS,20.0,300.0,Samples from MP Steam,BR-1,


In [115]:
df = pdf_dfs[41]
table_name = 'Service System Index'
columns = df.columns.to_list()
new_columns = [
    'Piping_Material_Class',
    'Basic_Material',
    'Class_Rating',
    'Applicable_Code',
    'C.A_(mm)',
    'Line_Service',
    'Design_Condition_Pressure',
    'Design_Condition_Temperature',
    'Service',
    'Branch',
    'Remark'
]
df.columns = new_columns
df['Basic_Material'] = df['Basic_Material'].str.replace('\n', ' ')
df['Applicable_Code'] = df['Applicable_Code'].str.replace('\n', ' ')

## line service
new_rows = []
for _, row in df.iterrows():
    piping_material_class = row['Piping_Material_Class']
    basic_material = row['Basic_Material']
    class_rating = row['Class_Rating']
    applicable_code = row['Applicable_Code']
    ca_mm = row['C.A_(mm)']
    line_service = row['Line_Service'].split('\n')
    desing_condition_pressure = row['Design_Condition_Temperature']
    desing_condition_temperature = row['Design_Condition_Temperature']
    service = row['Service']
    branch = row['Branch']
    remark = row['Remark']

    for value in line_service :
        new_rows.append({
            'Piping_Material_Class' : piping_material_class,
            'Basic_Material' : basic_material,
            'Class_Rating' : class_rating,
            'Applicable_Code' : applicable_code,
            'C.A_(mm)' : ca_mm,
            'Line_Service' : value,
            'Design_Condition_Pressure' : desing_condition_pressure,
            'Design_Condition_Temperature' : desing_condition_temperature,
            'Service' : service,
            'Branch' : branch,
            'Remark' : remark
        })
df = pd.DataFrame(new_rows)

# design condition pressure
new_rows = []
for _, row in df.iterrows():
    piping_material_class = row['Piping_Material_Class']
    basic_material = row['Basic_Material']
    class_rating = row['Class_Rating']
    applicable_code = row['Applicable_Code']
    ca_mm = row['C.A_(mm)']
    line_service = row['Line_Service']
    design_condition_pressure = row['Design_Condition_Pressure'].split('\n')
    design_condition_temperature = row['Design_Condition_Temperature'].split('\n')
    service = row['Service'].split('\n')
    branch = row['Branch']
    remark = row['Remark']

    for pressure, temperature, serv in zip(design_condition_pressure, design_condition_temperature, service):
        new_rows.append({
            'Piping_Material_Class': piping_material_class,
            'Basic_Material': basic_material,
            'Class_Rating': class_rating,
            'Applicable_Code': applicable_code,
            'C.A_(mm)': ca_mm,
            'Line_Service': line_service,
            'Design_Condition_Pressure': pressure,
            'Design_Condition_Temperature': temperature,
            'Service': serv,
            'Branch': branch,
            'Remark': remark
        })

df = pd.DataFrame(new_rows)

dict_name_df = {table_name: df}
preprocessing_dfs.append(dict_name_df)
print(len(preprocessing_dfs))
df

13


Unnamed: 0,Piping_Material_Class,Basic_Material,Class_Rating,Applicable_Code,C.A_(mm),Line_Service,Design_Condition_Pressure,Design_Condition_Temperature,Service,Branch,Remark
0,150C2A,A106-B SMLS,150,ASME B31.1,1.6,LSC,130.0,130.0,Condensate Tank to Pump Suction,BR-1,
1,150C2A,A106-B SMLS,150,ASME B31.1,1.6,LSC,130.0,130.0,Condensate Return from S-Oil to TK,BR-1,
2,150C2A,A106-B SMLS,150,ASME B31.1,1.6,LSC,130.0,130.0,Condensate after Pump Discharge,BR-1,
3,150C2A,A106-B SMLS,150,ASME B31.1,1.6,LSC,270.0,270.0,Reducing Valve Drain after Steam Trap,BR-1,
4,150C2A,A106-B SMLS,150,ASME B31.1,1.6,LSC,230.0,230.0,(HPS 3-1),BR-1,
...,...,...,...,...,...,...,...,...,...,...,...
69,150S1A,A312- TP304 SMLS,150,ASME B31.1,0,LSC,70.0,70.0,CW Transfer Pump Discharge,BR-1,
70,150S1A,A312- TP304 SMLS,150,ASME B31.1,0,LSC,70.0,70.0,Sampling Rack Drain to Trench,BR-1,
71,150S1A,A312- TP304 SMLS,150,ASME B31.1,0,LSC,70.0,70.0,Instrument Air form #3RB INA HDR,BR-1,
72,150S1A,A312- TP304 SMLS,150,ASME B31.1,0,LSC,130.0,130.0,Instrument Air form #1PB INA HDR,BR-1,


In [118]:
pdf_dfs[42].head(10)

Unnamed: 0,PIPING CLASS – 150C2A,None,None.1,None.2,None.3,None.4,None.5,None.6,None.7,None.8,None.9,None.10,None.11
0,SERVICE,,,,,,PRESS.\n(barg),TEMP.\n(℃),,Flange Rating,ASME 150#,,
1,LSC,,,,,,5.8,103.0,,Base Material,CARBON STEEL,,
2,LSC,,,,,,9.7,130.0,,Design Code,ASME B31.1,,
3,HSC,,,,,,5.0,270.0,,Corrosion Allowance,1.6 mm,,
4,,,,,,,,,,Branch Table,BR-1,,
5,PIPE AND FITTINGS,,,,,,,,,,,,
6,ITEM,,SIZE (DN),,SCHEDULE/,,MATERIAL,,DESCRIPTION,,,NOTE,
7,,,,,RATING,,,,,,,,
8,PIPE,,15 ~ 50,SCH.80,,,A106 Gr. B,,SMLS PE,,,,
9,,,65 ~ 200,STD WT,,,A106 Gr. B,,SMLS BE,,,,


In [119]:
pdf_dfs[43].head(10)

Unnamed: 0,PIPING CLASS – 300C2A,None,None.1,None.2,None.3,None.4,None.5,None.6,None.7,None.8,None.9,None.10,None.11
0,SERVICE,,,,,,PRESS.\n(barg),TEMP.\n(℃),,Flange Rating,ASME 300#,,
1,HPS,,,,,,20.0,370,,Base Material,CARBON STEEL,,
2,HPS,,,,,,20.0,300,,Design Code,ASME B31.1,,
3,,,,,,,,,,Corrosion Allowance,1.6 mm,,
4,,,,,,,,,,Branch Table,BR-1,,
5,PIPE AND FITTINGS,,,,,,,,,,,,
6,ITEM,,SIZE (DN),,SCHEDULE/,,MATERIAL,,DESCRIPTION,,,NOTE,
7,,,,,RATING,,,,,,,,
8,PIPE,,15 ~ 50,SCH.80,,,A106 Gr. B,,SMLS PE,,,,
9,,,65 ~ 250,STD WT,,,A106 Gr. B,,SMLS BE,,,,


In [120]:
pdf_dfs[44].head(10)

Unnamed: 0,Unnamed: 1,PIPING MATERIAL\nSPECIFICATION,Job No. MSPP
0,,,Doc. No. SGC-3100-LEZ-002
1,,,Rev. No. B
2,,,Date 2024. 12. 16


In [121]:
pdf_dfs[45].head(10)

Unnamed: 0,PIPING CLASS – 600C2A,None,None.1,None.2,None.3,None.4,None.5,None.6,None.7,None.8,None.9,None.10,None.11
0,SERVICE,,,,,,PRESS.\n(barg),TEMP.\n(℃),,Flange Rating,ASME 600#,,
1,HPS2,,,,,,41.0,390.0,,Base Material,CARBON STEEL,,
2,LFW,,,,,,80.0,163.0,,Design Code,ASME B31.1,,
3,LFW,,,,,,55.0,163.0,,Corrosion Allowance,1.6 mm,,
4,,,,,,,,,,Branch Table,BR-1,,
5,PIPE AND FITTINGS,,,,,,,,,,,,
6,ITEM,,SIZE (DN),,SCHEDULE/,,MATERIAL,,DESCRIPTION,,,NOTE,
7,,,,,RATING,,,,,,,,
8,PIPE,,15 ~ 50,SCH.80,,,A106 Gr. B,,SMLS PE,,,,
9,,,65 ~ 150,STD WT,,,A106 Gr. B,,SMLS BE,,,,


In [122]:
pdf_dfs[46].head(10)

Unnamed: 0,Unnamed: 1,PIPING MATERIAL\nSPECIFICATION,Job No. MSPP
0,,,Doc. No. SGC-3100-LEZ-002
1,,,Rev. No. B
2,,,Date 2024. 12. 16


In [123]:
pdf_dfs[47].head(10)

Unnamed: 0,PIPING CLASS – 300A2H,None,None.1,None.2,None.3,None.4,None.5,None.6,None.7,None.8,None.9,None.10,None.11
0,SERVICE,,,,,,PRESS.\n(barg),TEMP.\n(℃),,Flange Rating,ASME 300#,,
1,HPS (Reducing V/V Downstream),,,,,,20.0,370.0,,Base Material,LOW ALLOY(P22),,
2,BSC (Reducing Valve Drain to Trench),,,,,,5.0,310.0,,Design Code,ASME B31.1,,
3,,,,,,,,,,Corrosion Allowance,0.25 mm,,
4,,,,,,,,,,Branch Table,BR-1,,
5,PIPE AND FITTINGS,,,,,,,,,,,,
6,ITEM,,SIZE (DN),,SCHEDULE/,,MATERIAL,,DESCRIPTION,,,NOTE,
7,,,,,RATING,,,,,,,,
8,PIPE,,15 ~ 50,SCH.80,,,SA335 Gr. P22,,SMLS PE,,,,
9,,,65 ~ 250,STD WT,,,SA335 Gr. P22,,SMLS BE,,,,


In [124]:
pdf_dfs[48].head(10)

Unnamed: 0,Unnamed: 1,PIPING MATERIAL\nSPECIFICATION,Job No. MSPP
0,,,Doc. No. SGC-3100-LEZ-002
1,,,Rev. No. B
2,,,Date 2024. 12. 16


In [125]:
pdf_dfs[49].head(10)

Unnamed: 0,PIPING CLASS – 600A2H,None,None.1,None.2,None.3,None.4,None.5,None.6,None.7,None.8,None.9,None.10,None.11
0,SERVICE,,,,,,PRESS.\n(barg),TEMP.\n(℃),,Flange Rating,ASME 600#,,
1,HHS3 (HP Steam to S-Oil),,,,,,43.2,413.0,,Base Material,LOW ALLOY(P22),,
2,BSC (Main Steam Drain),,,,,,50.0,480.0,,Design Code,ASME B31.1,,
3,BSC (Reducing Valve Drain to Trench),,,,,,5.0,330.0,,Corrosion Allowance,0.25 mm,,
4,,,,,,,,,,Branch Table,BR-1,,
5,PIPE AND FITTINGS,,,,,,,,,,,,
6,ITEM,,SIZE (DN),,SCHEDULE/,,MATERIAL,,DESCRIPTION,,,NOTE,
7,,,,,RATING,,,,,,,,
8,PIPE,,15 ~ 50,SCH.80,,,SA335 Gr. P22,,SMLS PE,,,,
9,,,65 ~ 200,STD WT,,,SA335 Gr. P22,,SMLS BE,,,,


In [126]:
pdf_dfs[50].head(10)

Unnamed: 0,Unnamed: 1,PIPING MATERIAL\nSPECIFICATION,Job No. MSPP
0,,,Doc. No. SGC-3100-LEZ-002
1,,,Rev. No. B
2,,,Date 2024. 12. 16


In [127]:
pdf_dfs[51].head(10)

Unnamed: 0,PIPING CLASS – 900A1A,None,None.1,None.2,None.3,None.4,None.5,None.6,None.7,None.8,None.9,None.10,None.11
0,SERVICE,,,,,,PRESS.\n(barg),TEMP.\n(℃),,Flange Rating,ASME 900#,,
1,HHS (Main Steam from #1PB Boiler)C,,,,,,75.0,470.0,,Base Material,LOW ALLOY(P22),,
2,,,,,,,,,,Design Code,ASME B31.1,,
3,,,,,,,,,,Corrosion Allowance,0.25 mm,,
4,,,,,,,,,,Branch Table,BR-1,,
5,PIPE AND FITTINGS,,,,,,,,,,,,
6,ITEM,,SIZE (DN),,SCHEDULE/,,MATERIAL,,DESCRIPTION,,,NOTE,
7,,,,,RATING,,,,,,,,
8,PIPE,,15 ~ 50,SCH.80,,,SA335 Gr. P22,,SMLS PE,,,,
9,,,65 ~ 100,STD WT,,,SA335 Gr. P22,,SMLS BE,,,,


In [128]:
pdf_dfs[52].head(10)

Unnamed: 0,Unnamed: 1,PIPING MATERIAL\nSPECIFICATION,Job No. MSPP
0,,,Doc. No. SGC-3100-LEZ-002
1,,,Rev. No. B
2,,,Date 2024. 12. 16


In [129]:
pdf_dfs[53].head(10)

Unnamed: 0,PIPING CLASS – 1500A2H,None,None.1,None.2,None.3,None.4,None.5,None.6,None.7,None.8,None.9,None.10,None.11
0,SERVICE,,,,,,PRESS.\n(barg),TEMP.\n(℃),,Flange Rating,ASME 1500#,,
1,HHS2 (Main Steam from #RB3 Boiler),,,,,,101.0,505.0,,Base Material,LOW ALLOY(P22),,
2,,,,,,,,,,Design Code,ASME B31.1,,
3,,,,,,,,,,Corrosion Allowance,0.25 mm,,
4,,,,,,,,,,Branch Table,BR-1,,
5,PIPE AND FITTINGS,,,,,,,,,,,,
6,ITEM,,SIZE (DN),,SCHEDULE/,,MATERIAL,,DESCRIPTION,,,NOTE,
7,,,,,RATING,,,,,,,,
8,PIPE,,15 ~ 50,SCH.80,,,SA335 Gr. P22,,SMLS PE,,,,
9,,,65 ~ 100,SCH.80,,,SA335 Gr. P22,,SMLS BE,,,,


In [130]:
pdf_dfs[54].head(10)

Unnamed: 0,Unnamed: 1,PIPING MATERIAL\nSPECIFICATION,Job No. MSPP
0,,,Doc. No. SGC-3100-LEZ-002
1,,,Rev. No. B
2,,,Date 2024. 12. 16


In [131]:
pdf_dfs[55].head(10)

Unnamed: 0,PIPING CLASS – 150S1A,None,None.1,None.2,None.3,None.4,None.5,None.6,None.7,None.8,None.9,None.10,None.11,None.12
0,SERVICE,,,,,,,,PRESS. (barg),TEMP.\n(℃),Flange Rating,ASME 150#,,
1,DMW,,,,,,,,15.0,70.0,Base Material,STAINLESS,,
2,INA,,,,,,,,9.7,70.0,Design Code,ASME B31.1,,
3,LSC,,,,,,,,9.7,130.0,Corrosion,0.0 mm,,
4,,,,,,,,,,,Branch Table,BR-1,,
5,PIPE AND FITTINGS,,,,,,,,,,,,,
6,ITEM,,SIZE (DN),,,,SCHEDULE/,,MATERIAL,DESCRIPTION,,,NOTE,
7,,,,,,,RATING,,,,,,,
8,PIPE,,10 ~ 50,,,SCH.40S,,,A312-TP304,SMLS PE,,,,
9,,,65 ~ 150,,,SCH.40S,,,A312-TP304,WLD BE,,,,


In [132]:
pdf_dfs[56].head(10)

Unnamed: 0,Unnamed: 1,PIPING MATERIAL\nSPECIFICATION,Job No. MSPP
0,,,Doc. No. SGC-3100-LEZ-002
1,,,Rev. No. B
2,,,Date 2024. 12. 16


In [134]:
pdf_dfs[57].head(10)

Unnamed: 0,PIPING CLASS – 300S1A,None,None.1,None.2,None.3,None.4,None.5,None.6,None.7,None.8,None.9,None.10,None.11,None.12
0,SERVICE,,,,,,,,PRESS. (barg),TEMP.\n(℃),Flange Rating,ASME 300#,,
1,HPS (MP Steam Sampling),,,,,,,,20.4,300.0,Base Material,STAINLESS,,
2,,,,,,,,,,,Design Code,ASME B31.1,,
3,,,,,,,,,,,Corrosion,0.0 mm,,
4,,,,,,,,,,,Branch Table,BR-1,,
5,PIPE AND FITTINGS,,,,,,,,,,,,,
6,ITEM,,SIZE (DN),,,,SCHEDULE/,,MATERIAL,DESCRIPTION,,,NOTE,
7,,,,,,,RATING,,,,,,,
8,PIPE,,15 ~ 50,,,SCH.40S,,,A312-TP304or304L,SMLS PE,,,,
9,,,,,,,,,,,,,,
