# USPTO Patent Dataset
* 가상환경 sait
* Target subclass: H01L
* Target period: 2020~2022

In [1]:
import pandas as pd
import glob, os
import pickle
from tqdm import tqdm
from nltk import sent_tokenize
tqdm.pandas()

In [2]:
data_filepaths = glob.glob(os.path.join('Data', '*'))
data_filepaths

['Data/H01L_2020-2022_9585.json']

## Preprocessing

In [3]:
dfs = []
for filepath in data_filepaths:
    one_df = pd.read_json(filepath)
    one_df['year'] = os.path.basename(filepath)[:4]
    dfs.append(one_df)
df = pd.concat(dfs)
# df.drop(columns=['is_target', 'authors'], inplace=True) # author information in unavailable.
# df.reset_index(inplace=True)
df

Unnamed: 0,index,publication_title,publication_number,publication_date,application_type,sections,section_classes,section_class_subclasses,section_class_subclass_groups,abstract,descriptions,claims,year
0,0,Wafer polishing system,US10525568,20200107,utility,"[B, H]","[B24, H01]","[B24B, H01L]","[B24B 37/04, B24B 37/34, B24B 57/02, H01L 21/306]",The wafer polishing system is disclosed. The w...,CROSS REFERENCE TO RELATED APPLICATION\nThis a...,1. A wafer polishing system comprising:\na pol...,H01L
1,1,Slurry composition for CMP and polishing metho...,US10526508,20200107,utility,"[C, H]","[C09, H01]","[C09G, H01L]","[C09G 1/02, H01L 21/304, H01L 21/321, H01L 21/...",Provided are a slurry composition for CMP and ...,TECHNICAL FIELD\nThe present invention relates...,1. A slurry composition for chemical-mechanica...,H01L
2,2,Methods for controlling the substrate temperat...,US10526705,20200107,utility,"[C, H]","[C23, H01]","[C23C, H01L]","[C23C 16/46, C23C 16/458, H01L 21/687, H01L 21...","In a CVD reactor, flushing gases of different ...",RELATED APPLICATIONS\nThis application is a Di...,1. A method for depositing a plurality of laye...,H01L
3,3,In-situ metrology method for thickness measure...,US10527407,20200107,utility,"[G, H, C]","[G01, H01, C23]","[G01B, H01L, C23C, H01J]","[G01B 11/06, H01L 21/66, G01B 7/06, C23C 16/52...",Embodiments of the present disclosure relate t...,CLAIM OF PRIORITY UNDER 35 U.S.C. 119\nThis ap...,"1. A processing chamber, comprising:\na substr...",H01L
4,4,Semiconductor inspection device,US10527648,20200107,utility,"[G, H]","[G01, H01]","[G01R, H01L]","[G01R 1/073, G01R 1/067, H01L 21/67]","According to the present invention, a semicond...",BACKGROUND OF THE INVENTION\nField\nThe presen...,1. A semiconductor inspection device comprisin...,H01L
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9580,2571,Method for manufacturing semiconductor device,US11538921,20221227,utility,[H],[H01],[H01L],"[H01L 21/336, H01L 29/40, H01L 29/66, H01L 21/...","A source electrode (5), a drain electrode (6) ...",FIELD\nThe present invention relates to a meth...,1. A method for manufacturing a semiconductor ...,H01L
9581,2572,Method for etching back hard mask layer on top...,US11538923,20221227,utility,[H],[H01],[H01L],"[H01L 29/66, H01L 21/02, H01L 21/033, H01L 21/...",A method for etching back a hard mask layer on...,CROSS-REFERENCES TO RELATED APPLICATIONS\nThis...,1. A method for semiconductor process for maki...,H01L
9582,2573,Schottky diode integrated into superjunction p...,US11538933,20221227,utility,[H],[H01],[H01L],"[H01L 29/78, H01L 21/266, H01L 21/28, H01L 21/...",A trench metal-oxide-semiconductor field-effec...,CLAIM OF PRIORITY\nThis application is a conti...,1. A trench metal-oxide-semiconductor field-ef...,H01L
9583,2574,Methods and apparatus to control zone temperat...,US11538956,20221227,utility,"[H, F]","[H01, H05, F27]","[H01L, H05B, F27B]","[H01L 31/18, H05B 6/06, H01L 21/67, F27B 17/00]",Methods and apparatus to control zone temperat...,BACKGROUND\nThis disclosure relates to solar c...,"1. A furnace to fire photovoltaic cells, the f...",H01L


In [4]:
list_abstract = list(df['abstract'])
list_descriptions = list(df['descriptions'])
list_claims = list(df['claims'])

In [42]:
list_descriptions[4]

'BACKGROUND OF THE INVENTION\nField\nThe present invention relates to a semiconductor inspection device and relates to a semiconductor inspection device suitable for use in a collective contact type semiconductor wafer measuring device.\nBackground\nJP 2005-317561 A discloses a semiconductor inspection device using a probe card that can simultaneously contact a plurality of chips.\nSemiconductor inspection device provided with a probe card which can simultaneously contact a plurality of chips needs to control many signals to inspect the plurality of chips. Thus, the semiconductor inspection device is mounted with many parts to control signals, which leads to an increase in size of the device.\nSUMMARY\nThe present invention has been implemented to solve the above-described problem and it is an object of the present invention to provide a semiconductor inspection device that can downsize the device while using a probe card that can simultaneously contact a plurality of chips.\nThe featu

## Background 부분 Parsing
Parsing 방법:
```
\n(0개~)BACKGROUND\n(1개~)
(Background 내용)
\n(0개~)AA BBB CCC\n(1개~)
→ (Background 내용) 부분 추출
```

'BACKGROUND' 대신 사용되는 표현
- 커버 O
    - BACKGROUND OF THE INVENTION
    - BACKGROUND ART
    - BACKGROUND OF THE DISCLOSURE
    - BACKGROUND AND SUMMARY
- 커버 X
    - RELATED ART
    - DISCUSSION OF RELATED ART
    - DESCRIPTION OF THE RELATED ART
    - CROSS REFERENCE TO RELATED APPLICATION
    - CROSS REFERENCE TO RELATED APPLICATIONS
    - CROSS-REFERENCE TO RELATED APPLICATION
    - CROSS-REFERENCE TO RELATED APPLICATIONS
    - TECHNICAL FIELD
    - TECHNICAL FIELD AND PRIOR ART

In [95]:
import re

pattern = r"\n*BACKGROUND(?: OF THE INVENTION| ART| OF THE DISCLOSURE| AND SUMMARY)?\n+(.*?)\n*\s*[A-Z\s]+\n+"
list_background = []
none_index = []

for i, description in enumerate(tqdm(list_descriptions)):
    matches = re.search(pattern, description, re.DOTALL)
    
    if matches:
        list_background.append(matches.group(1))
    else:
        list_background.append(None)
        none_index.append(i)

100%|██████████| 9585/9585 [00:01<00:00, 8157.27it/s]


In [96]:
# 9585개 중 292개 제외하고는 background 추출함
list_background.count(None)

292

In [114]:
# 추출된 background
for i, b in enumerate(list_background[:10]):
    print(i)
    print(b)
    print()

0
Recently, the high integration of semiconductors has increased the processing and storage capacity of information per unit area. This has led to demands for large diameter semiconductor wafers, miniaturization of circuit line width, and multilayer wiring. In order to form a multi-layered wiring on a semiconductor wafer, high-level flatness of the wafer is required, and a wafer flattening process is required for such high-level flatness.
One of the wafer flattening processes is a wafer polishing process. The wafer polishing process is a step of polishing the upper and lower surfaces of the wafer with a polishing pad. The wafer polishing process is carried out using a polishing system having a polishing unit provided with an upper plate, a lower plate and a means for supplying polishing slurry to the polishing unit.
A pipe connected to the polishing unit for supplying the slurry to the polishing unit may be provided in the polishing system. However, the abrasive grains contained in the

In [116]:
# 추출된 background 앞뒤 헤더 포함해서 print
import re

pattern = r"\n*BACKGROUND(?: OF THE INVENTION| ART| OF THE DISCLOSURE| AND SUMMARY)?\n+(.*?)\n*\s*[A-Z\s]+\n+"

for description in tqdm(list_descriptions[:10]):
    matches = re.search(pattern, description, re.DOTALL)
    
    if matches:
        print(matches.group())
        print('-'*100)
    else:
        pass

100%|██████████| 10/10 [00:00<00:00, 5523.91it/s]


BACKGROUND
Recently, the high integration of semiconductors has increased the processing and storage capacity of information per unit area. This has led to demands for large diameter semiconductor wafers, miniaturization of circuit line width, and multilayer wiring. In order to form a multi-layered wiring on a semiconductor wafer, high-level flatness of the wafer is required, and a wafer flattening process is required for such high-level flatness.
One of the wafer flattening processes is a wafer polishing process. The wafer polishing process is a step of polishing the upper and lower surfaces of the wafer with a polishing pad. The wafer polishing process is carried out using a polishing system having a polishing unit provided with an upper plate, a lower plate and a means for supplying polishing slurry to the polishing unit.
A pipe connected to the polishing unit for supplying the slurry to the polishing unit may be provided in the polishing system. However, the abrasive grains contai




In [113]:
# Background 추출 안 된 description
for i in none_index[:2]:
    print(list_descriptions[i])
    print('-'*100)

FIELD OF INVENTION
The present application claims priority to and the benefit of Chinese Patent Application No. 201410749979.2, filed on Dec. 10, 2014, and the disclosure of which is hereby incorporated herein by reference in its entirety.
The present invention relates to the manufacturing equipment in semiconductor industry, especially to a device of changing the gas flow pattern in the process chamber and a wafer processing method and apparatus.
RELATED ART
The silicon etching is dominated by the chemical action, the gas delivery and flow pattern in the process chamber of etching apparatus will significantly affect the etching performance. A gas center ring (GCR), which is employed in the process chamber for silicon etching, can change the gas flow pattern in the process chamber according to the specific requirements of different etching processes.
As shown in FIG. 1, the etching apparatus comprises a process chamber 300, and a wafer 500 placed on a base 400 at bottom of the process 

In [98]:
# df에 추가
df['background'] = list_background
df

Unnamed: 0,index,publication_title,publication_number,publication_date,application_type,sections,section_classes,section_class_subclasses,section_class_subclass_groups,abstract,descriptions,claims,year,background
0,0,Wafer polishing system,US10525568,20200107,utility,"[B, H]","[B24, H01]","[B24B, H01L]","[B24B 37/04, B24B 37/34, B24B 57/02, H01L 21/306]",The wafer polishing system is disclosed. The w...,CROSS REFERENCE TO RELATED APPLICATION\nThis a...,1. A wafer polishing system comprising:\na pol...,H01L,"Recently, the high integration of semiconducto..."
1,1,Slurry composition for CMP and polishing metho...,US10526508,20200107,utility,"[C, H]","[C09, H01]","[C09G, H01L]","[C09G 1/02, H01L 21/304, H01L 21/321, H01L 21/...",Provided are a slurry composition for CMP and ...,TECHNICAL FIELD\nThe present invention relates...,1. A slurry composition for chemical-mechanica...,H01L,With an increase in the degree of integration ...
2,2,Methods for controlling the substrate temperat...,US10526705,20200107,utility,"[C, H]","[C23, H01]","[C23C, H01L]","[C23C 16/46, C23C 16/458, H01L 21/687, H01L 21...","In a CVD reactor, flushing gases of different ...",RELATED APPLICATIONS\nThis application is a Di...,1. A method for depositing a plurality of laye...,H01L,Such a CVD reactor is described in JP 2002-146...
3,3,In-situ metrology method for thickness measure...,US10527407,20200107,utility,"[G, H, C]","[G01, H01, C23]","[G01B, H01L, C23C, H01J]","[G01B 11/06, H01L 21/66, G01B 7/06, C23C 16/52...",Embodiments of the present disclosure relate t...,CLAIM OF PRIORITY UNDER 35 U.S.C. 119\nThis ap...,"1. A processing chamber, comprising:\na substr...",H01L,Field\nEmbodiments of the present disclosure r...
4,4,Semiconductor inspection device,US10527648,20200107,utility,"[G, H]","[G01, H01]","[G01R, H01L]","[G01R 1/073, G01R 1/067, H01L 21/67]","According to the present invention, a semicond...",BACKGROUND OF THE INVENTION\nField\nThe presen...,1. A semiconductor inspection device comprisin...,H01L,Field\nThe present invention relates to a semi...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9580,2571,Method for manufacturing semiconductor device,US11538921,20221227,utility,[H],[H01],[H01L],"[H01L 21/336, H01L 29/40, H01L 29/66, H01L 21/...","A source electrode (5), a drain electrode (6) ...",FIELD\nThe present invention relates to a meth...,1. A method for manufacturing a semiconductor ...,H01L,A gate electrode having a T-shaped cross secti...
9581,2572,Method for etching back hard mask layer on top...,US11538923,20221227,utility,[H],[H01],[H01L],"[H01L 29/66, H01L 21/02, H01L 21/033, H01L 21/...",A method for etching back a hard mask layer on...,CROSS-REFERENCES TO RELATED APPLICATIONS\nThis...,1. A method for semiconductor process for maki...,H01L,With continuous reduction of process nodes in ...
9582,2573,Schottky diode integrated into superjunction p...,US11538933,20221227,utility,[H],[H01],[H01L],"[H01L 29/78, H01L 21/266, H01L 21/28, H01L 21/...",A trench metal-oxide-semiconductor field-effec...,CLAIM OF PRIORITY\nThis application is a conti...,1. A trench metal-oxide-semiconductor field-ef...,H01L,
9583,2574,Methods and apparatus to control zone temperat...,US11538956,20221227,utility,"[H, F]","[H01, H05, F27]","[H01L, H05B, F27B]","[H01L 31/18, H05B 6/06, H01L 21/67, F27B 17/00]",Methods and apparatus to control zone temperat...,BACKGROUND\nThis disclosure relates to solar c...,"1. A furnace to fire photovoltaic cells, the f...",H01L,This disclosure relates to solar cell producti...


## Save

In [18]:
with open('Preprocessed_Data/H01L_2020-2022_9585_abstract.pickle','wb') as fw:
    pickle.dump(list_abstract, fw)
with open('Preprocessed_Data/H01L_2020-2022_9585_background.pickle','wb') as fw:
    pickle.dump(list_background, fw)
with open('Preprocessed_Data/H01L_2020-2022_9585_descriptions.pickle','wb') as fw:
    pickle.dump(list_descriptions, fw)
with open('Preprocessed_Data/H01L_2020-2022_9585_claims.pickle','wb') as fw:
    pickle.dump(list_claims, fw)