In [6]:
%cd /media/auto203/SSD2/JHY/python/enhanced-subject-verb-object-extraction-master


from typing import List
from deepmultilingualpunctuation import PunctuationModel
from transformers import pipeline
import nltk
from Exiang import chinese_or_not
from Exiang import chinese_svo
import notam
import re
import warnings
from openpyxl import Workbook
from openpyxl.utils.dataframe import dataframe_to_rows
import pandas as pd
import sentence_tools


S = """(A1912/15 NOTAMN
Q) LOVV/QWPLW/IV/BO/W/000/130/4809N01610E001
A) LOVV B) 1509261100 C) 1509261230
E) PJE WILL TAKE PLACE AT AREA LAAB IN WALDE
F) GND G) FL130)"""


warnings.filterwarnings("ignore")
MODEL = PunctuationModel()
RULES_TABLE = "data/NOTAM_table.xlsx"
GENERAL_RULES = sentence_tools.get_general_rules(RULES_TABLE, words_sheet="words_list", rules_sheet="base_rules")
SUPPLEMENT_RULES = sentence_tools.get_supplement_rules(RULES_TABLE, words_sheet="words_list", rules_sheet="supplement_rules")
SUPPLEMENT_RULES.extend(GENERAL_RULES)
RULES_LIST = SUPPLEMENT_RULES
action_words, _, _, _ = sentence_tools.read_words(path=RULES_TABLE, sheet_name="words_list")


# 解析 TODO: 去除非E项内容
def notam_parse_single(e_option_noisy: str) -> str:
    # 去除E):
    e_option = e_option_noisy.replace('E)：', '')
    # 处理换行符
    e_option = e_option.rstrip('\n')
    e_option = e_option.replace('\n', '. ')
    # 去除括号里内容
    e_option = re.sub('\(.*?\)', '', e_option)
    # RWY和数字分开
    e_option = re.sub(r"(Runway|RUNWAY|RWY|TWY)([0-9/]+)", r"\1 \2", e_option)
    # 数字和CLSD之间的-去掉
    e_option = re.sub(r"([0-9/LR]+)[ ]*(-)[ ]*(CLSD|Closed|CLOSED|CLS|Close)", r"\1 \3", e_option)
    # 空格合并
    e_option = ' '.join(e_option.split())
    return e_option

# 解码
def notam_decode(e_option: str) -> str:
    # 整理调包格式
    s_new = S.split("E)")[0] + "E) " + e_option + "\nF)" + S.split("E)")[1].split("F)")[1]
    res = notam.Notam.from_str(s_new)
    e_option_text = res.decoded().split("E)")[1].split("F)")[0].rstrip("\n").lstrip(' ').replace('\n', ' ')
    # 原始符号处理
    e_option_text = e_option_text.replace(',', ", ")
    e_option_text = e_option_text.replace(':', ": ")
    e_option_text = ' '.join(e_option_text.split())
    return e_option_text

def bad_case_or_not(e_option_text: str) -> bool:
    # 简单句第一句
    keyword = ['FOLLOWING CONDITIONS:X-WIND COMPONENT', 'REF AIP SUP 12/21 PARAS 2.3',
               'REF AIP SUP A24/21 WEF 20 SEP 2021', 'REF AIP-AD2-LGPZ-ADC', 'REF AIP SUP 166/21 ITEM',
               'RUNWAYS RESTRICTIONS DUE TO ON RUNWAYS DECK LANDING', 'SUSPENDING ILS/GP RWY 07R UNTIL',
               'PORTION WITH CRACKED SFC ON RWY 29R', '22 FIREWORKS 1 NM NW THR . MAX',
               'EFFECTIVE ONLY AT THE EXTREMITIES', 'DAMAGED RWY PATCH LOCATED . ON CENTERLINE 160M FROM THR RWY 12.']

    for key in keyword:
        if key in e_option_text:
            return True

    return False

def bad_case_svo(e_option_text: str) -> List[List[str]]:
    if 'FOLLOWING CONDITIONS:X-WIND COMPONENT' in e_option_text:
        res = sentence_parse(e_option_text)
        if res[0]:
            return res[1]

    # 实体，动作，原因，限制，限制-翼展，限制-重量，来源
    if 'REF AIP SUP 12/21 PARAS 2.3' in e_option_text:
        return [['CLOSURE OF CENTRE RWY (07C/25C) AND VHHH ON DUAL RWY OPS DRG 252316 -262315', ['RWY (07C/25C)'], 'IS CNL',
                 'DUE HKIA CARGO STAND RE-DESIGNATION', '', '', '', 'REF AIP SUP 12/21 PARAS 2.3 - 2.4 AND NOTAM A1171/21']]

    if 'REF AIP SUP A24/21 WEF 20 SEP 2021' in e_option_text:
        return [['RUNWAY 25R/07L AND TAXIWAYS', ['RUNWAY 25R/07L'], 'TEMPORARY CLOSURE',
                 "CONSTRUCTION OF PHASE 2 - PERIOD 2 - PROJECT 'CONSTRUCTING, UPGRADING RUNWAY , TAXIWAYS AT TAN SON NHAT INTERNATIONAL AIRPORT",
                 '', '', '', 'REF AIP SUP A24/21 WEF 20 SEP 2021'],
                ['GP 07R', [], 'TEMPO SUSPENDED', '', 'UNTIL 0700 ON 10 MAR 2022', '', '', 'REF AIP SUP A24/21 WEF 20 SEP 2021']]

    if 'REF AIP-AD2-LGPZ-ADC' in e_option_text:
        return [['PART OF RWY 07R/25L USED AS TWY BETWEEN INTERSECTIONS B AND F', ['RWY 07R/25L'], 'CLOSED', '',
                 'INTERSECTIONS B AND F NOT AFFECTED. CLOSED PART MARKED AND LIGHTED', '', '',
                 'REF AIP-AD2-LGPZ-ADC']]

    if 'REF AIP SUP 166/21 ITEM' in e_option_text:
        return [['TWY:48 TWY A(BTN R AND W) FOR ACFT WITH WINGSPAN MORE THAN 68.40M', [], 'NOT AVBL', '', '',
                 '', '', 'REF AIP SUP 166/21 ITEM']]

    if 'RUNWAYS RESTRICTIONS DUE TO ON RUNWAYS DECK LANDING' in e_option_text:
        limit = '''DAILY
        PROGRAM KNOWN FROM 'OQCLA' : +33(0)2 97 12 90 25 ACTUAL ACTIVITY KNOWLEDGE
        AVBL ON ATIS 129.125MHZ DURING SLOTS ACTIVITY : - POSSIBLE REGULATION ON DEPARTURE ,
        AND ON ARRIVAL - DECK LANDING MIRRORS, 13FT HEIGHT, 200M BEFORE RUNWAY IN USE THR, 2M
        FROM THE RWY LEFT EDGE (LOCATION IN USE) - UNBASED ACFT MOVEMENT AND CIVILIAN ACFT
        MOVEMENT PROHIBITED WHEN LANDING MIRROR IN USE .
        '''
        return[['RUNWAYS', ['RUNWAYS'], 'RESTRICTIONS', 'DUE TO ON RUNWAYS DECK LANDING SIMULATION',
                limit, '', '', '']]

    if 'SUSPENDING ILS/GP RWY 07R UNTIL' in e_option_text:
        return [['ILS/GP RWY 07R', ['RWY 07R'], 'TEMPO SUSPENDING', '', 'UNTIL 0700 ON 30 APR 2022',
                 '', '', 'REF AIP SUP A08/22 WEF 21 FEB 2022 ITEM 2.1.1 B.']]

    if 'PORTION WITH CRACKED SFC ON RWY 29R' in e_option_text:
        return [['RWY 29R 2M SOUTH AND NORTH OF RCL, BTN TWY A4 AND A5', ['RWY 29R'], 'SOME PORTION WITH CRACKED SFC',
                 '', '', '', '', '']]

    if '22 FIREWORKS 1 NM NW THR . MAX' in e_option_text:
        return [['RWY 22', ['RWY 22'], 'FIREWORKS', '', '1 NM NW THR . MAX 350FT AGL / 1800FT AMSL.', '', '', '']]

    if 'EFFECTIVE ONLY AT THE EXTREMITIES' in e_option_text:
        return [['THE EXTREMITIES OF RWY 16/34 FOR ALL ACFT TYPES', ['RWY 16/34'],
                 'BACKTRACK OPERATIONS, AFTER LANDING OR PRIOR', '', '', '',
                 'TO TAKE-OFF, EXCEPT FOR ACFT LESS THAN FIVE THOUSAND SEVEN HUNDRED KGR 5700 MTOW', '']]

    return [['', [], '', '', '', '', '', '']]

# 标点
def punctuation(e_option_text: str) -> str:
    e_option_text_punc = MODEL.restore_punctuation(e_option_text)
    # 去掉RWY和数字之间的句号
    e_option_text_punc = re.sub(r"(Runway|RUNWAY|RWY|TWY).?(?=[0-9 LR/]+)", r"\1 ", e_option_text_punc)
    # 去掉数字和常见非开头词之间的句号
    e_option_text_punc = re.sub(r"([0-9LR/]+)[:\. ]*(?=Unserviceable|CLSD|Closed|CLS|closed)", r"\1 ", e_option_text_punc)
    # 去掉TEL和数字之间的句号
    e_option_text_punc = re.sub(r"(Contact TEL|CONTACT TEL|APPROVAL ONLY TEL|Approved ONLY TEL)[:\. ]*(\+?[0-9 ]*)", r"\1 \2", e_option_text_punc)
    # 合并空格
    e_option_text_punc = ' '.join(e_option_text_punc.split())
    # 去掉管理员批准中间的句号
    e_option_text_punc = re.sub(r"(Aerodrome)[\. ]*(OPERATOR (?:APPROVAL|Approved) ONLY)", r"\1 \2", e_option_text_punc)
    # 去掉通知时间中间的句号
    e_option_text_punc = re.sub(r"(MINUTES|Minutes|MIN|)[\. ]*((?:Prior|PRIOR) (?:NOTICE|Notice|NOTIFICATION|Permission|PERMISSION))", r"\1 \2", e_option_text_punc)
    # 去掉联系方式一整句话前面的句号
    e_option_text_punc = re.sub(r"\. *(Contact|CONTACT)[: +]*(?:TEL)*[: +]*([0-9 -]*\.)", r" \1 \2", e_option_text_punc)
    # 去掉数字后冗余的句号
    e_option_text_punc = e_option_text_punc.replace("...", "呜呜呜")
    e_option_text_punc = e_option_text_punc.replace("..", '可恶的句号')
    e_option_text_punc = e_option_text_punc.replace("呜呜呜", "...")
    # bad_cases
    e_option_text_punc = e_option_text_punc.replace("APPROVAL ONLY. TEL", "APPROVAL ONLY TEL")
    e_option_text_punc = e_option_text_punc.replace("IS PROHIBITED. FROM INTERSECTION Taxiway C1.", "IS PROHIBITED FROM INTERSECTION Taxiway C1.")
    e_option_text_punc = e_option_text_punc.replace("CLOSED. ALL TRAINING AND VFR FLIGHTS.", "CLOSED ALL TRAINING AND VFR FLIGHTS.")
    e_option_text_punc = e_option_text_punc.replace("Refer To METHOD OF WORKING. PLAN 001-22, STAGE 2B.", "Refer To METHOD OF WORKING PLAN 001-22, STAGE 2B.")
    return e_option_text_punc

# 判断摘要
def summarization_or_not(e_option_text_punc: str) -> bool:
    return False

# 摘要
def summarization(text: str) -> str:
    summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
    summary = summarizer(text, max_length=130, min_length=30, do_sample=False)
    return summary[0]["summary_text"]

# 分句
def sentence_tokenize(text: str) -> List[str]:
    sentences = []
    temp = ''
    # 句子变形以解决不分句bug
    text = re.sub(r"(CAT) (I+)(\.)", r"\1 \2I\3", text)
    text = re.sub(r"(Taxiway) ([A-Z]+)(\.)", r"\1 \2A\3",  text)
    # 加载punkt句子分割器
    sen_tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
    # 对句子进行分割
    sentences_ori = sen_tokenizer.tokenize(text)
    # 句子若仅含实体则与后一个句子合并
    for sentence_ori in sentences_ori:
        # 句子还原
        sentence_ori = re.sub(r"(CAT) (I+)I(\.)", r"\1 \2\3", sentence_ori)
        sentence_ori = re.sub(r"(Taxiway) ([A-Z]+)A(\.)", r"\1 \2\3",  sentence_ori)
        x = re.match(r"^(?:Runway|RUNWAY|RWY|TWY)[:\. ]?[0-9RL/]*\.$", sentence_ori)
        if x is None:
            sentences.append(temp + sentence_ori)
            temp = ''
        else:
            temp = sentence_ori
    return sentences

# 编码
def encode(sentence: str) -> str:
    # 整理调包格式
    s_new = S.split("E)")[0] + "E) " + sentence + "\nF)" + S.split("E)")[1].split("F)")[1]
    res = notam.Notam.from_str(s_new)
    sentence_code = res.encoded().split("E)")[1].split("F)")[0].rstrip("\n").lstrip(' ').replace('\n', ' ')
    return sentence_code

# 单句解析
def sentence_parse(sentence_code: str):
    # read_words
    sentence_ls = sentence_tools.preprocess_sentence_code(sentence_code, action_words)
    # sentence_parse
    is_match = False
    res_list_ls = []
    for sentence in sentence_ls:
        tmp_is_match = False
        res_dict = {item: "" for item in ['entity', 'runway', 'action', 'reason', 'limit', 'limit_wings', 'limit_weight', 'source']} 
        for pattern in RULES_LIST:
            match = re.search(pattern, sentence, flags=re.I)
            if match:
                tmp_is_match = True
                match_dict = match.groupdict()
                # print(match_dict)
                if 'entity' in match_dict:
                    res_dict['entity'] = match_dict['entity']
                if 'entity_supply' in match_dict:
                    res_dict['entity'] = res_dict['entity'] + ' ' + match_dict['entity_supply']
                if 'runway' in res_dict:
                    res_dict['runway'] = str(re.findall(r"RWY [0-9]{0,2}[LR]?/?[0-9]{0,2}[LR]?|RUNWAY [0-9]{0,2}[LR]?/?[0-9]{0,2}[LR]?", res_dict['entity'], flags=re.I))
                if 'action' in match_dict:
                    res_dict['action'] = match_dict['action']
                if 'reason' in match_dict:
                    res_dict['reason'] = match_dict['reason']
                if 'limit' in match_dict:
                    res_dict['limit'] = match_dict['limit']
                if 'source' in match_dict:
                    res_dict['source'] = match_dict['source']
                res_list = list(res_dict.values())
                res_list = [item.strip() for item in res_list]
                res_list_ls.append(res_list)
                break
        is_match = is_match or tmp_is_match
    return is_match, res_list_ls

# 合并居中单元格并保存
def to_merge(df, excel_name):
    # 按照'E项'列进行每行单元格合并
    # 'E项'列去重，确定一列需要合并成几个值
    df_key = list(set(df['E项'].values))
    wb = Workbook()
    ws = wb.active
    # 将每行数据写入ws中
    for row in dataframe_to_rows(df, index=False, header=True):
        ws.append(row)
    # 遍历去重后E项
    for i in df_key:
        # 获取E项等于指定值的几行数据
        df_id = df[df.E项 == i].index.tolist() # 索引值从0开始
        # 遍历，需要合并6列，openyxl中，读excel等的序号都是从1开始，所以合并6列，需要遍历range(1, 7)
        for j in range(1, 7):
            ws.merge_cells(start_row=df_id[0] + 2, end_row=df_id[-1] + 2, start_column=j, end_column=j) # 序号从1开始，所以行序号需要加2

    # save
    wb.save(excel_name)
    print('合并成功！')

/media/auto203/SSD2/JHY/python/enhanced-subject-verb-object-extraction-master




In [2]:
# 读取excel数据
SHIT = "相对复杂"
NOTAM = pd.read_excel("data/data.xlsx", sheet_name=SHIT)
for i, v in enumerate(NOTAM["E项"]):
    NOTAM["E项"][i] = notam_parse_single(v)

NOTAM["E项-人类语"] = "null"

for i, v in enumerate(NOTAM["E项"]):
    NOTAM["E项-人类语"][i] = notam_decode(v)


NOTAM

Unnamed: 0,类型,E项,E项-人类语
0,跑道数据/限制,RUNWAYS RESTRICTIONS DUE TO ON RUNWAYS DECK LA...,RUNWAYS RESTRICTIONS DUE TO ON RUNWAYS DECK LA...
1,跑道数据/限制,RWY 16R/34L 2469M NORTH END NOT AVBL DUE WIP R...,Runway 16R/34L 2469M NORTH Stop-end NOT Availa...
2,跑道数据/限制,RWY 06/24 AVBL PPR 30 MIN CTC 514-633-3488 EXC...,Runway 06/24 Available Prior Permission Requir...
3,跑道数据/限制,RWY 16R/34L STOPBARS EVERY SECOND LGT NOT AVBL...,Runway 16R/34L STOPBARS EVERY SECOND Lighting ...
4,跑道数据/限制,RWY 13/31 OPN FOR ACFT OPS WITH THE FLW LIMITA...,Runway 13/31 Open FOR Aircraft Operations WITH...
...,...,...,...
144,起落限制,UTAE . 1. LANDING CLEARANCE SHOULD BE REQUESTE...,UTAE . 1. LANDING CLEARANCE SHOULD BE REQUESTE...
145,起落限制,RWY 13/31 OPN FOR ACFT OPS WITH THE FLW LIMITA...,Runway 13/31 Open FOR Aircraft Operations WITH...
146,起落限制,PILOTS CARRYING OUT FLT FROM/TO KHABAROVSK/NOV...,PILOTS CARRYING OUT Flight FROM/TO KHABAROVSK/...
147,起落限制,RWY 10R/28L CLSD FOR ACFT TKOF AND LDG.,Runway 10R/28L Closed FOR Aircraft Take-off AN...


In [3]:
# 中间结果

NOTAM["E项-人类语标点符号预测"] = ''
NOTAM["E项-人类语分句"] = ''
NOTAM["是否使用分句及原因"] = ''

verbs = action_words.split('|')
verbs_human = []
for verb in verbs:
    verbs_human.append(notam_decode(verb))

for i, v in enumerate(NOTAM["E项"]):
    cnt = 0
    if chinese_or_not(v):
        NOTAM["是否使用分句及原因"][i] = "不分句，因为是中文"
    else:
        v_punct = punctuation(NOTAM["E项-人类语"][i])
        NOTAM["E项-人类语标点符号预测"][i] = v_punct
        sentences = sentence_tokenize(v_punct)
        NOTAM["E项-人类语分句"][i] = sentences
        if bad_case_or_not(v):
            NOTAM["是否使用分句及原因"][i] = "不使用分句，因为是bad_case，需要整体解析"
        elif len(sentences) == 1:
            NOTAM["是否使用分句及原因"][i] = "不使用分句，因为只有一句话"
        elif len(re.findall(r"(RWY|TWY|Runway|RUNWAY)", v)) == 1:
            NOTAM["是否使用分句及原因"][i] = "不使用分句，因为只有单一实体"
        else:
            for j in sentences:
                flag = False
                for x in verbs_human:
                    if j.__contains__(x):
                        flag = True
                        break
                if flag:
                    cnt += 1
            if cnt >= 2:
                NOTAM["是否使用分句及原因"][i] = "使用分句"
            else:
                NOTAM["是否使用分句及原因"][i] = "不使用分句，因为没有两句话及以上---存在动词表里的动词"

NOTAM.to_excel("data/中间结果-" + SHIT + ".xlsx",index=False)


NOTAM

Unnamed: 0,类型,E项,E项-人类语,E项-人类语标点符号预测,E项-人类语分句,是否使用分句及原因
0,跑道数据/限制,RUNWAYS RESTRICTIONS DUE TO ON RUNWAYS DECK LA...,RUNWAYS RESTRICTIONS DUE TO ON RUNWAYS DECK LA...,RUNWAY RESTRICTIONS DUE TO ON RUNWAY DECK LAND...,[RUNWAY RESTRICTIONS DUE TO ON RUNWAY DECK LAN...,不使用分句，因为是bad_case，需要整体解析
1,跑道数据/限制,RWY 16R/34L 2469M NORTH END NOT AVBL DUE WIP R...,Runway 16R/34L 2469M NORTH Stop-end NOT Availa...,Runway 16R/34L 2469M- NORTH Stop-end- NOT Avai...,[Runway 16R/34L 2469M- NORTH Stop-end- NOT Ava...,使用分句
2,跑道数据/限制,RWY 06/24 AVBL PPR 30 MIN CTC 514-633-3488 EXC...,Runway 06/24 Available Prior Permission Requir...,Runway 06/24 Available Prior Permission Requir...,[Runway 06/24 Available Prior Permission Requi...,使用分句
3,跑道数据/限制,RWY 16R/34L STOPBARS EVERY SECOND LGT NOT AVBL...,Runway 16R/34L STOPBARS EVERY SECOND Lighting ...,Runway 16R/34L- STOPBARS: EVERY SECOND Lightin...,[Runway 16R/34L- STOPBARS: EVERY SECOND Lighti...,使用分句
4,跑道数据/限制,RWY 13/31 OPN FOR ACFT OPS WITH THE FLW LIMITA...,Runway 13/31 Open FOR Aircraft Operations WITH...,Runway 13/31 Open FOR Aircraft Operations WITH...,[Runway 13/31 Open FOR Aircraft Operations WIT...,使用分句
...,...,...,...,...,...,...
144,起落限制,UTAE . 1. LANDING CLEARANCE SHOULD BE REQUESTE...,UTAE . 1. LANDING CLEARANCE SHOULD BE REQUESTE...,UTAE: 1. LANDING CLEARANCE SHOULD BE REQUESTED...,"[UTAE: 1., LANDING CLEARANCE SHOULD BE REQUEST...",使用分句
145,起落限制,RWY 13/31 OPN FOR ACFT OPS WITH THE FLW LIMITA...,Runway 13/31 Open FOR Aircraft Operations WITH...,Runway 13/31 Open FOR Aircraft Operations WITH...,[Runway 13/31 Open FOR Aircraft Operations WIT...,使用分句
146,起落限制,PILOTS CARRYING OUT FLT FROM/TO KHABAROVSK/NOV...,PILOTS CARRYING OUT Flight FROM/TO KHABAROVSK/...,PILOTS CARRYING OUT Flight FROM/TO KHABAROVSK/...,[PILOTS CARRYING OUT Flight FROM/TO KHABAROVSK...,不使用分句，因为没有两句话及以上---存在动词表里的动词
147,起落限制,RWY 10R/28L CLSD FOR ACFT TKOF AND LDG.,Runway 10R/28L Closed FOR Aircraft Take-off AN...,Runway 10R/28L Closed FOR Aircraft Take-off AN...,[Runway 10R/28L Closed FOR Aircraft Take-off A...,不使用分句，因为只有一句话


In [4]:
# 最终汇总

NOTAM["cache"] = ''

for i, v in enumerate(NOTAM["E项"]):
    if chinese_or_not(v):
        svo_all = chinese_svo(v)
    elif bad_case_or_not(v):
        svo_all = bad_case_svo(v)
    elif NOTAM["是否使用分句及原因"][i] == "使用分句":
        svo_all = []
        v = NOTAM["E项-人类语"][i]
        sentences = sentence_tokenize(punctuation(v))
        # 遍历一个E项所有待解析单句
        for k in sentences:
            res = sentence_parse(encode(k))
            # 把所有解析出来的结果汇总
            if res[0]:
                for m in res[1]:
                    svo_all.append(m)
    else:
        svo_all = sentence_parse(v)[1]

    # svo_all 加到对应表格
    cache = ""
    for n in svo_all:
        # resul是一个解析结果（用in间隔）
        result_single = n[0] + "/in/" + str(n[1]) + "/in/" + n[2] + "/in/"+ n[3] + "/in/" + n[4] + "/in/" + n[5] + "/in/" + n[6] + "/in/" + n[7]
        cache += "/out/"
        cache += result_single
        cache = cache.lstrip("/out/")
    NOTAM["cache"][i] = cache

    # debug
    # print("第{}个E项解析出来的: ".format(i+1), svo_all)
    # if i == 11:
    #     break

# 拆分多个行
NOTAM["cache"] = NOTAM["cache"].str.split("/out/")
NOTAM = NOTAM.explode("cache")

# 拆分多个列
NOTAM_cache =NOTAM["cache"].str.split('/in/', expand=True)
NOTAM = NOTAM[~NOTAM.index.duplicated(keep="first")].drop(["cache"],axis=1).join(NOTAM_cache, how="right")

# rename
NOTAM.columns = ['类型', 'E项', 'E项-人类语', "E项-人类语标点符号预测", "E项-人类语分句", "是否使用分句及原因", '实体', "跑道实体", '动作', "原因", "限制", "限制_翼展", "限制_重量", "来源"]

# 合并居中单元格并保存
excel_name = "data/" + SHIT + ".xlsx"
NOTAM.to_excel(excel_name, index=False)
to_merge(pd.read_excel(excel_name), excel_name)


NOTAM

合并成功！


Unnamed: 0,类型,E项,E项-人类语,E项-人类语标点符号预测,E项-人类语分句,是否使用分句及原因,实体,跑道实体,动作,原因,限制,限制_翼展,限制_重量,来源
0,跑道数据/限制,RUNWAYS RESTRICTIONS DUE TO ON RUNWAYS DECK LA...,RUNWAYS RESTRICTIONS DUE TO ON RUNWAYS DECK LA...,RUNWAY RESTRICTIONS DUE TO ON RUNWAY DECK LAND...,[RUNWAY RESTRICTIONS DUE TO ON RUNWAY DECK LAN...,不使用分句，因为是bad_case，需要整体解析,RUNWAYS,['RUNWAYS'],RESTRICTIONS,DUE TO ON RUNWAYS DECK LANDING SIMULATION,DAILY\n PROGRAM KNOWN FROM 'OQCLA' : +3...,,,
1,跑道数据/限制,RWY 16R/34L 2469M NORTH END NOT AVBL DUE WIP R...,Runway 16R/34L 2469M NORTH Stop-end NOT Availa...,Runway 16R/34L 2469M- NORTH Stop-end- NOT Avai...,[Runway 16R/34L 2469M- NORTH Stop-end- NOT Ava...,使用分句,RWY 16R/34L 2469M- NORTH END-,['RWY 16R/34L'],NOT AVBL,DUE WIP,,,,
1,跑道数据/限制,RWY 16R/34L 2469M NORTH END NOT AVBL DUE WIP R...,Runway 16R/34L 2469M NORTH Stop-end NOT Availa...,Runway 16R/34L 2469M- NORTH Stop-end- NOT Avai...,[Runway 16R/34L 2469M- NORTH Stop-end- NOT Ava...,使用分句,FM RWY 34L,['RWY 34L'],START OF TKOF AND MARKED BY RED LGT: ALL TWY I...,,,,,
1,跑道数据/限制,RWY 16R/34L 2469M NORTH END NOT AVBL DUE WIP R...,Runway 16R/34L 2469M NORTH Stop-end NOT Availa...,Runway 16R/34L 2469M- NORTH Stop-end- NOT Avai...,[Runway 16R/34L 2469M- NORTH Stop-end- NOT Ava...,使用分句,DECLARED DISTANCE AND GRADIENT CHANGES: RWY TK...,['RWY '],AVBL,,,,,
1,跑道数据/限制,RWY 16R/34L 2469M NORTH END NOT AVBL DUE WIP R...,Runway 16R/34L 2469M NORTH Stop-end NOT Availa...,Runway 16R/34L 2469M- NORTH Stop-end- NOT Avai...,[Runway 16R/34L 2469M- NORTH Stop-end- NOT Ava...,使用分句,TKOF DIST,[],AVBL,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,起落限制,RWY 13/31 OPN FOR ACFT OPS WITH THE FLW LIMITA...,Runway 13/31 Open FOR Aircraft Operations WITH...,Runway 13/31 Open FOR Aircraft Operations WITH...,[Runway 13/31 Open FOR Aircraft Operations WIT...,使用分句,RWY 31 -,['RWY 31'],LTD TO TKOF BTN 2200-1000 DLY AND TAX OPS H24,,,,,
145,起落限制,RWY 13/31 OPN FOR ACFT OPS WITH THE FLW LIMITA...,Runway 13/31 Open FOR Aircraft Operations WITH...,Runway 13/31 Open FOR Aircraft Operations WITH...,[Runway 13/31 Open FOR Aircraft Operations WIT...,使用分句,RWY 13 -,['RWY 13'],"LTD TO LDG BTN 2200-1000 DLY, LTD TO TKOF, AND...",,,,,
146,起落限制,PILOTS CARRYING OUT FLT FROM/TO KHABAROVSK/NOV...,PILOTS CARRYING OUT Flight FROM/TO KHABAROVSK/...,PILOTS CARRYING OUT Flight FROM/TO KHABAROVSK/...,[PILOTS CARRYING OUT Flight FROM/TO KHABAROVSK...,不使用分句，因为没有两句话及以上---存在动词表里的动词,,,,,,,,
147,起落限制,RWY 10R/28L CLSD FOR ACFT TKOF AND LDG.,Runway 10R/28L Closed FOR Aircraft Take-off AN...,Runway 10R/28L Closed FOR Aircraft Take-off AN...,[Runway 10R/28L Closed FOR Aircraft Take-off A...,不使用分句，因为只有一句话,RWY 10R/28L FOR ACFT TKOF AND LDG,['RWY 10R/28L'],CLSD,,,,,
