In [None]:
import os
from os import mkdir
import sys
import numpy as np
import pandas as pd
import re
import json
import csv
import string
import time
from datetime import date, datetime
# 
import openpyxl
from openpyxl import Workbook
from openpyxl.styles import Alignment, PatternFill, Font, Border, Side, NamedStyle

In [None]:
# Wall-clock start of the run; the last cell reports the elapsed minutes from it.
stime = time.time()

# 【公務資料: 各套excel產出】
根據 df_survey_combine.csv 產生六個資料夾

# 1.讀檔

In [None]:
# read_csv options: keep empty cells as '' (no automatic NA conversion) and
# read the farmer number, link number and phone columns as text.
# ('engine': 'python' is intentionally left disabled.)
init = dict(
    encoding='utf-8',
    sep=',',
    keep_default_na=False,
    dtype={'farmer_num': str, 'link_num': str, 'tel': str},
)
df_survey_combine = pd.read_csv('df_survey_combine.csv', **init)

print(df_survey_combine.shape)
df_survey_combine.head(2)

# 2.各種資料整理成21欄

In [None]:
# Column layout: the Excel sheet needs 18 columns (A-R); the last one, R, is the national ID.
# Three bookkeeping columns (farmer number, link number, row type) are placed in front.
cols_3 = ['f_num', 'l_num', 'type']
cols_18 = list(string.ascii_uppercase[:18])
cols_21 = [*cols_3, *cols_18]

## 2-1: 名冊基本資料 df_info

In [None]:
# Source columns (or placeholders that do not exist in the survey frame) listed in the
# order they must land in A-J; K-R are placeholders as well.
info_source_cols = ['farmer_num', 'B', 'name', 'birth', 'layer', 'link_num', 'G', 'tel', 'I', 'addr']
cols_info = cols_3 + info_source_cols + list('KLMNOPQR')
# Pick/creates the columns, relabel them to the 21-column layout, blank out the gaps.
df_info = (
    df_survey_combine
    .reindex(columns=cols_info)
    .set_axis(cols_21, axis=1)
    .fillna('')
)
# A holds the farmer number and F the link number; copy them to the bookkeeping columns.
df_info = df_info.assign(f_num=df_info['A'], l_num=df_info['F'], type='0_基本b')

df_info.head(3)

## 2-2: 戶內人口資料 df_hh

In [None]:
cols_hh = ['farmer_num', 'link_num', 'household']
df_hh = df_survey_combine[cols_hh].rename(columns={'farmer_num': 'f_num', 'link_num': 'l_num'})
# The household column is text; swap the quotes so json can turn it back into a list,
# then explode vertically so that every household member gets its own row.
household_lists = df_hh['household'].str.replace("'", '"').map(json.loads)
df_hh['household'] = household_lists
df_hh = df_hh.explode('household').reset_index(drop=True)
# Rebuild the household column as 18 columns; rows without a member become 18 blanks.
data = [
    member if member else [''] * 18
    for member in df_hh['household'].fillna(0)
]
df_18 = pd.DataFrame(data=data, columns=cols_18)
# Arrange into the 21-column layout.
df_hh = pd.concat([df_hh.iloc[:, :4], df_18], axis=1).reindex(columns=cols_21)
df_hh['type'] = '1_戶籍b'
# Optional ordering by birth year / role (disabled):
# df_hh.sort_values(by=['f_num', 'A', 'B', 'R'], inplace=True)

print(df_hh.shape)
df_hh.head(3)

## 2-3: 申報核定 df_FD

In [None]:
cols_FD = ['farmer_num', 'link_num', 'fallow_declare']
fd_renames = {'farmer_num': 'f_num', 'link_num': 'l_num', 'fallow_declare': 'B'}
df_FD = (
    df_survey_combine[cols_FD]
    .rename(columns=fd_renames)
    # Farmers without a declared crop are not shown in the Excel output.
    .loc[lambda frame: frame['B'] != '']
    .reindex(columns=cols_21)
    .fillna('')
    # Column A carries the section label, `type` the sort key of this section.
    .assign(A='申報核定', type='2_申核b')
)

print(df_FD.shape)
df_FD.head(3)

## 2-4: 轉作補貼 df_FTS

In [None]:
cols_FTS = ['farmer_num', 'link_num', 'fallow_transfer_subsidy']
fts_renames = {'farmer_num': 'f_num', 'link_num': 'l_num', 'fallow_transfer_subsidy': 'FTS'}
df_FTS = df_survey_combine[cols_FTS].rename(columns=fts_renames)
# Swap the quotes so json can parse the stored text back into a list of dicts.
df_FTS = df_FTS.assign(FTS=df_FTS['FTS'].str.replace("'", '"').map(json.loads))
# One row per subsidy item; farmers without any subsidy drop out here.
df_FTS = df_FTS.explode('FTS').dropna().reset_index(drop=True)
data = [list(item.values()) for item in df_FTS['FTS']]
df_CDE = pd.DataFrame(data=data, columns=list('CDE'))
# Arrange into the 21-column layout; column B gets the item number within each
# farmer, computed as the rank of the row position inside the f_num group.
df_FTS = pd.concat([df_FTS.iloc[:, :4], df_CDE], axis=1).assign(B=0)
fts_item_no = (
    df_FTS[['f_num']]
    .reset_index()
    .rename(columns={'index': 'B'})
    .groupby('f_num')
    .transform(lambda x: x.rank())
    .astype(int)
)
df_FTS[['B']] = fts_item_no
df_FTS = df_FTS.reindex(columns=cols_21).fillna('')
df_FTS['type'] = '3_轉補b'

print(df_FTS.shape)
df_FTS.head(3)

## 2-5: 災害救助 df_DS

In [None]:
cols_DS = ['farmer_num', 'link_num', 'disaster_subsidy']
ds_renames = {'farmer_num': 'f_num', 'link_num': 'l_num', 'disaster_subsidy': 'DS'}
df_DS = df_survey_combine[cols_DS].rename(columns=ds_renames)
# Swap the quotes so json can parse the stored text back into a list of dicts.
df_DS = df_DS.assign(DS=df_DS['DS'].str.replace("'", '"').map(json.loads))
# One row per relief item; farmers without any relief drop out here.
df_DS = df_DS.explode('DS').dropna().reset_index(drop=True)
data = [list(item.values()) for item in df_DS['DS']]
df_CDEF = pd.DataFrame(data=data, columns=list('CDEF'))
# Arrange into the 21-column layout; column B gets the item number within each
# farmer (rank of the row position inside the f_num group). G='DS' is a marker
# that export_excel uses to recognise these rows.
df_DS = pd.concat([df_DS.iloc[:, :4], df_CDEF], axis=1).assign(B=0, G='DS')
ds_item_no = (
    df_DS[['f_num']]
    .reset_index()
    .rename(columns={'index': 'B'})
    .groupby('f_num')
    .transform(lambda x: x.rank())
    .astype(int)
)
df_DS[['B']] = ds_item_no
df_DS = df_DS.reindex(columns=cols_21).fillna('')
df_DS['type'] = '4_災害b'
# Approved area: 0.651 had turned into 0.650999999 when df_survey_combine.csv was exported.
df_DS = df_DS.round({'E': 4})

print(df_DS.shape)
df_DS.head(3)

## 2-6: 畜牧 df_LS

In [None]:
cols_LS = ['farmer_num', 'link_num', 'livestock']
df_LS = df_survey_combine[cols_LS].rename(columns={'farmer_num': 'f_num', 'link_num': 'l_num', 'livestock': 'LS'})
# Insert a synthetic 'space_<farm>' entry (one all-blank record) in front of every farm
# key that follows a comma, i.e. a blank line between two farms.
# regex=True is passed explicitly: the pattern uses a capture group, and the default of
# Series.str.replace is not the same in every pandas version.
df_LS['LS'] = df_LS['LS'].str.replace(
    ", *'([^',]+?)':",
    r", 'space_\1': [['','','','','','','']], '\1':",
    regex=True,
)
# json needs double quotes and cannot parse None; None becomes an empty string.
df_LS['LS'] = (
    df_LS['LS']
    .str.replace("'", '"', regex=False)
    .str.replace("None", '""', regex=False)
    .map(json.loads)
)
# One row per farm: (farm name, list of records).
df_LS['LS'] = df_LS['LS'].map(lambda farms: list(farms.items()))
df_LS = df_LS.explode('LS').dropna()
# Prefix every record with a name cell: the farm name on the first record, '' on the rest.
df_LS['LS'] = df_LS['LS'].map(lambda x: [[x[0]] + x[1][0]] + [[''] + Ylist for Ylist in x[1][1:]])
# One row per record.
df_LS = df_LS.explode('LS').dropna().reset_index(drop=True)
# Rebuild the LS column as 8 columns (A-H).
data = df_LS['LS'].values.tolist()
df_8 = pd.DataFrame(data=data, columns=list('ABCDEFGH'))
# Arrange into the 21-column layout.
df_LS = pd.concat([df_LS.iloc[:, :4], df_8], axis=1).reindex(cols_21, axis=1).fillna('')
# Blank out the synthetic 'space_<farm>' names so the spacer rows show up empty.
df_LS['A'] = df_LS['A'].str.replace('space.*', '', regex=True)
df_LS['type'] = '5_畜牧b'

print(df_LS.shape)
df_LS[:3]

## 2-7: 小大 df_SL

In [None]:
def _person_rows(people):
    """Turn a list of per-person dicts into a list of value lists."""
    return [list(person.values()) for person in people]


cols_SL = ['farmer_num', 'link_num', 'small_large_data']
sl_renames = {'farmer_num': 'f_num', 'link_num': 'l_num', 'small_large_data': 'SL'}
df_SL = df_survey_combine[cols_SL].rename(columns=sl_renames)
# Swap the quotes so json can parse the stored text, then keep only the dict values.
df_SL['SL'] = df_SL['SL'].str.replace("'", '"').map(json.loads).map(_person_rows)
# One row per person; farmers without data drop out here.
df_SL = df_SL.explode('SL').dropna().reset_index(drop=True)
# Rebuild the SL column as 6 columns (B-G).
data = df_SL['SL'].values.tolist()
df_6 = pd.DataFrame(data=data, columns=list('BCDEFG'))
df_SL = (
    pd.concat([df_SL.iloc[:, :4], df_6], axis=1)
    .reindex(columns=cols_21)
    .fillna('')
)
# H='SL' is a marker that export_excel uses to recognise these rows.
df_SL['H'] = 'SL'
df_SL['type'] = '6_小大b'

print(df_SL.shape)
df_SL.head(3)

## 2-8: 作物名稱 df_crop

In [None]:
cols_crop = ['farmer_num', 'link_num', 'crop_name']
crop_renames = {'farmer_num': 'f_num', 'link_num': 'l_num', 'crop_name': 'B'}
df_crop = (
    df_survey_combine[cols_crop]
    .rename(columns=crop_renames)
    # Only farmers with a non-empty crop name get a row.
    .loc[lambda frame: frame['B'] != '']
    .reindex(columns=cols_21)
    .fillna('')
    # Column A carries the section label, `type` the sort key of this section.
    .assign(A='作物名稱', type='7_作名b')
)

print(df_crop.shape)
df_crop.head(3)

## 2-9: 子女獎助金 df_CS

In [None]:
cols_CS = ['farmer_num', 'link_num', 'child_scholarship']
cs_renames = {'farmer_num': 'f_num', 'link_num': 'l_num', 'child_scholarship': 'B'}
df_CS = (
    df_survey_combine[cols_CS]
    .rename(columns=cs_renames)
    # Only farmers with a non-empty scholarship value get a row.
    .loc[lambda frame: frame['B'] != '']
    .reindex(columns=cols_21)
    .fillna('')
    # Column A carries the section label, `type` the sort key of this section.
    .assign(A='子女獎助金', type='8_子女b')
)

print(df_CS.shape)
df_CS.head(3)

# 3.每一農戶的各段標題

## 3-1: info 標題 (一戶一列)


In [None]:
# Header row of the basic-info section: one row per farmer, sorted just before the
# data row through type='0_基本a'.
info_header_cols = list('ABCDEFHJ')
df_title_info = (
    df_info[cols_3]
    .assign(type='0_基本a')
    .reindex(cols_21, axis=1)
    # reindex creates the header columns as all-NaN floats; make them object columns
    # so the header strings can be written without relying on implicit upcasting.
    .astype({col: object for col in info_header_cols})
)
# Fill the first row, then forward-fill the labels into every other row.
df_title_info.loc[:0, info_header_cols] = ['農戶編號', '序號', '調查姓名', '出生年', '層別', '連結編號', '電話', '地址']
df_title_info = df_title_info.ffill().fillna('')

df_title_info[:3]

## 3-2: hh 標題 (一戶一列)

In [None]:
# Header row of the household section: one row per farmer, sorted just before the
# household rows through type='1_戶籍a'.
df_title_hh = (
    df_info[cols_3]
    .assign(type='1_戶籍a')
    .reindex(cols_21, axis=1)
    # All 18 sheet columns receive a string below; cast to object first so the write
    # does not rely on implicit upcasting of the all-NaN float columns from reindex.
    .astype(object)
)
# Fill the first row, then forward-fill the labels into every other row.
hh_title = [
    '出生年', '關係', '死亡或\n除口', '農保/農職', '老農津貼', '國保給付', '勞保給付', '勞退給付', '農保給付',
    '住院\n日數\n(1-8月)', '門診\n次數\n(1-8月)', '健保\n身分別', '健保被\n保險人\n註記', '應繳眷\n口數', '健保自\n付金額\n(1-8月)',
    '勞保費\n8月', '國保實\n收保費\n(1-8月)', ''
]
df_title_hh.iloc[:1, 3:] = hh_title
df_title_hh = df_title_hh.ffill()

df_title_hh[:3]

## 3-3: FTS 轉作補貼標題 (一戶有資料才一列)

In [None]:
# Header row of the transfer-subsidy section: one row per farmer that has subsidy rows.
fts_header_cols = list('ABCDE')
df_title_FTS = (
    df_FTS[cols_3]
    .drop_duplicates()
    .assign(type='3_轉補a')
    .reindex(cols_21, axis=1)
    # reindex creates the header columns as all-NaN floats; make them object columns
    # so the header strings can be written without relying on implicit upcasting.
    .astype({col: object for col in fts_header_cols})
)
# Fill the first row, then forward-fill the labels into every other row.
df_title_FTS.loc[:0, fts_header_cols] = ['轉作補貼', '項目', '作物名稱', '金額', '期別']
df_title_FTS = df_title_FTS.ffill().fillna('')

df_title_FTS[:3]

## 3-4: DS 災害救助標題 (一戶有資料才一列)

In [None]:
# Header row of the disaster-relief section: one row per farmer that has relief rows.
ds_header_cols = list('ABCDEF')
df_title_DS = (
    df_DS[cols_3]
    .drop_duplicates()
    .assign(type='4_災害a')
    .reindex(cols_21, axis=1)
    # reindex creates the header columns as all-NaN floats; make them object columns
    # so the header strings can be written without relying on implicit upcasting.
    .astype({col: object for col in ds_header_cols})
)
# Fill the first row, then forward-fill the labels into every other row.
df_title_DS.loc[:0, ds_header_cols] = ['災害', '項目', '災害', '核定作物', '核定面積', '金額']
df_title_DS = df_title_DS.ffill().fillna('')

df_title_DS[:3]

## 3-5: LS 畜牧標題 (一戶有資料才一列)

In [None]:
# Header row of the livestock section: one row per farmer that has livestock rows.
ls_header_cols = list('ABCDEF')
df_title_LS = (
    df_LS[cols_3]
    .drop_duplicates()
    .assign(type='5_畜牧a')
    .reindex(cols_21, axis=1)
    # reindex creates the header columns as all-NaN floats; make them object columns
    # so the header strings can be written without relying on implicit upcasting.
    .astype({col: object for col in ls_header_cols})
)
# Fill the first row, then forward-fill the labels into every other row.
df_title_LS.loc[:0, ls_header_cols] = ['畜牧資訊', '年份', '調查時間', '畜牧品項', '在養頭數', '供應\n屠宰數']
df_title_LS = df_title_LS.ffill().fillna('')

df_title_LS[:3]

### >>> 畜牧【G,H】欄特別處理

In [None]:
# Combine the livestock rows with their header rows, order them per farmer
# (header type '5_畜牧a' sorts before data type '5_畜牧b') and keep the first
# 13 columns (f_num, l_num, type, A-J).
df_LS_and_title = pd.concat([df_LS, df_title_LS]).sort_values(by=['f_num', 'type']).iloc[:, :13]
# Per farmer, take the max of column G so the milk-yield label string also lands in the header row's G.
df_LS_and_title[['G']] = df_LS_and_title[['f_num', 'G']].groupby('f_num').transform(max)  # carry the milk-yield label into the header row via max
# For every non-header row, move the milk-yield value from H into G.
where = df_LS_and_title['A'] != '畜牧資訊'
df_LS_and_title['G'] = np.where(where, df_LS_and_title['H'], df_LS_and_title['G'])
# Blank out zero values in G.
# NOTE(review): to_replace is the int 0 combined with regex=True — presumably this acts as a plain value replace; confirm.
df_LS_and_title['G'] = df_LS_and_title['G'].replace(0, '', regex=True)
df_LS_and_title['H'] = np.where(where, 'LS', '') # deliberate marker: export_excel looks for 'LS' in H
# Back to the 21-column layout with blanks in the columns dropped above.
df_LS_and_title = df_LS_and_title.reindex(cols_21, axis=1).fillna('')
#
# Spot check for a single farmer (disabled):
# where = df_LS_and_title.f_num == '100040313271'
# df_LS_and_title[where]
df_LS_and_title[:3].iloc[:,:11]

## 3-6: SL 小大標題 (一戶有資料才一列)

In [None]:
# Header row of the small-landlord / large-tenant section: one row per farmer that has such rows.
sl_header_cols = list('ABCDEFG')
df_title_SL = (
    df_SL[cols_3]
    .drop_duplicates()
    .assign(type='6_小大a')
    .reindex(cols_21, axis=1)
    # reindex creates the header columns as all-NaN floats; make them object columns
    # so the header strings can be written without relying on implicit upcasting.
    .astype({col: object for col in sl_header_cols})
)
# Fill the first row, then forward-fill the labels into every other row.
df_title_SL.loc[:0, sl_header_cols] = ['小大補貼', '姓名', '大專業農\n轉契作', '小地主\n出租給付', '離農獎勵', '期別', '是否為\n小大']
df_title_SL = df_title_SL.ffill().fillna('')

df_title_SL[:3]

## 3-9: 空白行 (用type控制位置)

In [None]:
def _spacer_rows(source, row_type):
    """Return one blank 21-column row per row of ``source``, tagged with ``row_type``.

    Only f_num/l_num are carried over; ``type`` is set to ``row_type`` (it controls
    where the row sorts inside a farmer's block) and columns A-R are ''.
    """
    return source[cols_3].assign(type=row_type).reindex(cols_21, axis=1).fillna('')


# Template blank row, one per farmer.
df_space = _spacer_rows(df_info, '')
# Blank line above the household header.
df_space_hh = df_space.assign(type='1_戶籍0')
# Blank line above the declaration row.
df_space_FD = _spacer_rows(df_FD, '2_申核0')
# Blank line above the transfer-subsidy header.
df_space_FTS = _spacer_rows(df_title_FTS, '3_轉補0')
# Blank line above the disaster-relief header.
df_space_DS = _spacer_rows(df_title_DS, '4_災害0')
# Blank line above the livestock header.
df_space_LS = _spacer_rows(df_title_LS, '5_畜牧0')
# Blank line above the small/large header.
df_space_SL = _spacer_rows(df_title_SL, '6_小大0')
# Blank line above the crop-name row.
df_space_crop = _spacer_rows(df_crop, '7_作名0')
# Blank line above the child-scholarship row.
df_space_CS = _spacer_rows(df_CS, '8_子女0')
# Two closing lines per farmer; A='last' deliberately marks the final one so that
# export_excel can draw the bottom border. pd.concat replaces DataFrame.append,
# which no longer exists in current pandas.
df_space_last2 = pd.concat([
    df_space.assign(type='9_空白1'),
    df_space.assign(type='9_空白2', A='last'),
])

# 4.輸出Excel

## 4-1: 組成excel的樣子

In [None]:
# Data rows (the livestock frame already contains its header rows).
data_frames = [
    df_info,
    df_hh,
    df_FD,            # declaration
    df_FTS,           # transfer subsidy
    df_DS,            # disaster relief
    df_LS_and_title,  # livestock + header
    df_SL,            # small/large
    df_crop,          # crop name
    df_CS,            # child scholarship
]
# Section header rows.
title_frames = [df_title_info, df_title_hh, df_title_FTS, df_title_DS, df_title_SL]
# Blank separator rows; df_space_last2 holds the two closing lines of each farmer.
spacer_frames = [
    df_space_hh,
    df_space_FD,
    df_space_FTS,
    df_space_DS,
    df_space_LS,
    df_space_SL,
    df_space_crop,
    df_space_CS,
    df_space_last2,
]
df_list = data_frames + title_frames + spacer_frames
# Stack everything and let (f_num, type) put each farmer's block in sheet order.
df_final = (
    pd.concat(df_list)
    .sort_values(by=['f_num', 'type'])
    .reset_index(drop=True)
)

## 4-2: 增加排序用的【man/county/addr】

In [None]:
# Attach the columns used only for ordering/splitting (surveyor, county, address)
# and move them in front of the 21-column layout.
cols_4 = ['farmer_num', 'man', 'county', 'addr']
df_survey_4 = df_survey_combine[cols_4].rename(columns={'farmer_num': 'f_num'})
sort_key_cols = cols_4[1:]
df_final = pd.merge(df_final, df_survey_4, how='left', on='f_num')[sort_key_cols + cols_21]

df_final.head(3)

## 4-3: 輸出excel的函數

In [None]:
def export_excel(df, path_folder_ID='', fn='公務資料_all'):
    """Export ``df`` as two formatted Excel workbooks (with and without the ID column).

    The frame is first dumped to a temporary CSV so that every cell comes back
    as text. Each row is then classified from marker values, converted to
    numbers where appropriate, appended to the sheet and styled. The workbook
    is saved as ``<fn> (有ID).xlsx`` in ``path_folder_ID``; columns R and N are
    then deleted and it is saved again as ``<fn>.xlsx`` in the folder whose
    path is ``path_folder_ID`` without the ``'(有ID) '`` marker.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to export. Must contain a column ``'A'``; rows are read by
        position 0-17, i.e. the 18 sheet columns A-R (callers pass
        ``df[cols_18]``).
    path_folder_ID : str, optional
        Output folder of the with-ID workbook; created together with missing
        parents. ``''`` writes into the current directory.
    fn : str, optional
        File name stem of both workbooks.

    Notes
    -----
    Uses the notebook-level ``cols_18`` for the column letters. The markers
    written by earlier cells (``'DS'`` in G, ``'LS'``/``'SL'`` in H, ``'last'``
    in A) are consumed here and blanked before the row is written.
    """
    # Create the with-ID folder and its without-ID twin (including missing parents).
    if path_folder_ID:
        os.makedirs(path_folder_ID, exist_ok=True)
    path_folder = path_folder_ID.replace('(有ID) ', '')
    if path_folder:
        os.makedirs(path_folder, exist_ok=True)
    # The with-ID workbook is written first, the without-ID one is derived from it.
    path_csv_ID = os.path.join(path_folder_ID, f'{fn} (有ID).csv')
    path_xlsx_ID = os.path.join(path_folder_ID, f'{fn} (有ID).xlsx')
    path_xlsx = os.path.join(path_folder, f'{fn}.xlsx')
    # Round-trip through a temporary CSV; it is removed even if reading fails.
    df.to_csv(path_csv_ID, index=False, header=False)
    try:
        # newline='' is what the csv module requires for fields with embedded newlines.
        with open(path_csv_ID, 'r', encoding='UTF-8', newline='') as csv_file:
            csv_rows = list(csv.reader(csv_file, delimiter=','))
    finally:
        os.remove(path_csv_ID)
    where = df['A'] == '農戶編號'
    total_farmers = df[where].shape[0]  # number of farmers in this workbook
    # Formatting objects
    alignment = Alignment('left', 'top', wrap_text=True)  # wrap text
    alignment_nowarp = Alignment('left', 'top', wrap_text=False)
    align_center = Alignment('center', 'top', wrap_text=True)  # centred sequence number
    fillY = PatternFill(fill_type='solid', fgColor='FFFF00')
    title_fill = PatternFill(start_color='F7F7F7', end_color='F7F7F7', fill_type='solid')
    title_font = Font(bold=True)
    bottom_border = Border(bottom=Side(border_style='double', color='000000'))
    # Header style
    title = NamedStyle(name="title")
    title.fill = title_fill
    title.font = title_font
    title.alignment = alignment
    # Integer style
    num = NamedStyle(name="num")
    num.number_format = "#,###,##0"  # thousands separator
    num.alignment = alignment
    # Float style: approved area has 4 decimals, eggs 3
    num_f = NamedStyle(name="num_f")
    num_f.number_format = "#,###,##0.####"
    num_f.alignment = alignment
    # Column widths
    widths = [
        13.29,  9.29,  12.29,  11.29,  10.29, 10.29, 11.29, 10.29, 10.29,  # A-I
        7.79, 7.79,  7.79,  7.79,  7.79,  7.79,  7.79, 7.79, 13.29  # J-R
    ]
    ratio = 1.054
    # ==================================================================================
    wb = Workbook()
    sheet = wb.active
    for idx, col in enumerate(cols_18):
        sheet.column_dimensions[col].width = widths[idx]*ratio  # set column width
    # 0-based indexes of the household columns converted to integers (E-K and N-Q).
    list_hh = list(range(4, 11)) + list(range(13, 17))
    # Item numbers '1'..'19' identify subsidy / relief item rows through column B.
    list_20 = [str(i) for i in range(1, 20)]
    order_num = 0
    # Paste the CSV rows into the sheet one by one.
    for n, row in enumerate(csv_rows, 1):
        # Values must be pasted as numbers for the thousands/decimal formats to apply.
        DS = False
        LS = False
        SL = False
        area_float = False
        egg_float = False
        last = False
        order = False
        if row[17]:  # household-member row (R holds an ID); indexes 11, 12 (L, M) stay text
            for i in list_hh:
                row[i] = row[i] and int(row[i])
        elif len(row[0]) > 10 and row[1] == '' and len(row[5]) == 5:
            # Basic-info row: long value in A, empty B, 5-character link number in F.
            order_num += 1
            row[1] = f'{order_num}/{total_farmers}'  # fill in the sequence number
            order = True
        elif row[1] in list_20:
            if row[6] == 'DS':
                DS = True
                area = float(row[4])  # approved area
                area_float = bool(area - int(area))
                row[4] = area_float and area or int(area)
                row[5] = int(row[5])  # relief amount as integer
                row[6] = ''
            else:
                row[3] = int(row[3])  # subsidy amount as integer
        elif row[7] == 'LS':
            LS = True
            row[7] = ''
            for i in [4, 5]:  # head count / slaughter: may be '出清' or ''; '0' is still pasted as text
                row[i] = (row[i] not in ['出清', '']) and int(row[i]) or row[i]
            if row[6]:
                tmp = float(row[6])
                egg_float = bool(tmp - int(tmp))  # eggs may have 3 decimals
                row[6] = egg_float and tmp or int(tmp)
        elif row[7] == 'SL':
            SL = True
            row[7] = ''
            for i in [2, 3, 4]:
                row[i] = row[i] and int(row[i])
        elif row[0] == 'last':
            last = True
            row[0] = ''
        ####################################################################
        sheet.append(row)
        ####################################################################
        # (1) header formats
        if row[0] in ['農戶編號', '出生年']:
            for col in range(1, 18):
                sheet.cell(row=n, column=col).style = title
        elif row[0] in ['申報核定', '作物名稱', '子女獎助金']:
            sheet.cell(row=n, column=1).style = title
        elif row[0] == '轉作補貼':
            for col in range(1, 6):
                sheet.cell(row=n, column=col).style = title
        elif row[0] == '災害':
            for col in range(1, 7):
                sheet.cell(row=n, column=col).style = title
        elif row[0] == '畜牧資訊':
            for col in range(1, 7):
                sheet.cell(row=n, column=col).style = title
            if row[6]:  # column G carries the extra milk-yield header
                sheet.cell(row=n, column=7).style = title
        elif row[0] == '小大補貼':
            for col in range(1, 8):
                sheet.cell(row=n, column=col).style = title
        # (2) thousands-separator and decimal formats
        elif row[17]:
            for col in [i+1 for i in list_hh]:
                if row[col-1] != "":
                    sheet.cell(row=n, column=col).style = num  # household-member row
        elif order:
            sheet.cell(row=n, column=2).alignment = align_center  # centred sequence number
            sheet.cell(row=n, column=8).alignment = alignment  # phone numbers wrap
            for col in [1, 3, 4, 5, 6, 10]:
                sheet.cell(row=n, column=col).alignment = alignment_nowarp
            if row[5][-1] in ['1', '4']:
                sheet.cell(row=n, column=6).fill = fillY  # yellow link number for sets 1 and 4
        elif row[1] in list_20:
            if DS:
                for col in [2, 3, 4]:
                    sheet.cell(row=n, column=col).alignment = alignment  # item / disaster / approved crop
                sheet.cell(row=n, column=5).style = area_float and num_f or num  # approved area
                sheet.cell(row=n, column=6).style = num  # relief amount
            else:
                for col in [2, 3, 5]:
                    sheet.cell(row=n, column=col).alignment = alignment  # subsidy item / crop name / period
                sheet.cell(row=n, column=4).style = num  # subsidy amount
        elif LS:
            for col in [1, 2, 3, 4]:
                sheet.cell(row=n, column=col).alignment = alignment  # farm name wraps
            for col in [5, 6]:  # head count / slaughter with thousands separator
                sheet.cell(row=n, column=col).style = num
            if row[6]:
                sheet.cell(row=n, column=7).style = egg_float and num_f or num
        elif SL:
            for col in [3, 4, 5]:  # the three small/large amounts
                sheet.cell(row=n, column=col).style = num
        elif last:
            for col in range(1, 18):
                sheet.cell(row=n, column=col).border = bottom_border  # double bottom border on the last line
    # Save the with-ID workbook ==========================================================
    wb.save(path_xlsx_ID)
    # Drop the ID (R) and the dependants-count (N) columns, then save the without-ID copy.
    sheet.delete_cols(18)  # R
    sheet.delete_cols(14)  # N
    wb.save(path_xlsx)

## 4-4: 輸出excel的目錄 & 切割各套df

In [None]:
# Output directory tree: <src>/output/<year>/...
folder_src = ['farmer_income_survey', 'resources']
folder_output = folder_src + ['output']
year_this = '109'
folder_output_thisyear = folder_output + [year_this]
path_folder_output_thisyear = os.path.join(*folder_output_thisyear)
# makedirs also creates missing parent folders and is a no-op when the folder exists.
os.makedirs(path_folder_output_thisyear, exist_ok=True)
# All seven sets in a single workbook.
folder_7_ID               = folder_output_thisyear + ['(有ID) 七套公務資料']
path_folder_7_ID          = os.path.join(*folder_7_ID)
# Six workbook folders: main set 0; backup sets by surveyor 123/456; backup sets by county 123/1-6.
folder_0_man_ID           = folder_output_thisyear + ['(有ID) 主選0____公務資料(依調查員)']
folder_0_county_ID        = folder_output_thisyear + ['(有ID) 主選0____公務資料(依縣市)']
folder_123_man_ID         = folder_output_thisyear + ['(有ID) 備選123_公務資料(依調查員)']
folder_456_man_ID         = folder_output_thisyear + ['(有ID) 備選456_公務資料(依調查員)']
folder_123_county_ID      = folder_output_thisyear + ['(有ID) 備選123_公務資料(依縣市)']
folder_1_6_county_ID      = folder_output_thisyear + ['(有ID) 備選123456_公務資料(依縣市)']
path_folder_0_man_ID      = os.path.join(*folder_0_man_ID)
path_folder_0_county_ID   = os.path.join(*folder_0_county_ID)
path_folder_123_man_ID    = os.path.join(*folder_123_man_ID)
path_folder_456_man_ID    = os.path.join(*folder_456_man_ID)
path_folder_123_county_ID = os.path.join(*folder_123_county_ID)
path_folder_1_6_county_ID = os.path.join(*folder_1_6_county_ID)

In [None]:
# The last character of the link number is the set number: 0 = main set, 1-6 = backup sets.
# Each subset is an explicit copy because later cells sort it and add columns to it.
# isin() matches the single character exactly and treats a missing value as "no match".
set_digit = df_final['l_num'].str[-1]
# Main set
where = set_digit == '0'
df_final_0 = df_final[where].copy()
# Backup sets 1-3
where = set_digit.isin(list('123'))
df_final_123 = df_final[where].copy()
# Backup sets 4-6
where = set_digit.isin(list('456'))
df_final_456 = df_final[where].copy()
# Backup sets 1-6
where = set_digit.isin(list('123456'))
df_final_1_6 = df_final[where].copy()

### 輸出七套

In [None]:
# All seven sets in one workbook (only the 18 sheet columns are exported).
export_excel(
    df=df_final[cols_18],
    path_folder_ID=path_folder_7_ID,
    fn=f'七套公務資料_{year_this}',
)

## 4-5: 主選 0

In [None]:
# Main set: one workbook per surveyor and one per county, each ordered by address first.
# By surveyor
df_final_0 = df_final_0.sort_values(by=['man', 'addr', 'f_num', 'type'])
man_list = df_final_0['man'].drop_duplicates()
for man in man_list:
    rows_of_man = df_final_0.loc[df_final_0['man'] == man, cols_18]
    export_excel(rows_of_man, path_folder_0_man_ID, f'{man}_主選公務資料_{year_this}')

# By county
df_final_0 = df_final_0.sort_values(by=['county', 'addr', 'f_num', 'type'])
county_list = df_final_0['county'].drop_duplicates()
for county in county_list:
    rows_of_county = df_final_0.loc[df_final_0['county'] == county, cols_18]
    export_excel(rows_of_county, path_folder_0_county_ID, f'{county}_主選公務資料_{year_this}')

print(f'主選共有{len(man_list)}個調查員excel檔')
print(f'主選共有{len(county_list)}個縣市excel檔')

## 4-6: 備選【依調查員】_123/456

In [None]:
# Sets 1-3: add a helper address column taken from set 1; rows of sets 2 and 3 start as NaN.
# After ordering by link number (rows sharing the first four characters form one group)
# the set-2/3 rows inherit the address of the preceding set-1 row.
# - kind='mergesort' keeps rows with the same link number in their existing order
#   (the default algorithm for a single sort key is not stable).
# - Only the helper column is forward-filled, so no other column can pick up a value
#   from the row above.
is_set_1 = df_final_123['l_num'].str[-1] == '1'
df_final_123 = (
    df_final_123
    .assign(addr_1=df_final_123['addr'].where(is_set_1))
    .sort_values(by=['l_num'], ascending=True, kind='mergesort')
)
df_final_123['addr_1'] = df_final_123['addr_1'].ffill()
# Sets 4-6: same idea with set 4 supplying the address for sets 5 and 6.
is_set_4 = df_final_456['l_num'].str[-1] == '4'
df_final_456 = (
    df_final_456
    .assign(addr_4=df_final_456['addr'].where(is_set_4))
    .sort_values(by=['l_num'], ascending=True, kind='mergesort')
)
df_final_456['addr_4'] = df_final_456['addr_4'].ffill()

In [None]:
# A. Groups of sets 1-3 are ordered by the set-1 address; inside a group by link number.
df_final_123 = df_final_123.sort_values(by=['man', 'addr_1', 'l_num', 'type'])
man_list = df_final_123['man'].drop_duplicates()
for man in man_list:
    rows_of_man = df_final_123.loc[df_final_123['man'] == man, cols_18]
    export_excel(rows_of_man, path_folder_123_man_ID, f'{man}_備選123公務資料_{year_this}')

# B. Groups of sets 4-6 are ordered by the set-4 address; inside a group by link number.
df_final_456 = df_final_456.sort_values(by=['man', 'addr_4', 'l_num', 'type'])
man_list = df_final_456['man'].drop_duplicates()
for man in man_list:
    rows_of_man = df_final_456.loc[df_final_456['man'] == man, cols_18]
    export_excel(rows_of_man, path_folder_456_man_ID, f'{man}_備選456公務資料_{year_this}')

## 4-7: 備選【依縣市】_123/1-6

In [None]:
# Within a county the rows are ordered by link number.
# Sets 1-3, one workbook per county
df_final_123 = df_final_123.sort_values(by=['county', 'l_num', 'type'])
county_list = df_final_123['county'].drop_duplicates()
for county in county_list:
    rows_of_county = df_final_123.loc[df_final_123['county'] == county, cols_18]
    export_excel(rows_of_county, path_folder_123_county_ID, f'{county}_備選123公務資料_{year_this}')

# Sets 1-6, one workbook per county
df_final_1_6 = df_final_1_6.sort_values(by=['county', 'l_num', 'type'])
county_list = df_final_1_6['county'].drop_duplicates()
for county in county_list:
    rows_of_county = df_final_1_6.loc[df_final_1_6['county'] == county, cols_18]
    export_excel(rows_of_county, path_folder_1_6_county_ID, f'{county}_備選1_6公務資料_{year_this}')

In [None]:
# Total runtime of the notebook in minutes.
elapsed_minutes = (time.time() - stime) / 60
elapsed_minutes