In [None]:
import os
import pandas as pd
import numpy as np
from openpyxl import load_workbook

def parse_value(cell):
    """Convert a raw worksheet cell value into a non-zero integer count.

    Args:
        cell: Raw cell value (number, string, or None). Strings may contain
            thousands separators (',') and surrounding whitespace.

    Returns:
        The value as an ``int``, or ``pd.NA`` when the cell is empty, a dash
        placeholder, zero, non-integral, or otherwise not a number.
    """
    # Booleans are not counts; None means the cell is empty.
    if cell is None or isinstance(cell, bool):
        return pd.NA

    text = str(cell).replace(',', '').strip()
    try:
        val = int(text)
    except ValueError:
        # Numeric cells are frequently delivered as floats (e.g. 1234.0),
        # whose text form int() rejects. Accept them only when they hold a
        # whole number; NaN/inf and fractional values are treated as missing.
        try:
            number = float(text)
        except ValueError:
            return pd.NA
        if not number.is_integer():
            return pd.NA
        val = int(number)

    # Zero is treated as "no data", same as the dash placeholders.
    return pd.NA if val == 0 else val

def process_single_file(filepath):
    """Extract registration counts by region, fuel type and usage from one workbook.

    Reads the sheet named '10.연료별_등록현황':
      * B2 holds the year-month label,
      * D3:U3 hold the region names (per the original layout note:
        17 provinces/cities plus a total column),
      * for each fuel type, the two rows directly below its subtotal row are
        read as the non-business ('비사업용') and business ('사업용') counts.

    Args:
        filepath: Path to the .xlsx workbook.

    Returns:
        A long-format DataFrame with columns
        ['연월', '지역', '연료유형', '용도', '등록대수'], where '등록대수' has
        nullable ``Int64`` dtype. The frame is empty (but still has these
        columns) when no usable region header is found.

    Raises:
        KeyError: If the workbook has no sheet named '10.연료별_등록현황'.
    """
    columns = ['연월', '지역', '연료유형', '용도', '등록대수']
    placeholders = (None, '-', '–', '—', '')
    first_region_col = 4  # column D (1-based), matching the D3:U3 header range

    wb = load_workbook(filepath, data_only=True)
    sheet = wb['10.연료별_등록현황']

    # 1. Year-month label from B2; strip spaces and the trailing '월' (month) suffix.
    year_month = sheet['B2'].value
    if isinstance(year_month, str):
        year_month = year_month.strip().replace(' ', '').replace('월', '')

    # 2. Region names from D3:U3.
    header_cells = sheet['D3':'U3'][0]

    # 3. First row of each fuel block (gasoline, diesel, electric).
    fuel_info = {
        '휘발유': 5,
        '경유': 22,
        '전기': 73
    }

    records = []
    for fuel, start_row in fuel_info.items():
        # Layout constants carried over from the original: the subtotal row
        # sits 13 rows below the block start, followed by the non-business
        # row and then the business row.
        subtotal_row = start_row + 13
        usage_rows = (
            ('비사업용', subtotal_row + 1),
            ('사업용', subtotal_row + 2),
        )

        for offset, header in enumerate(header_cells):
            region = header.value
            # Skip header columns that carry no region name.
            if region in placeholders or pd.isna(region):
                continue

            # Address cells numerically instead of building a column letter,
            # so the lookup does not depend on single-letter columns.
            column = first_region_col + offset
            for usage, row in usage_rows:
                raw = sheet.cell(row=row, column=column).value
                records.append({
                    '연월': year_month,
                    '지역': region,
                    '연료유형': fuel,
                    '용도': usage,
                    '등록대수': parse_value(raw)
                })

    # Passing explicit columns keeps the schema intact (and avoids a KeyError
    # on '등록대수') when no records were produced.
    df = pd.DataFrame(records, columns=columns)
    df['등록대수'] = df['등록대수'].astype('Int64')
    return df

def process_all_files_in_folder(input_folder='전기차', output_path='전기차_통합.csv'):
    """Process every .xlsx workbook under a folder and write one combined CSV.

    Walks ``input_folder`` recursively, runs ``process_single_file`` on each
    workbook, concatenates the results, orders them by year-month and saves
    them to ``output_path``. Files that fail to process are reported on
    stdout and skipped.

    Args:
        input_folder: Root folder to search for .xlsx files.
        output_path: Destination CSV path. Defaults to the original
            hard-coded file name.

    Returns:
        None. The result is written to ``output_path``; a message is printed
        either way.
    """
    all_dfs = []

    for root, _dirs, files in os.walk(input_folder):
        for file in files:
            # Accept any capitalisation of the extension; '~$' prefixed files
            # are skipped (these are typically Excel's temporary lock files).
            if not file.lower().endswith('.xlsx') or file.startswith('~$'):
                continue
            full_path = os.path.join(root, file)
            try:
                all_dfs.append(process_single_file(full_path))
            except Exception as e:
                # Best-effort batch: report the failure and keep going.
                print(f"❌ Error processing {file}: {e}")

    if not all_dfs:
        print("⚠️ 처리할 데이터가 없습니다.")
        return

    final_df = pd.concat(all_dfs, ignore_index=True)

    # Normalise year-month labels to 'YYYY.MM'. Labels that cannot be parsed
    # keep their original value instead of being blanked out.
    raw_labels = final_df['연월']
    parsed = pd.to_datetime(raw_labels, format='%Y.%m', errors='coerce')
    unparsed = parsed.isna()
    if unparsed.any():
        print(f"⚠️ 연월 형식을 해석하지 못한 행: {int(unparsed.sum())}개 (원본 값 유지)")
    normalized = parsed.dt.strftime('%Y.%m').astype(object)
    final_df['연월'] = normalized.where(~unparsed, raw_labels)

    # Stable sort so rows within the same month keep their extraction order;
    # unparsed labels sort last.
    order = parsed.sort_values(kind='mergesort').index
    final_df = final_df.loc[order].reset_index(drop=True)

    # Save
    final_df.to_csv(output_path, index=False)
    print(f"✅ 저장 완료: {output_path}")


In [47]:
# Run the whole pipeline with the default arguments (input folder '전기차').
process_all_files_in_folder()

✅ 저장 완료: 전기차_통합.csv
