In [1]:
import os
import boto3
import json
from datetime import datetime
from botocore.exceptions import ClientError

# Configure boto3's default session to use the 'default' AWS profile.
boto3.setup_default_session(profile_name='default')
# Module-level S3 client; list_s3_files and sync_prefix below read this global.
s3_client = boto3.client('s3')

### Purpose

1. ToUpload/dsgeoadmin/baseinfo 폴더의 하위 폴더(common, geo, outrecord, work)에 있는 모든 .json 파일을 S3 dsbaseinfo 버킷의 같은 prefix에 업로드
2. S3에 있지만 로컬에 없는 파일(이름이 다른 파일)은 삭제

In [2]:
# Destination bucket and the key prefixes (one local sub-folder each) to sync.
BUCKET_NAME = 'dsbaseinfo'
PREFIXES = ['common/', 'geo/', 'outrecord/', 'work/']

# Local staging root: <AWS_folder>/ToUpload/<GRP_Name>/baseinfo.
# AWS_folder is relative, so the result depends on the current working directory.
AWS_folder, GRP_Name = './', 'dsgeoadmin'
upload_folder = os.path.join(AWS_folder, 'ToUpload', GRP_Name, 'baseinfo')

# Echo the effective configuration, one setting per line.
print(
    f'Upload folder: {upload_folder}',
    f'S3 Bucket: {BUCKET_NAME}',
    f'Prefixes: {PREFIXES}',
    sep='\n',
)

Upload folder: ./ToUpload\dsgeoadmin\baseinfo
S3 Bucket: dsbaseinfo
Prefixes: ['common/', 'geo/', 'outrecord/', 'work/']


In [3]:
def list_s3_files(bucket, prefix):
    """Return the keys of all objects under ``prefix`` in ``bucket``.

    Results are collected across every page of ``list_objects_v2`` using the
    module-level ``s3_client``. Keys ending in '/' are left out (presumably
    folder placeholder objects).

    Args:
        bucket: Name of the S3 bucket to list.
        prefix: Key prefix to restrict the listing to.

    Returns:
        A list of object keys, in the order the pages return them.
    """
    pages = s3_client.get_paginator('list_objects_v2').paginate(
        Bucket=bucket, Prefix=prefix
    )
    return [
        entry['Key']
        for page in pages
        # A page carries no 'Contents' entry when nothing matched.
        for entry in page.get('Contents', [])
        if not entry['Key'].endswith('/')
    ]


def list_local_files(local_folder, prefix):
    """Return the local ``.json`` files for ``prefix`` expressed as S3 keys.

    The directory scanned is ``local_folder`` joined with ``prefix`` minus its
    trailing '/'. Only regular files whose name ends in '.json' are reported;
    sub-directories are not descended into.

    Args:
        local_folder: Root folder that holds one sub-folder per prefix.
        prefix: S3 key prefix (e.g. 'geo/'); it is prepended to each file name.

    Returns:
        A sorted list of keys of the form ``f'{prefix}{file_name}'``. The list
        is empty when the directory is missing or is not a directory.
    """
    local_dir = os.path.join(local_folder, prefix.rstrip('/'))
    # isdir rather than exists: a regular file that happens to carry the
    # prefix's name would otherwise make os.listdir raise NotADirectoryError.
    if not os.path.isdir(local_dir):
        return []
    # os.listdir order is arbitrary; sort so the result is deterministic.
    return [
        f'{prefix}{name}'
        for name in sorted(os.listdir(local_dir))
        if name.endswith('.json') and os.path.isfile(os.path.join(local_dir, name))
    ]


def sync_prefix(bucket, prefix, local_folder, dry_run=True):
    """Make the S3 objects under ``prefix`` mirror the matching local folder.

    Steps, in order:
      1. Delete every S3 key under ``prefix`` that has no local counterpart.
      2. Upload every local file, overwriting keys that already exist.

    The local side is produced by ``list_local_files``, which only reports
    ``.json`` files. Any non-JSON object under the prefix in S3 therefore
    always lands in the delete set.

    Safety guard: if the local folder for this prefix does not exist, the
    prefix is skipped. Without the guard a wrong working directory or a typo
    in the path would look like "no local files" and every object under the
    prefix would be deleted. To empty a prefix on purpose, keep an empty local
    folder for it.

    Args:
        bucket: Name of the S3 bucket.
        prefix: Key prefix to synchronise, e.g. 'geo/'.
        local_folder: Root folder containing one sub-folder per prefix.
        dry_run: When True (the default) only print the plan; no S3 delete or
            upload call is made.

    Returns:
        A ``(n_delete, n_upload)`` tuple with the number of keys selected for
        deletion and for upload. These are the planned counts, also in a dry
        run. ``(0, 0)`` is returned when the prefix was skipped.
    """
    print(f'\n=== {prefix} ===')

    # Refuse to sync against a folder that is not there (see docstring).
    local_dir = os.path.join(local_folder, prefix.rstrip('/'))
    if not os.path.isdir(local_dir):
        print(f'  SKIPPED: local folder not found: {local_dir}')
        return 0, 0

    s3_keys = set(list_s3_files(bucket, prefix))
    local_keys = set(list_local_files(local_folder, prefix))

    to_delete = s3_keys - local_keys
    # Everything local is (re-)uploaded; existing keys are overwritten.
    to_upload = local_keys

    print(f'  S3 existing:  {len(s3_keys)} files')
    print(f'  Local:        {len(local_keys)} files')
    print(f'  To delete:    {len(to_delete)} files')
    print(f'  To upload:    {len(to_upload)} files')

    if to_delete:
        print('  -- DELETE --')
        for key in sorted(to_delete):
            print(f'    x {key}')
            if not dry_run:
                s3_client.delete_object(Bucket=bucket, Key=key)

    if to_upload:
        print('  -- UPLOAD --')
        for key in sorted(to_upload):
            # S3 keys use '/', so convert to the local path separator.
            local_path = os.path.join(local_folder, key.replace('/', os.sep))
            marker = '(new)' if key not in s3_keys else '(overwrite)'
            print(f'    > {key} {marker}')
            if not dry_run:
                s3_client.upload_file(local_path, bucket, key)

    return len(to_delete), len(to_upload)

### Step 1: Dry Run (미리보기 - 실제 실행 안함)

In [4]:
# Preview only: with dry_run=True sync_prefix prints its plan and makes no
# S3 delete or upload call.
total_del, total_up = 0, 0

for prefix in PREFIXES:
    planned_deletes, planned_uploads = sync_prefix(
        BUCKET_NAME, prefix, upload_folder, dry_run=True
    )
    total_del = total_del + planned_deletes
    total_up = total_up + planned_uploads

# Totals across all prefixes.
print(f'\n=== TOTAL ===')
print(f'  Delete: {total_del} files')
print(f'  Upload: {total_up} files')


=== common/ ===
  S3 existing:  3 files
  Local:        3 files
  To delete:    0 files
  To upload:    3 files
  -- UPLOAD --
    > common/dsOrgList.json (overwrite)
    > common/dsworkcourse_info.json (overwrite)
    > common/mapcourse_info.json (overwrite)

=== geo/ ===
  S3 existing:  9 files
  Local:        9 files
  To delete:    0 files
  To upload:    9 files
  -- UPLOAD --
    > geo/area_def.json (overwrite)
    > geo/area_label_info.json (overwrite)
    > geo/course_info.json (overwrite)
    > geo/label_info.json (overwrite)
    > geo/label_infoL2.json (overwrite)
    > geo/tmp_extracted_info.json (overwrite)
    > geo/tmp_extracted_map.json (overwrite)
    > geo/tmp_mapdscourse_info.json (overwrite)
    > geo/turf_type.json (overwrite)

=== outrecord/ ===
  S3 existing:  12 files
  Local:        11 files
  To delete:    1 files
  To upload:    11 files
  -- DELETE --
    x outrecord/dsOrgList.json
  -- UPLOAD --
    > outrecord/dsEQCategoryTypeMAP.json (overwrite)
    > out

### Step 2: Execute (실제 실행)

In [5]:
# WARNING: dry_run=False performs the real S3 delete and upload calls.
# Check the dry-run preview before running this cell.
total_del, total_up = 0, 0

for prefix in PREFIXES:
    n_deleted, n_uploaded = sync_prefix(
        BUCKET_NAME, prefix, upload_folder, dry_run=False
    )
    total_del = total_del + n_deleted
    total_up = total_up + n_uploaded

# Totals across all prefixes.
print(f'\n=== DONE ===')
print(f'  Deleted: {total_del} files')
print(f'  Uploaded: {total_up} files')


=== common/ ===
  S3 existing:  3 files
  Local:        3 files
  To delete:    0 files
  To upload:    3 files
  -- UPLOAD --
    > common/dsOrgList.json (overwrite)
    > common/dsworkcourse_info.json (overwrite)
    > common/mapcourse_info.json (overwrite)

=== geo/ ===
  S3 existing:  9 files
  Local:        9 files
  To delete:    0 files
  To upload:    9 files
  -- UPLOAD --
    > geo/area_def.json (overwrite)
    > geo/area_label_info.json (overwrite)
    > geo/course_info.json (overwrite)
    > geo/label_info.json (overwrite)
    > geo/label_infoL2.json (overwrite)
    > geo/tmp_extracted_info.json (overwrite)
    > geo/tmp_extracted_map.json (overwrite)
    > geo/tmp_mapdscourse_info.json (overwrite)
    > geo/turf_type.json (overwrite)

=== outrecord/ ===
  S3 existing:  12 files
  Local:        11 files
  To delete:    1 files
  To upload:    11 files
  -- DELETE --
    x outrecord/dsOrgList.json
  -- UPLOAD --
    > outrecord/dsEQCategoryTypeMAP.json (overwrite)
    > out

### Step 3: Verify (검증)

In [6]:
# Read-only check: list what is stored under each synced prefix.
# The heading is built from BUCKET_NAME so it cannot drift from the bucket
# that is actually queried.
print(f'=== S3 {BUCKET_NAME} 현재 상태 ===')
for prefix in PREFIXES:
    keys = list_s3_files(BUCKET_NAME, prefix)
    print(f'\n{prefix} ({len(keys)} files):')
    for k in sorted(keys):
        print(f'  {k}')

=== S3 dsbaseinfo 현재 상태 ===

common/ (3 files):
  common/dsOrgList.json
  common/dsworkcourse_info.json
  common/mapcourse_info.json

geo/ (9 files):
  geo/area_def.json
  geo/area_label_info.json
  geo/course_info.json
  geo/label_info.json
  geo/label_infoL2.json
  geo/tmp_extracted_info.json
  geo/tmp_extracted_map.json
  geo/tmp_mapdscourse_info.json
  geo/turf_type.json

outrecord/ (11 files):
  outrecord/dsEQCategoryTypeMAP.json
  outrecord/dsEQtypeOrder.json
  outrecord/dsEQtypeSymMap.json
  outrecord/dsOrgOrder.json
  outrecord/dsholidays.json
  outrecord/dsprecipitationConditions.json
  outrecord/dsrankOrder.json
  outrecord/dssclearConditions.json
  outrecord/dstask.json
  outrecord/dstypeOrder.json
  outrecord/dswindConditions.json

work/ (4 files):
  work/org_info.json
  work/outsource_info.json
  work/task_info.json
  work/vehicle_info.json
