In [8]:
from datetime import datetime, timedelta
import json
import re


def parse_work_records(input_text: str) -> str:
        """근무 기록 파싱 도구"""
        try:
            work_records = []

            # 자연어 패턴 처리
            natural_patterns = [
                r'(\d{1,2}월\s*\d{1,2}일).*?(\d{1,2}시|\d{1,2}:\d{2}).*?(\d{1,2}시|\d{1,2}:\d{2})',
                r'(어제|오늘|내일|그제|모레).*?(\d{1,2}시|\d{1,2}:\d{2}).*?(\d{1,2}시|\d{1,2}:\d{2})',
                r'(지난주|이번주|다음주)\s*(월|화|수|목|금|토|일)요일.*?(\d{1,2}시|\d{1,2}:\d{2}).*?(\d{1,2}시|\d{1,2}:\d{2})'
            ]

            # 정형 데이터 패턴
            formal_patterns = [
                r'(\d{4}-\d{2}-\d{2})\s+(\d{1,2}:\d{2})-(\d{1,2}:\d{2})',
                r'(\d{2}-\d{2})\s+(\d{1,2}:\d{2})-(\d{1,2}:\d{2})',
                r'(\d{1,2}/\d{1,2})\s+(\d{1,2}:\d{2})-(\d{1,2}:\d{2})'
            ]

            current_year = datetime.now().year
            today = datetime.now().date()

            # 정형 데이터 처리
            for pattern in formal_patterns:
                matches = re.findall(pattern, input_text)
                for match in matches:
                    date_str, start_time_str, end_time_str = match

                    try:
                        # 날짜 정규화
                        if '-' in date_str and len(date_str) == 10:  # YYYY-MM-DD
                            work_date = datetime.strptime(date_str, '%Y-%m-%d').date()
                        elif '-' in date_str and len(date_str) == 5:  # MM-DD
                            work_date = datetime.strptime(f"{current_year}-{date_str}", '%Y-%m-%d').date()
                        elif '/' in date_str:  # M/D
                            work_date = datetime.strptime(f"{current_year}/{date_str}", '%Y/%m/%d').date()
                        else:
                            continue

                        # 시간 파싱
                        start_time = datetime.strptime(f"{work_date} {start_time_str}", '%Y-%m-%d %H:%M')
                        end_time = datetime.strptime(f"{work_date} {end_time_str}", '%Y-%m-%d %H:%M')

                        # 종료 시간이 시작 시간보다 이전이면 다음날로 처리
                        if end_time <= start_time:
                            end_time += timedelta(days=1)

                        work_records.append({
                            'date': work_date,
                            'start_time': start_time,
                            'end_time': end_time,
                            'is_weekend': work_date.weekday() >= 5,
                            'is_substitute_holiday': False
                        })
                    except:
                        continue

            # 대체휴일 정보 추출
            substitute_holidays = []
            sub_patterns = [
                r'대체휴일[:\s]*(\d{4}-\d{2}-\d{2})',
                r'대체휴일[:\s]*(\d{2}-\d{2})',
                r'대체휴일[:\s]*(\d{1,2}월\s*\d{1,2}일)'
            ]

            for pattern in sub_patterns:
                matches = re.findall(pattern, input_text)
                for match in matches:
                    try:
                        if len(match) == 10:  # YYYY-MM-DD
                            sub_date = datetime.strptime(match, '%Y-%m-%d').date()
                        elif len(match) == 5:  # MM-DD
                            sub_date = datetime.strptime(f"{current_year}-{match}", '%Y-%m-%d').date()
                        elif '월' in match and '일' in match:  # M월 D일
                            match = match.replace('월', '-').replace('일', '').replace(' ', '')
                            sub_date = datetime.strptime(f"{current_year}-{match}", '%Y-%m-%d').date()
                        else:
                            continue
                        substitute_holidays.append(sub_date)
                    except:
                        continue

            # 대체휴일 정보 적용
            for record in work_records:
                if record['date'] in substitute_holidays:
                    record['is_substitute_holiday'] = True

            result = {
                'work_records': work_records,
                'substitute_holidays': [d.isoformat() for d in substitute_holidays],
                'total_records': len(work_records)
            }

            return json.dumps(result, default=str, ensure_ascii=False)

        except Exception as e:
            return f"파싱 오류: {str(e)}"

In [3]:
input = '1월 15일 오전 9시부터 밤 11시까지 근무했고, 1월 20일 토요일 오전 8시부터 자정까지 근무했어요. 20일은 대체휴일이에요.'

In [9]:
result=parse_work_records(input)

In [12]:
#print (result)
result_json = json.loads(result)
print(result_json)

{'work_records': [], 'substitute_holidays': [], 'total_records': 0}
