In [1]:
import pandas as pd
import requests
from datetime import datetime, timedelta, timezone

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestRegressor

import os
from dotenv import load_dotenv
# Load environment variables from the .env file under ../01_python.
load_dotenv('./../01_python/.env')
# API service key read from the RAIN_ID environment variable
# (os.getenv returns None when the variable is not set).
SERVICE_KEY = os.getenv('RAIN_ID') 

# =========================
# Configuration
# =========================
# Historical data file loaded by the prediction step.
CSV_PATH = "SUWON_S_DATA_TABLE_GENDER_SUM.csv"
# nx / ny values sent with the weather request.
NX, NY = 61, 121  # Suwon
# Fixed UTC+9 offset used whenever the current time is taken.
KST = timezone(timedelta(hours=9))

# Endpoint queried for the current weather observation.
ULTRA_URL = "https://apis.data.go.kr/1360000/VilageFcstInfoService_2.0/getUltraSrtNcst"

# =========================
# 오늘 요일 코드
# =========================
def today_day_code():
    """Return today's weekday number in KST, counting Monday as 1 and Sunday as 7."""
    current = datetime.now(KST)
    # isoweekday() already numbers Monday..Sunday as 1..7.
    return current.isoweekday()

# =========================
# 기상청 초단기실황
# =========================
def get_weather():
    """Fetch the most recent temperature and rainfall observation.

    Queries ULTRA_URL for the NX/NY location and extracts the ``T1H`` and
    ``RN1`` categories from the returned items.

    Returns:
        tuple: ``(temp, rain)``. ``temp`` is the ``T1H`` value as a float, or
        ``None`` when the response contains no ``T1H`` item. ``rain`` is the
        ``RN1`` value as a float, or ``0.0`` when it is absent or not numeric.

    Raises:
        requests.RequestException: on connection failure, timeout, or an HTTP
            error status.
        KeyError: if the JSON payload lacks the expected
            ``response -> body -> items -> item`` structure.
    """
    now = datetime.now(KST)
    # Request the observation for the top of the hour. Before minute 40 the
    # current hour's data is treated as not yet available, so the previous
    # hour is used instead.
    base = now.replace(minute=0, second=0, microsecond=0)
    if now.minute < 40:
        base -= timedelta(hours=1)

    params = {
        "serviceKey": SERVICE_KEY,
        "dataType": "JSON",
        "numOfRows": 1000,
        "pageNo": 1,
        "base_date": base.strftime("%Y%m%d"),
        "base_time": base.strftime("%H%M"),
        "nx": NX,
        "ny": NY,
    }

    # A timeout prevents an unresponsive server from blocking forever, and
    # raise_for_status() reports HTTP failures directly instead of letting
    # them surface later as a confusing JSON/KeyError.
    res = requests.get(ULTRA_URL, params=params, timeout=10)
    res.raise_for_status()
    items = res.json()["response"]["body"]["items"]["item"]

    temp, rain = None, 0.0
    for it in items:
        category = it["category"]
        if category == "T1H":
            temp = float(it["obsrValue"])
        elif category == "RN1":
            # Rainfall is best-effort: a missing or non-numeric value counts
            # as no rain, but only conversion/lookup errors are swallowed.
            try:
                rain = float(it["obsrValue"])
            except (KeyError, TypeError, ValueError):
                rain = 0.0

    return temp, rain

# =========================
# 시간대별 오늘 매출 예측
# =========================
def predict_today_sales_by_hour():
    """Predict today's sales total for each HOUR value found in the CSV.

    For every distinct HOUR the rows are collapsed to one record per date
    (summed AMT, most frequent DAY, mean TEMP, summed RAIN). A random forest
    is fitted on those daily records and asked for a prediction using today's
    weekday code and the current weather. Hours with fewer than 50 daily
    records are skipped.

    NOTE(review): today's weekday code uses Monday = 1; this presumably
    matches the encoding of the DAY column in the CSV -- confirm.

    Returns:
        pandas.DataFrame: one row per modelled hour with the columns
        ``HOUR`` and ``오늘_예측매출`` (prediction rounded to 0 decimals).
    """
    sales = pd.read_csv(CSV_PATH)
    sales["TA_YMD"] = pd.to_datetime(sales["TA_YMD"], format="%Y%m%d")

    day = today_day_code()
    temp, rain = get_weather()

    print(f"오늘 요일={day}, 기온={temp}°C, 강수={rain}mm\n")

    feature_cols = ["DAY_mode", "TEMP_mean", "RAIN_sum"]

    def most_common_day(values):
        # First mode of the DAY values seen on a given date.
        return int(pd.Series(values).mode()[0])

    def build_model():
        # One-hot encode the weekday code; weather features pass through as-is.
        encoder = ColumnTransformer([
            ("cat", OneHotEncoder(handle_unknown="ignore"), ["DAY_mode"]),
            ("num", "passthrough", ["TEMP_mean", "RAIN_sum"]),
        ])
        forest = RandomForestRegressor(
            n_estimators=300,
            max_depth=10,
            random_state=42,
        )
        return Pipeline([("prep", encoder), ("rf", forest)])

    # The query row is identical for every hour, so it is built once.
    today_features = pd.DataFrame([{
        "DAY_mode": day,
        "TEMP_mean": temp,
        "RAIN_sum": rain,
    }])

    predictions = []
    for hour in sorted(sales["HOUR"].unique()):
        hour_rows = sales[sales["HOUR"] == hour]

        per_day = hour_rows.groupby("TA_YMD", as_index=False).agg(
            AMT_sum=("AMT", "sum"),
            DAY_mode=("DAY", most_common_day),
            TEMP_mean=("TEMP", "mean"),
            RAIN_sum=("RAIN", "sum"),
        )

        # Only model hours that have at least 50 daily records.
        if len(per_day) >= 50:
            model = build_model()
            model.fit(per_day[feature_cols], per_day["AMT_sum"])

            estimate = model.predict(today_features)[0]
            predictions.append([hour, round(estimate, 0)])

    return pd.DataFrame(predictions, columns=["HOUR", "오늘_예측매출"])

# Run the prediction and print the per-hour result table.
result = predict_today_sales_by_hour()
print(result)


오늘 요일=3, 기온=5.3°C, 강수=0.0mm

   HOUR     오늘_예측매출
0     1   4438330.0
1     2   3509969.0
2     3   7962034.0
3     4  23290254.0
4     5  25216281.0
5     6  16645265.0
6     7  16065300.0
7     8  20222781.0
8     9  10378039.0
9    10   1908782.0
