In [3]:
from datetime import timedelta, datetime
import random
import time
import requests
import json
import os

from tqdm.notebook import tqdm
import pandas as pd

# OpenWeather One Call 3.0 historical ("timemachine") endpoint:
# https://openweathermap.org/api/one-call-3#history
# The key is read from the environment so it never has to be saved in the
# notebook; it falls back to an empty string when the variable is unset.
openweather_key = os.environ.get("OPENWEATHER_API_KEY", "")

# Coordinates of the location to query (presumably central Seoul -- confirm).
lat, lon = 37.5635694444444, 126.980008333333

# Example request URL for a fixed timestamp (1640995200 = 2022-01-01 00:00 UTC).
# Built only after lat/lon/openweather_key exist, so the cell runs on a fresh kernel.
weather_api = f"https://api.openweathermap.org/data/3.0/onecall/timemachine?lat={lat}&lon={lon}&dt=1640995200&appid={openweather_key}"

In [29]:
# Backfill hourly weather for December 2024 into a CSV, one API call per hour.
start_date = datetime(2024, 12, 1)
end_date = datetime(2024, 12, 31)

output_csv = 'weather_data_more.csv'
csv_columns = [
    'datetime', 'dt', 'sunrise', 'sunset', 'temp',
    'feels_like', 'pressure', 'humidity', 'dew_point',
    'uvi', 'clouds', 'visibility', 'wind_speed', 'wind_deg',
    'rain', 'snow', 'weather'
]

# One entry per hour from start_date 00:00 through end_date 23:00
# (the extra 24 hours cover the whole of the final day).
datetime_list = [
    start_date + timedelta(hours=x)
    for x in range(int((end_date - start_date).total_seconds() // 3600) + 24)
]

# Resume support. Rows are keyed on the unix timestamp in the 'dt' column rather
# than on the '%Y%m%d' string: the string is read back from CSV as an integer
# (so it never matched), and a day-level key would skip the remaining hours of a
# partially fetched day.
# NOTE(review): relies on the API echoing the requested time in data[0].dt -- confirm.
fetched_timestamps = set()
if os.path.exists(output_csv):
    stored_dt = pd.to_numeric(
        pd.read_csv(output_csv, usecols=['dt'])['dt'], errors='coerce'
    ).dropna()
    fetched_timestamps = set(stored_dt.astype('int64').tolist())
else:
    # Create the header only when the output file itself is missing, so an
    # existing, partially filled file is never truncated.
    pd.DataFrame(columns=csv_columns).to_csv(output_csv, index=False)


for dt in tqdm(datetime_list):
    # NOTE: dt is a naive datetime, so timestamp() interprets it in the
    # machine's local timezone.
    requested_ts = int(dt.timestamp())
    if requested_ts in fetched_timestamps:
        continue
    dt_str = dt.strftime("%Y%m%d")
    weather_api = f"https://api.openweathermap.org/data/3.0/onecall/timemachine?lat={lat}&lon={lon}&dt={requested_ts}&appid={openweather_key}&units=metric"
    try:
        # A timeout keeps one stalled connection from hanging the whole loop.
        resp = requests.get(weather_api, timeout=30)
        if resp.status_code != 200:
            # Report failures (bad key, quota, ...) instead of dropping them silently.
            print(f"HTTP {resp.status_code} for {dt}: {resp.text[:200]}")
            continue

        records = resp.json().get('data') or []
        if not records:
            print(f"No data returned for {dt}")
            continue
        current = records[0]

        # Precipitation objects are only present when there was rain/snow.
        rain_1h = current.get('rain', {}).get('1h', 0)
        snow_1h = current.get('snow', {}).get('1h', 0)
        conditions = current.get('weather') or [{}]

        weather_info = {
            'datetime': dt_str,
            'dt': current.get('dt', None),
            'sunrise': current.get('sunrise', None),
            'sunset': current.get('sunset', None),
            'temp': current.get('temp', None),
            'feels_like': current.get('feels_like', None),
            'pressure': current.get('pressure', None),
            'humidity': current.get('humidity', None),
            'dew_point': current.get('dew_point', None),
            'uvi': current.get('uvi', None),
            'clouds': current.get('clouds', None),
            'visibility': current.get('visibility', None),
            'wind_speed': current.get('wind_speed', None),
            'wind_deg': current.get('wind_deg', None),
            "rain": rain_1h,
            "snow": snow_1h,
            "weather": conditions[0].get("main", None)
        }
        # Append immediately so progress survives an interruption; passing
        # columns= pins the field order to the header.
        pd.DataFrame([weather_info], columns=csv_columns).to_csv(
            output_csv, mode='a', header=False, index=False
        )
        fetched_timestamps.add(requested_ts)
    except Exception as e:
        # requests errors embed the full URL (including appid); mask the key
        # before it ends up in the saved notebook output.
        message = str(e).replace(openweather_key, "***") if openweather_key else str(e)
        print(f"Error for {dt}: {message}")


  0%|          | 0/744 [00:00<?, ?it/s]

In [9]:
# Build a list of random hourly timestamps in January-June 2025 to sample
# historical weather for.

# https://openweathermap.org/api/one-call-3#history
# The API key is read from the environment instead of being stored in the
# notebook. Any key that was previously saved here should be treated as
# compromised and rotated.
openweather_key = os.environ.get("OPENWEATHER_API_KEY", "")
lat, lon = 37.5635694444444, 126.980008333333

SAMPLES_PER_MONTH = 80
RANDOM_SEED = 42
# A dedicated seeded generator makes the sample reproducible on re-run
# without touching the global `random` state.
rng = random.Random(RANDOM_SEED)

random_datetimes = []

for month in range(1, 7):

    start_date = datetime(2025, month, 1)
    if month == 6:
        # NOTE(review): the June window is the single day 2025-06-01, so all
        # June samples fall on that date (and repeat hours) -- confirm this
        # is intended.
        end_date = datetime(2025, 6, 1)
    else:
        # Last day of the month: first day of the next month minus one day.
        end_date = datetime(2025, month + 1, 1) - timedelta(days=1)

    # Number of whole days in the window; the same for every draw in this month.
    delta_days = (end_date - start_date).days

    # Draw SAMPLES_PER_MONTH random (day, hour) pairs; duplicates are possible.
    for _ in range(SAMPLES_PER_MONTH):
        rand_day = start_date + timedelta(days=rng.randint(0, delta_days))
        rand_hour = rng.randint(0, 23)
        rand_datetime = datetime(
            rand_day.year, rand_day.month, rand_day.day,
            rand_hour
        )
        random_datetimes.append(rand_datetime)

# Show how many timestamps were generated
len(random_datetimes)

480

In [11]:
# Fetch historical weather for every sampled timestamp and append each result
# to a CSV as soon as it arrives.
file_name = "weather_data_random_recent.csv"
csv_columns = [
    'datetime', 'dt', 'sunrise', 'sunset', 'temp',
    'feels_like', 'pressure', 'humidity', 'dew_point',
    'uvi', 'clouds', 'visibility', 'wind_speed', 'wind_deg',
    'rain', 'snow', 'weather'
]
# Write the header once, only when the output file does not exist yet.
if not os.path.exists(file_name):
    pd.DataFrame(columns=csv_columns).to_csv(file_name, index=False)


for dt in tqdm(random_datetimes):
    dt_str = dt.strftime("%Y%m%d")

    # NOTE: dt is a naive datetime, so timestamp() interprets it in the
    # machine's local timezone.
    weather_api = f"https://api.openweathermap.org/data/3.0/onecall/timemachine?lat={lat}&lon={lon}&dt={int(dt.timestamp())}&appid={openweather_key}&units=metric"
    try:
        # A timeout keeps one stalled connection from hanging the whole loop.
        resp = requests.get(weather_api, timeout=30)
        if resp.status_code != 200:
            # Report failures (bad key, quota, ...) instead of dropping them silently.
            print(f"HTTP {resp.status_code} for {dt}: {resp.text[:200]}")
            continue

        records = resp.json().get('data') or []
        if not records:
            print(f"No data returned for {dt}")
            continue
        current = records[0]

        # Extract rain and snow amounts (the objects exist only when there was precipitation)
        rain_1h = current.get('rain', {}).get('1h', 0)
        snow_1h = current.get('snow', {}).get('1h', 0)
        conditions = current.get('weather') or [{}]

        weather_info = {
            'datetime': dt_str,
            'dt': current.get('dt', None),
            'sunrise': current.get('sunrise', None),
            'sunset': current.get('sunset', None),
            'temp': current.get('temp', None),
            'feels_like': current.get('feels_like', None),
            'pressure': current.get('pressure', None),
            'humidity': current.get('humidity', None),
            'dew_point': current.get('dew_point', None),
            'uvi': current.get('uvi', None),
            'clouds': current.get('clouds', None),
            'visibility': current.get('visibility', None),
            'wind_speed': current.get('wind_speed', None),
            'wind_deg': current.get('wind_deg', None),
            "rain": rain_1h,
            "snow": snow_1h,
            "weather": conditions[0].get("main", None)
        }
        # Passing columns= pins the field order to the header written above.
        pd.DataFrame([weather_info], columns=csv_columns).to_csv(
            file_name, mode='a', header=False, index=False
        )
    except Exception as e:
        # requests errors embed the full URL (including appid); mask the key
        # before it ends up in the saved notebook output.
        message = str(e).replace(openweather_key, "***") if openweather_key else str(e)
        print(f"Error for {dt}: {message}")

  0%|          | 0/480 [00:00<?, ?it/s]

In [None]:
def add_weather_columes(file_path):
    """Rewrite a weather CSV in place so that it starts with the column header.

    The file is expected to contain the 17 weather fields in the order listed
    in ``columns``. If its first row is already exactly that header, the
    header is reused instead of being read as data, so calling the function
    repeatedly does not stack header rows into the data.

    Args:
        file_path: Path of the CSV file to read and overwrite.
    """
    import csv  # local import: only needed for the header check

    columns = [
        'datetime', 'dt', 'sunrise', 'sunset', 'temp',
        'feels_like', 'pressure', 'humidity', 'dew_point',
        'uvi', 'clouds', 'visibility', 'wind_speed', 'wind_deg',
        'rain', 'snow', 'weather'
    ]

    # Peek at the first row to find out whether the header is already there.
    with open(file_path, newline='', encoding='utf-8') as fh:
        first_row = next(csv.reader(fh), None)
    has_header = first_row == columns

    # header=0 replaces an existing header row with `columns`;
    # header=None treats every row as data (the headerless case).
    df = pd.read_csv(file_path, names=columns, header=0 if has_header else None)
    df.to_csv(file_path, index=False)

# Rewrite the CSV at file_name in place so it is saved with the column header row.
add_weather_columes(file_name)

In [21]:
# Load the collected samples and report how many values are missing per column.
df = pd.read_csv(file_name)
missing_per_column = df.isna().sum()
missing_per_column

datetime        0
dt              0
sunrise         0
sunset          0
temp            0
feels_like      0
pressure        0
humidity        0
dew_point       0
uvi           480
clouds          0
visibility     22
wind_speed      0
wind_deg        0
rain            0
snow            0
weather         0
dtype: int64

In [23]:
# Drop the 'uvi' column, then fill the gaps in 'visibility' with the column mean.
df_droped = df.drop('uvi', axis=1)
# Assign the filled column back instead of calling fillna(inplace=True) on the
# selected column: that chained in-place form warns on recent pandas and leaves
# the frame unchanged once Copy-on-Write is active.
df_droped['visibility'] = df_droped['visibility'].fillna(df['visibility'].mean())
df_droped.isna().sum()

datetime      0
dt            0
sunrise       0
sunset        0
temp          0
feels_like    0
pressure      0
humidity      0
dew_point     0
clouds        0
visibility    0
wind_speed    0
wind_deg      0
rain          0
snow          0
weather       0
dtype: int64

In [2]:
# Dates to flag as holidays, written as YYYYMMDD strings and grouped by year.
holiday = [
    # 2024
    "20240101", "20240209", "20240212", "20240301", "20240410",
    "20240506", "20240515", "20240606", "20240815", "20240916",
    "20240917", "20240918", "20241003", "20241009", "20241225",
    # 2025
    "20250101", "20250127", "20250128", "20250129", "20250130",
    "20250303", "20250505", "20250506",
]

# Parse both the holiday list and the frame's 'datetime' column from YYYYMMDD.
holiday_dates = pd.to_datetime(holiday, format="%Y%m%d")
df["datetime"] = pd.to_datetime(df["datetime"], format="%Y%m%d")

# Compare on calendar dates: a row is a holiday when its date is in the list.
sample_days = df["datetime"].dt.date
df["isHoliday"] = sample_days.isin(holiday_dates.date)
df[["datetime", "isHoliday"]].head()

Unnamed: 0,datetime,isHoliday
0,2025-01-13,False
1,2025-01-29,True
2,2025-01-23,False
3,2025-01-31,False
4,2025-01-28,True
