In [3]:
import os
import json
import requests
import time
import pandas as pd
from datetime import datetime, timedelta

# Output locations.
# NOTE(review): the original comment described this as a Google Drive path, but
# the value is a plain relative path — confirm where it resolves at run time.
base_dir = "../base/data"
save_dir = os.path.join(base_dir, "data")  # i.e. "../base/data/data"
os.makedirs(save_dir, exist_ok=True)  # create the output directory if it is missing

# Text file that log_error() appends to.
log_file = os.path.join(save_dir, "error_log.txt")

# Error logging helper
def log_error(message):
    """Append a timestamped line to the error log.

    Args:
        message: Text to record; it is written after the current local
            timestamp, separated by " - ".

    The file is opened with an explicit UTF-8 encoding so that a message
    containing non-ASCII text cannot raise ``UnicodeEncodeError`` under a
    platform default codec while an error is being reported.
    """
    with open(log_file, "a", encoding="utf-8") as f:
        f.write(f"{datetime.now()} - {message}\n")

# Check whether a window's filtered JSON has already been written
def is_data_saved_json(station_id, start_dt, end_dt):
    """Return True if the filtered JSON file for this station/window exists.

    Args:
        station_id: Station number used in the file name.
        start_dt: Window start, as it appears in the file name.
        end_dt: Window end, as it appears in the file name.
    """
    return os.path.exists(
        os.path.join(save_dir, f'station_{station_id}_{start_dt}_{end_dt}_filtered.json')
    )

# HTTP fetch helper
def fetch_data_with_retries(api, params):
    """GET ``api["url"]`` with ``params``, trying up to three times.

    Args:
        api: Mapping with at least a "url" entry.
        params: Query parameters passed to ``requests.get``.

    Returns:
        The response object of the first attempt that returns HTTP 200,
        or None when every attempt fails.
    """
    MAX_RETRIES = 3
    RETRY_DELAY = 2  # seconds
    for _attempt in range(MAX_RETRIES):
        try:
            response = requests.get(api["url"], params=params, timeout=10)
            if response.status_code == 200:
                return response
            print(f"HTTP Error {response.status_code}: {response.text}")
        except requests.exceptions.RequestException as e:
            print(f"Request failed: {e}. Retrying...")
        # Pause after every unsuccessful attempt, including the last one.
        time.sleep(RETRY_DELAY)
    print(f"Failed to fetch data after {MAX_RETRIES} retries.")
    return None

# Fields kept from each returned item; everything else is dropped before saving.
required_columns = ['tm', 'stnId', 'stnNm', 'ta', 'rn', 'ws', 'wd', 'hm', 'pa', 'ts']

# API endpoints and credentials.
# Each entry has the request URL, its key, and "limit": the number of calls the
# download loop makes with that key before switching to the next entry.
# SECURITY NOTE(review): the keys are hard-coded in the notebook — consider
# loading them from the environment and rotating the ones committed here.
# NOTE(review): the third entry points at a different host/endpoint than the
# first two — confirm it accepts the same query parameters and returns JSON.
apis = [
    {
        "url": "http://apis.data.go.kr/1360000/AsosHourlyInfoService/getWthrDataList",
        "key": "ZSG78Pt3T/LihB09c5HOKiwiw7Fp0YO6Zyv3IXdsG6xlAHwXn0Ujh4AI7Qusf257iRAZCZxtR6elbu64sLb2JA==",
        "limit": 10000
    },
    {
        "url": "http://apis.data.go.kr/1360000/AsosHourlyInfoService/getWthrDataList",
        "key": "sj40PraKG7dGcBNarkyuQo1EcwXDwDORJi1Hz103NVmLD4NBFkyIHBiEnGw/XnL8pKczzFOJh5+qlCtxwRCBDw==",
        "limit": 10000
    },
    {
        "url": "https://apihub.kma.go.kr/api/typ01/url/kma_sfctm2.php",
        "key": "QxivqrPbSjeYr6qz2-o3Fg",
        "limit": 20000
    }
]

# Load the station list; the first CSV column is read as the station numbers.
station_path = os.path.join(base_dir, "station_num.csv")
station = pd.read_csv(station_path, header=None)
station_ids = station[0].tolist()

# Key-rotation state: index of the key in use and calls made with it so far.
api_index = 0
api_calls = 0

# Length in days of one request window. 41 days of hourly rows (41 * 24 = 984)
# stay below the 'numOfRows' of 999 requested per page — presumably why 41 was
# chosen; confirm before changing either value.
WINDOW_DAYS = 41

for year in range(1990, 1990 + 1):
    print(f"Processing year: {year}")
    year_start_date = datetime(year, 1, 1)
    year_end_date = datetime(year, 12, 31)

    for station_id in station_ids:
        print(f"Processing station ID: {station_id}")
        current_date = year_start_date

        # Walk the year in fixed-size windows. The last window of a year can
        # extend into January of the following year.
        while current_date <= year_end_date:
            # Switch to the next key once the current one has used its budget.
            if api_calls >= apis[api_index]["limit"]:
                print(f"API limit reached for key {api_index + 1}. Switching to next key...")
                api_index = (api_index + 1) % len(apis)
                api_calls = 0
                if api_index == 0:
                    # Wrapped around: every key has been used up.
                    print("All keys exhausted. Waiting for reset...")
                    time.sleep(86400)  # wait one day

            start_dt = current_date.strftime('%Y%m%d')
            end_dt = (current_date + timedelta(days=WINDOW_DAYS - 1)).strftime('%Y%m%d')

            # Resume support: skip windows whose output file already exists.
            if is_data_saved_json(station_id, start_dt, end_dt):
                print(f"Filtered data already saved for station {station_id}, {start_dt} to {end_dt}. Skipping...")
                current_date += timedelta(days=WINDOW_DAYS)
                continue

            params = {
                'serviceKey': apis[api_index]["key"],  # required: service key
                'pageNo': '1',                         # optional: page number
                'numOfRows': '999',                    # optional: max rows per page
                'dataType': 'JSON',                    # optional: JSON output
                'dataCd': 'ASOS',                      # required: data code
                'dateCd': 'HR',                        # required: date code
                'startDt': start_dt,                   # required: start date (YYYYMMDD)
                'startHh': '00',                       # required: start hour
                'endDt': end_dt,                       # required: end date (YYYYMMDD)
                'endHh': '23',                         # required: end hour
                'stnIds': station_id                   # required: station number
            }

            response = fetch_data_with_retries(apis[api_index], params)
            # One logical call is counted per window, whatever the retry count.
            api_calls += 1

            if response:
                try:
                    # Keep only the required columns of every returned item.
                    json_data = response.json()
                    if 'response' in json_data and 'body' in json_data['response']:
                        items = json_data['response']['body']['items']['item']
                        filtered_data = [
                            {key: item[key] for key in required_columns if key in item}
                            for item in items
                        ]

                        # Persist the filtered rows for this window.
                        save_path = os.path.join(save_dir, f'station_{station_id}_{start_dt}_{end_dt}_filtered.json')
                        with open(save_path, "w", encoding="utf-8") as f:
                            json.dump(filtered_data, f, ensure_ascii=False, indent=4)
                        print(f"Filtered data saved for station {station_id}, {start_dt} to {end_dt}.")
                    else:
                        # A 200 response without a body (e.g. an API error
                        # payload) used to be dropped without a trace; record
                        # it so the failed window can be identified later.
                        log_error(
                            f"Unexpected response structure for station {station_id}, "
                            f"{start_dt} to {end_dt}: {str(json_data)[:200]}"
                        )
                except Exception as e:
                    log_error(f"Error filtering data for station {station_id}, {start_dt} to {end_dt}: {e}")
            else:
                log_error(f"No response received for station {station_id}, {start_dt} to {end_dt}")

            current_date += timedelta(days=WINDOW_DAYS)

Processing year: 1990
Processing station ID: 90
Filtered data saved for station 90, 19900211 to 19900323.
Filtered data saved for station 90, 19900904 to 19901014.
Filtered data saved for station 90, 19901015 to 19901124.
Processing station ID: 93
Processing station ID: 95
Filtered data saved for station 95, 19900101 to 19900210.
Filtered data saved for station 95, 19900211 to 19900323.
Filtered data saved for station 95, 19900324 to 19900503.
Filtered data saved for station 95, 19900504 to 19900613.
Filtered data saved for station 95, 19901015 to 19901124.
Filtered data saved for station 95, 19901125 to 19910104.
Processing station ID: 98
Processing station ID: 99
Processing station ID: 100
Filtered data saved for station 100, 19900614 to 19900724.
Filtered data saved for station 100, 19901015 to 19901124.
Processing station ID: 101
Filtered data saved for station 101, 19900504 to 19900613.
Filtered data saved for station 101, 19900614 to 19900724.
Filtered data saved for station 101,

In [4]:
import os
import json
import requests
import time
import pandas as pd
from datetime import datetime, timedelta

# Output locations.
# NOTE(review): the original comment described this as a Google Drive path, but
# the value is a plain relative path — confirm where it resolves at run time.
base_dir = "../base/data"
save_dir = os.path.join(base_dir, "data")  # i.e. "../base/data/data"
os.makedirs(save_dir, exist_ok=True)  # create the output directory if it is missing

# Text file that log_error() appends to.
log_file = os.path.join(save_dir, "error_log.txt")

# Error logging helper
def log_error(message):
    """Append a timestamped line to the error log.

    Args:
        message: Text to record; it is written after the current local
            timestamp, separated by " - ".

    The file is opened with an explicit UTF-8 encoding so that a message
    containing non-ASCII text cannot raise ``UnicodeEncodeError`` under a
    platform default codec while an error is being reported.
    """
    with open(log_file, "a", encoding="utf-8") as f:
        f.write(f"{datetime.now()} - {message}\n")

# Check whether a window's filtered JSON has already been written
def is_data_saved_json(station_id, start_dt, end_dt):
    """Return True if the filtered JSON file for this station/window exists.

    Args:
        station_id: Station number used in the file name.
        start_dt: Window start, as it appears in the file name.
        end_dt: Window end, as it appears in the file name.
    """
    return os.path.exists(
        os.path.join(save_dir, f'station_{station_id}_{start_dt}_{end_dt}_filtered.json')
    )

# HTTP fetch helper
def fetch_data_with_retries(api, params):
    """GET ``api["url"]`` with ``params``, trying up to three times.

    Args:
        api: Mapping with at least a "url" entry.
        params: Query parameters passed to ``requests.get``.

    Returns:
        The response object of the first attempt that returns HTTP 200,
        or None when every attempt fails.
    """
    MAX_RETRIES = 3
    RETRY_DELAY = 2  # seconds
    for _attempt in range(MAX_RETRIES):
        try:
            response = requests.get(api["url"], params=params, timeout=10)
            if response.status_code == 200:
                return response
            print(f"HTTP Error {response.status_code}: {response.text}")
        except requests.exceptions.RequestException as e:
            print(f"Request failed: {e}. Retrying...")
        # Pause after every unsuccessful attempt, including the last one.
        time.sleep(RETRY_DELAY)
    print(f"Failed to fetch data after {MAX_RETRIES} retries.")
    return None

# Fields kept from each returned item; everything else is dropped before saving.
required_columns = ['tm', 'stnId', 'stnNm', 'ta', 'rn', 'ws', 'wd', 'hm', 'pa', 'ts']

# API endpoints and credentials.
# Each entry has the request URL, its key, and "limit": the number of calls the
# download loop makes with that key before switching to the next entry.
# SECURITY NOTE(review): the keys are hard-coded in the notebook — consider
# loading them from the environment and rotating the ones committed here.
# NOTE(review): the third entry points at a different host/endpoint than the
# first two — confirm it accepts the same query parameters and returns JSON.
apis = [
    {
        "url": "http://apis.data.go.kr/1360000/AsosHourlyInfoService/getWthrDataList",
        "key": "ZSG78Pt3T/LihB09c5HOKiwiw7Fp0YO6Zyv3IXdsG6xlAHwXn0Ujh4AI7Qusf257iRAZCZxtR6elbu64sLb2JA==",
        "limit": 10000
    },
    {
        "url": "http://apis.data.go.kr/1360000/AsosHourlyInfoService/getWthrDataList",
        "key": "sj40PraKG7dGcBNarkyuQo1EcwXDwDORJi1Hz103NVmLD4NBFkyIHBiEnGw/XnL8pKczzFOJh5+qlCtxwRCBDw==",
        "limit": 10000
    },
    {
        "url": "https://apihub.kma.go.kr/api/typ01/url/kma_sfctm2.php",
        "key": "QxivqrPbSjeYr6qz2-o3Fg",
        "limit": 20000
    }
]

# Load the station list; the first CSV column is read as the station numbers.
station_path = os.path.join(base_dir, "station_num.csv")
station = pd.read_csv(station_path, header=None)
station_ids = station[0].tolist()

# Key-rotation state: index of the key in use and calls made with it so far.
api_index = 0
api_calls = 0

# Length in days of one request window. 41 days of hourly rows (41 * 24 = 984)
# stay below the 'numOfRows' of 999 requested per page — presumably why 41 was
# chosen; confirm before changing either value.
WINDOW_DAYS = 41

for year in range(1991, 2000 + 1):
    print(f"Processing year: {year}")
    year_start_date = datetime(year, 1, 1)
    year_end_date = datetime(year, 12, 31)

    for station_id in station_ids:
        print(f"Processing station ID: {station_id}")
        current_date = year_start_date

        # Walk the year in fixed-size windows. The last window of a year can
        # extend into January of the following year.
        while current_date <= year_end_date:
            # Switch to the next key once the current one has used its budget.
            if api_calls >= apis[api_index]["limit"]:
                print(f"API limit reached for key {api_index + 1}. Switching to next key...")
                api_index = (api_index + 1) % len(apis)
                api_calls = 0
                if api_index == 0:
                    # Wrapped around: every key has been used up.
                    print("All keys exhausted. Waiting for reset...")
                    time.sleep(86400)  # wait one day

            start_dt = current_date.strftime('%Y%m%d')
            end_dt = (current_date + timedelta(days=WINDOW_DAYS - 1)).strftime('%Y%m%d')

            # Resume support: skip windows whose output file already exists.
            if is_data_saved_json(station_id, start_dt, end_dt):
                print(f"Filtered data already saved for station {station_id}, {start_dt} to {end_dt}. Skipping...")
                current_date += timedelta(days=WINDOW_DAYS)
                continue

            params = {
                'serviceKey': apis[api_index]["key"],  # required: service key
                'pageNo': '1',                         # optional: page number
                'numOfRows': '999',                    # optional: max rows per page
                'dataType': 'JSON',                    # optional: JSON output
                'dataCd': 'ASOS',                      # required: data code
                'dateCd': 'HR',                        # required: date code
                'startDt': start_dt,                   # required: start date (YYYYMMDD)
                'startHh': '00',                       # required: start hour
                'endDt': end_dt,                       # required: end date (YYYYMMDD)
                'endHh': '23',                         # required: end hour
                'stnIds': station_id                   # required: station number
            }

            response = fetch_data_with_retries(apis[api_index], params)
            # One logical call is counted per window, whatever the retry count.
            api_calls += 1

            if response:
                try:
                    # Keep only the required columns of every returned item.
                    json_data = response.json()
                    if 'response' in json_data and 'body' in json_data['response']:
                        items = json_data['response']['body']['items']['item']
                        filtered_data = [
                            {key: item[key] for key in required_columns if key in item}
                            for item in items
                        ]

                        # Persist the filtered rows for this window.
                        save_path = os.path.join(save_dir, f'station_{station_id}_{start_dt}_{end_dt}_filtered.json')
                        with open(save_path, "w", encoding="utf-8") as f:
                            json.dump(filtered_data, f, ensure_ascii=False, indent=4)
                        print(f"Filtered data saved for station {station_id}, {start_dt} to {end_dt}.")
                    else:
                        # A 200 response without a body (e.g. an API error
                        # payload) used to be dropped without a trace; record
                        # it so the failed window can be identified later.
                        log_error(
                            f"Unexpected response structure for station {station_id}, "
                            f"{start_dt} to {end_dt}: {str(json_data)[:200]}"
                        )
                except Exception as e:
                    log_error(f"Error filtering data for station {station_id}, {start_dt} to {end_dt}: {e}")
            else:
                log_error(f"No response received for station {station_id}, {start_dt} to {end_dt}")

            current_date += timedelta(days=WINDOW_DAYS)

Processing year: 1991
Processing station ID: 90
Processing station ID: 93
Processing station ID: 95
Processing station ID: 98
Processing station ID: 99
Processing station ID: 100
Filtered data saved for station 100, 19911015 to 19911124.
Filtered data saved for station 100, 19911125 to 19920104.
Processing station ID: 101
Filtered data saved for station 101, 19910101 to 19910210.
Filtered data saved for station 101, 19910211 to 19910323.
Filtered data saved for station 101, 19910725 to 19910903.
Filtered data saved for station 101, 19910904 to 19911014.
Processing station ID: 102
Processing station ID: 104
Processing station ID: 105
Filtered data saved for station 105, 19910324 to 19910503.
Filtered data saved for station 105, 19910504 to 19910613.
Processing station ID: 106
Processing station ID: 108
Processing station ID: 112
Processing station ID: 114
Filtered data saved for station 114, 19911015 to 19911124.
Processing station ID: 115
Filtered data saved for station 115, 19910324 t

In [9]:
import os
import json
from datetime import datetime
import pandas as pd

# Path of the CSV holding station metadata
csv_file_path = "../data/meteorological/location.csv"

# Directory that is scanned for JSON files.
# NOTE(review): the download cells write to "../base/data/data" — confirm this
# is the directory that should be cleaned.
save_dir = "../base/data"

# Load the CSV and build a {station number (str): observation start time} map.
csv_data = pd.read_csv(csv_file_path)
csv_data['시작시각'] = pd.to_datetime(csv_data['시작시각'], format='%Y.%m.%d %H:%M:%S')
station_start_times = dict(zip(csv_data['지점번호'].astype(str), csv_data['시작시각']))

# List the directory contents once up front.
files = os.listdir(save_dir)

# Inspect every JSON file and delete the ones holding a "03" result code.
for file_name in files:
    if not file_name.endswith(".json"):
        continue
    try:
        # File names look like 'station_<id>_<YYYYMMDD>_...': the second
        # underscore-separated field is the station number and the third is
        # the window start date. (The original split on an empty separator
        # and read undefined names, so every file fell into the except.)
        parts = file_name.split('_')
        station_id = parts[1]
        file_date = datetime.strptime(parts[2], "%Y%m%d")

        # Files dated before the station's recorded start time are kept as-is.
        if station_id in station_start_times:
            start_time = station_start_times[station_id]
            if file_date < start_time:
                continue

        # Open the JSON and look at the response header's 'resultCode'.
        file_path = os.path.join(save_dir, file_name)
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        # NOTE(review): "03" is presumably the API's no-data code — confirm.
        if (
            "response" in data and
            "header" in data["response"] and
            data["response"]["header"].get("resultCode") == "03"
        ):
            os.remove(file_path)
            print(f"Deleted file: {file_name}")

    except Exception as e:
        # Best-effort cleanup: report the problem and move on to the next file.
        print(f"Error processing file {file_name}: {e}")

In [None]:
from dash import Dash, html, dcc, Input, Output, State
import dash_bootstrap_components as dbc

# Inline style dictionaries for the sidebar and the main content area

# Sidebar pinned to the left edge of the viewport.
SIDEBAR_STYLE = {
    "position": "fixed",
    "top": 0,
    "left": 0,
    "bottom": 0,
    "width": "250px",
    "padding": "20px",
    "background-color": "#f8f9fa",
    "transition": "0.3s",
    "overflow-x": "hidden"
}

# Same sidebar with a negative left margin equal to its width.
SIDEBAR_HIDDEN = SIDEBAR_STYLE.copy()
SIDEBAR_HIDDEN["margin-left"] = "-250px"

# Content area with a left margin matching the sidebar width.
CONTENT_STYLE = {
    "margin-left": "250px",
    "padding": "20px",
    "transition": "0.3s"
}

# Content area with no left margin, used while the sidebar is hidden.
CONTENT_EXPANDED = CONTENT_STYLE.copy()
CONTENT_EXPANDED["margin-left"] = "0"

# Create the Dash application with the Bootstrap stylesheet
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Sidebar: heading, prompt and one navigation link per page.
# Each entry is (label, href, component id).
_NAV_ITEMS = [
    ("Home", "/", "home-link"),
    ("Tab 1", "/tab1", "tab1-link"),
    ("Tab 2", "/tab2", "tab2-link"),
    ("Tab 3", "/tab3", "tab3-link"),
    ("Tab 4", "/tab4", "tab4-link"),
]
_nav_links = [
    dbc.NavLink(label, href=href, id=link_id)
    for label, href, link_id in _NAV_ITEMS
]
sidebar = html.Div(
    [
        html.H2("Menu", className="display-4"),
        html.Hr(),
        html.P("Select a page:", className="lead"),
        dbc.Nav(_nav_links, vertical=True, pills=True),
    ],
    style=SIDEBAR_STYLE,
    id="sidebar",
)

# Main content container; its children are filled in by the page callback.
content = html.Div(id="page-content", style=CONTENT_STYLE)

# Button that toggles the sidebar.
_toggle_button = dbc.Button(
    "Toggle Sidebar",
    id="sidebar-toggle",
    color="primary",
    n_clicks=0,
    style={"margin": "10px"},
)

# Full page layout; dcc.Location exposes the current URL to callbacks.
app.layout = html.Div([
    dcc.Location(id="url"),
    sidebar,
    _toggle_button,
    content,
])

# Callback wiring
@app.callback(
    [Output("sidebar", "style"), Output("page-content", "style")],
    [Input("sidebar-toggle", "n_clicks")],
    [State("sidebar", "style"), State("page-content", "style")],
)
def toggle_sidebar(n, sidebar_style, content_style):
    """Open or close the sidebar based on the toggle button's click count.

    Args:
        n: Click count of the toggle button.
        sidebar_style: Current sidebar style (received as State, not used).
        content_style: Current content style (received as State, not used).

    Returns:
        A (sidebar style, content style) pair: the hidden/expanded pair for an
        odd click count, the default pair for an even one.
    """
    sidebar_closed = n % 2 == 1
    if sidebar_closed:
        return SIDEBAR_HIDDEN, CONTENT_EXPANDED
    return SIDEBAR_STYLE, CONTENT_STYLE

@app.callback(
    Output("page-content", "children"),
    [Input("url", "pathname")],
)
def render_page(pathname):
    """Return the page content that corresponds to the current URL path.

    Args:
        pathname: Path component of the current URL.

    Returns:
        The matching tab's heading and paragraph, or the Home page for any
        other path.
    """
    # (path, heading, body text) for every tab page.
    tab_pages = (
        ("/tab1", "Tab 1", "Content for Tab 1"),
        ("/tab2", "Tab 2", "Content for Tab 2"),
        ("/tab3", "Tab 3", "Content for Tab 3"),
        ("/tab4", "Tab 4", "Content for Tab 4"),
    )
    for path, heading, body_text in tab_pages:
        if pathname == path:
            return html.Div([
                html.H1(heading),
                html.P(body_text),
            ])

    # Anything else falls back to the Home page.
    return html.Div([
        html.H1("Home"),
        html.P("This is the Home Page. Project overview and details will go here."),
    ])

# Start the development server when this module is the entry point
if __name__ == "__main__":
    # `run_server` is the legacy entry point and is no longer available in
    # recent Dash releases (NOTE(review): confirm against the installed
    # version). Prefer `app.run`, and fall back to `run_server` only on
    # installs that do not provide it.
    _run = getattr(app, "run", None) or app.run_server
    _run(debug=True)