In [1]:
# work environment: faiss_env
import pandas as pd
import numpy as np
import pickle
import sys
import os
import warnings
from pathlib import Path

# 경로 설정
gems_tco_path = "/Users/joonwonlee/Documents/GEMS_TCO-1/src"
sys.path.append(gems_tco_path)

# Warnings 무시 설정
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning, module="xarray")

# 커스텀 모듈 로드
from GEMS_TCO import configuration as config
from GEMS_TCO import data_preprocess as dmbh

def process_coarse_data(base_path, years, months, lat_lon_bounds, step_sizes, base_csv_path, grid_type='standard'):
    """
    Coarsen monthly orbit-map pickles onto a grid of center points and save them.

    For every (year, month) pair the function loads
    ``<base_path>/pickle_<year>/orbit_map<yy>_<mm>.pkl``, passes the loaded
    object and the generated center points to ``instance.coarse_by_center``,
    and pickles the result into the same folder as

    * ``coarse_cen_map<yy>_<mm>.pkl``       for the standard grid, or
    * ``coarse_cen_map_rect<yy>_<mm>.pkl``  for the rectangular grid.

    Args:
        base_path: Directory that contains the ``pickle_<year>`` folders.
        years: Iterable of years. ``str(year)[2:]`` is used in file names
            (``22`` for ``2022``).
        months: Iterable of integer months; each is zero-padded to two digits.
        lat_lon_bounds: ``(lat_start, lat_end, lon_start, lon_end)``.
        step_sizes: ``(step_lat, step_lon)``.
        base_csv_path: CSV read with ``pd.read_csv`` and handed to
            ``dmbh.center_matching_hour`` together with the bounds.
        grid_type (str): ``'rect'`` builds the center points with
            ``make_center_points_wo_calibration``; ``'standard'`` uses
            ``make_center_points``. Any other value still falls back to the
            standard grid, but a warning is printed so a typo does not go
            unnoticed.

    Returns:
        None. Progress, skipped files and errors are reported with ``print``.
        A failure while building the instance aborts the whole run; a failure
        for a single month is reported and the loop continues.
    """
    print(f"\n--- Starting Coarsening Process (Type: {grid_type}) ---")

    # Surface unknown grid types instead of silently treating them as 'standard'.
    if grid_type not in ('standard', 'rect'):
        print(f"   [Warning] Unrecognized grid_type {grid_type!r}; falling back to 'standard'.")

    lat_start, lat_end, lon_start, lon_end = lat_lon_bounds
    step_lat, step_lon = step_sizes

    # 1. Load the base dataframe and build the matching instance.
    try:
        print(f"Loading base dataframe from: {base_csv_path}")
        df = pd.read_csv(base_csv_path)
        instance = dmbh.center_matching_hour(df, lat_start, lat_end, lon_start, lon_end)
    except Exception as e:
        print(f"Error initializing instance: {e}")
        return

    # 2. Generate the center points; the branch also fixes the output file prefix.
    if grid_type == 'rect':
        print("Generating Rectangular Center Points (No Calibration)...")
        center_points = instance.make_center_points_wo_calibration(step_lat=step_lat, step_lon=step_lon)
        # Note: no underscore between "rect" and the year, e.g. coarse_cen_map_rect22_07.pkl
        output_prefix = "coarse_cen_map_rect"
    else:
        print("Generating Standard Center Points (With Calibration)...")
        center_points = instance.make_center_points(step_lat=step_lat, step_lon=step_lon)
        # e.g. coarse_cen_map22_07.pkl
        output_prefix = "coarse_cen_map"

    # 3. Process every requested year/month.
    for year in years:
        year_suffix = str(year)[2:]  # loop-invariant for all months of this year

        for month in months:
            month_str = f"{month:02d}"
            print(f">> Processing: Year {year}, Month {month_str}")

            try:
                pickle_path = os.path.join(base_path, f'pickle_{year}')
                input_filename = f"orbit_map{year_suffix}_{month_str}.pkl"
                output_filename = f"{output_prefix}{year_suffix}_{month_str}.pkl"

                input_filepath = os.path.join(pickle_path, input_filename)
                output_filepath = os.path.join(pickle_path, output_filename)

                # Load
                if not os.path.exists(input_filepath):
                    print(f"   [Skip] File not found: {input_filename}")
                    continue

                # SECURITY NOTE: pickle.load can execute arbitrary code contained
                # in the file; only point this function at trusted pickles.
                with open(input_filepath, 'rb') as pickle_file:
                    loaded_map = pickle.load(pickle_file)

                # Coarsen the loaded map onto the center points.
                # (Original author's note: the IDW k=3 logic runs inside this call.)
                coarse_cen_map = instance.coarse_by_center(loaded_map, center_points)

                # Save next to the input file.
                os.makedirs(pickle_path, exist_ok=True)
                with open(output_filepath, 'wb') as pickle_file:
                    pickle.dump(coarse_cen_map, pickle_file)

                print(f"   [Saved] {output_filename}")

            except Exception as e:
                # Best effort: report the failure and move on to the next month.
                print(f"   [Error] {e}")

# ==========================================
# 실행부 (Main)
# ==========================================
if __name__ == '__main__':
    # Root directory holding the per-year pickle folders.
    BASE_PATH = config.mac_data_load_path

    # Period to process.
    target_year = 2022
    target_month = 7

    # Grid definition: (lat_start, lat_end, lon_start, lon_end) and (step_lat, step_lon).
    LAT_LON_BOUNDS = (-3, 2, 121, 131)
    STEP_SIZES = (0.044, 0.063)

    # NOTE(review): the month ("07"), day range ("0131") and bounds in this file
    # name are hard-coded and do not follow target_month / LAT_LON_BOUNDS —
    # confirm this is intended before changing either of them.
    BASE_CSV_PATH = (
        f"/Users/joonwonlee/Documents/GEMS_DATA/data_{target_year}/"
        f"data_{str(target_year)[2:]}_07_0131_N-32_E121131.csv"
    )
    YEARS_TO_PROCESS = [target_year]
    MONTHS_TO_PROCESS = [target_month]

    # -------------------------------------------------------
    # Run only the grid type you need.
    # -------------------------------------------------------

    # 1. Standard grid (original method):
    # process_coarse_data(BASE_PATH, YEARS_TO_PROCESS, MONTHS_TO_PROCESS, LAT_LON_BOUNDS, STEP_SIZES, BASE_CSV_PATH, grid_type='standard')

    # 2. Rectangular grid (without decrement):
    process_coarse_data(
        BASE_PATH,
        YEARS_TO_PROCESS,
        MONTHS_TO_PROCESS,
        LAT_LON_BOUNDS,
        STEP_SIZES,
        BASE_CSV_PATH,
        grid_type='rect',  # selects the rectangular grid
    )


--- Starting Coarsening Process (Type: rect) ---
Loading base dataframe from: /Users/joonwonlee/Documents/GEMS_DATA/data_2022/data_22_07_0131_N-32_E121131.csv
Generating Rectangular Center Points (No Calibration)...
>> Processing: Year 2022, Month 07
   [Saved] coarse_cen_map_rect22_07.pkl


In [4]:
import sys
# Add your custom path
gems_tco_path = "/Users/joonwonlee/Documents/GEMS_TCO-1/src"
sys.path.append(gems_tco_path)
from GEMS_TCO import kernels_reparam_space_time_gpu as kernels_reparam_space_time
from GEMS_TCO import orderings as _orderings 
from GEMS_TCO import alg_optimization, BaseLogger

from typing import Optional, List, Tuple
from pathlib import Path
import typer
import json
from json import JSONEncoder
from GEMS_TCO import configuration as config
from GEMS_TCO.data_loader import load_data2, exact_location_filter
from GEMS_TCO import debiased_whittle
from torch.nn import Parameter

# Resolution is given as strings and converted to integers for the loader.
space: List[str] = ['1', '1']
lat_lon_resolution = list(map(int, space))
# Forwarded to the loader as mm_cond_number.
mm_cond_number: int = 8

# Period to load (alternative kept from the original: years = ['2024']).
years = ['2023']
month_range = [7]

# Input and output paths refer to the same Path object.
input_path = Path(config.mac_estimates_day_path)
output_path = input_path
data_load_instance = load_data2(config.mac_data_load_path)

# Spatial window passed to the loader.
# (Smaller window kept from the original: lat [1, 3], lon [125.0, 129.0].)
lat_range_input = [-3, 2]
lon_range_input = [121, 131.0]

df_map, ord_mm, nns_map, day_offsets = data_load_instance.load_maxmin_ordered_data_bymonthyear(
    lat_lon_resolution=lat_lon_resolution,
    mm_cond_number=mm_cond_number,
    years_=years,
    months_=month_range,
    lat_range=lat_range_input,
    lon_range=lon_range_input,
)

import torch

--- Global Monthly Mean for 2023-7: 249.8149 ---


In [6]:
# One entry per day is appended to each of these lists.
daily_aggregated_tensors_dw = []
daily_hourly_maps_dw = []

daily_aggregated_tensors_vecc = []
daily_hourly_maps_vecc = []

for day_index in range(31):
    # Eight consecutive hourly slots per day; passed to the loader as [start, end].
    hour_start_index = 8 * day_index
    hour_end_index = hour_start_index + 8
    hour_indices = [hour_start_index, hour_end_index]

    # Same loader call twice per day, in this order:
    #   1) DW data:      ord_mm=None,   keep_ori=False
    #   2) Vecchia data: ord_mm=ord_mm, keep_ori=True
    for ordering, keep_original, tensor_store, map_store in (
        (None, False, daily_aggregated_tensors_dw, daily_hourly_maps_dw),
        (ord_mm, True, daily_aggregated_tensors_vecc, daily_hourly_maps_vecc),
    ):
        day_hourly_map, day_aggregated_tensor = data_load_instance.load_working_data(
            df_map,
            day_offsets,
            hour_indices,
            ord_mm=ordering,
            dtype=torch.float64,
            keep_ori=keep_original,
        )
        tensor_store.append(day_aggregated_tensor)
        map_store.append(day_hourly_map)

first_vecc_tensor = daily_aggregated_tensors_vecc[0]
print(f"Aggregated Tensor Shape: {first_vecc_tensor.shape}")
# NOTE(review): an earlier note expected 12 columns here, but the recorded run
# printed torch.Size([145008, 11]) — confirm which column count is intended.
nn = first_vecc_tensor.shape[0]

Aggregated Tensor Shape: torch.Size([145008, 11])


In [9]:
# Inspect a single hourly DW map: third day in the list, key for 00:53.
sample_key = '2023_07_y23m07day03_hm00:53'
sample_hourly_tensor = daily_hourly_maps_dw[2][sample_key]
print(sample_hourly_tensor.shape)

sample_hourly_tensor

torch.Size([18126, 11])


tensor([[  2.0000, 131.0000,   1.8712,  ...,   0.0000,   0.0000,   0.0000],
        [  2.0000, 130.9370,   1.9664,  ...,   0.0000,   0.0000,   0.0000],
        [  2.0000, 130.8740,   1.8309,  ...,   0.0000,   0.0000,   0.0000],
        ...,
        [ -2.9720, 121.1720,   5.9084,  ...,   0.0000,   0.0000,   0.0000],
        [ -2.9720, 121.1090,   3.5099,  ...,   0.0000,   0.0000,   0.0000],
        [ -2.9720, 121.0460,   5.7083,  ...,   0.0000,   0.0000,   0.0000]],
       dtype=torch.float64)