In [17]:
# work environment: faiss_env
import pandas as pd
import numpy as np
import pickle
import sys
import os
import warnings
from pathlib import Path

# Path setup: append the local GEMS_TCO source tree to sys.path
gems_tco_path = "/Users/joonwonlee/Documents/GEMS_TCO-1/src"
sys.path.append(gems_tco_path)

# Warning filters: ignore every FutureWarning, and UserWarnings whose
# originating module name matches "xarray"
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning, module="xarray")

# Load the project modules (imported only after the path above is appended)
from GEMS_TCO import configuration as config
from GEMS_TCO import data_preprocess as dmbh

# ==========================================
# 1. Load the ORI pickle file and sanity-check it
# ==========================================
mac_data_path = config.mac_data_load_path
years = [2024]
months = list(range(7, 8))
year = years[0]
month = months[0]
month_str = f"{month:02d}"

# Both the directory and the file name are derived from `year`, so changing
# `years` cannot leave the path pointing at a stale "pickle_2024" folder
# (this matches the per-year directory used by the coarsening steps).
filename = f"pickle_{year}/orbit_map{str(year)[2:]}_{month_str}.pkl"
picklefile_path = Path(mac_data_path) / filename
print(f"Loading Pickle: {picklefile_path}")

# NOTE: pickle.load can execute arbitrary code embedded in the file; only
# load pickles produced by this project.
with open(picklefile_path, 'rb') as pickle_file:
    data_map_hour = pickle.load(pickle_file)

# Preview the first rows of one hourly frame.
print("--- Data Head Check ---")
try:
    print(data_map_hour['y24m07day01_hm00:52'].head())
except KeyError:
    # The expected key is absent: preview the first entry instead, without
    # materializing the full key list, and tolerate an empty map.
    first_key = next(iter(data_map_hour), None)
    if first_key is None:
        print("Loaded orbit map is empty; nothing to preview.")
    else:
        print(data_map_hour[first_key].head())

# ==========================================
# 2. Build the coarse map (center matching)
# ==========================================
# Region bounds and grid steps handed to center_matching_hour below.
# Earlier N05_E123133 setting, kept for reference: lat 0..5, lon 123..133.
lat_start, lat_end = -5, 7
lon_start, lon_end = 118, 135
step_lat = 0.044
step_lon = 0.063

# Base CSV for the bounds above.
# Earlier N05 file, kept for reference:
#   /Users/joonwonlee/Documents/GEMS_DATA/data_2024/data_24_07_0131_N05_E123133.csv
df_path = "/Users/joonwonlee/Documents/GEMS_DATA/data_2024/data_24_07_0131_N-57_E118135.csv"
print(f"Loading Base CSV: {df_path}")
df = pd.read_csv(df_path)

# Matcher object reused by the coarsening loop that follows.
instance = dmbh.center_matching_hour(
    df,
    lat_start,
    lat_end,
    lon_start,
    lon_end,
)

# For every (year, month): load the dense orbit map, coarsen it onto the
# center points, and pickle the result next to the input file.
for year in years:
    for month in months:
        # Recompute the zero-padded month on every iteration; reusing the
        # value computed once before the loop would load and overwrite the
        # same month's files for every entry of `months`.
        month_str = f"{month:02d}"
        # Bound before the try block so both handlers below can always
        # reference it in their messages.
        input_filename = f"orbit_map{str(year)[2:]}_{month_str}.pkl"
        try:
            # Load dense ORI data
            pickle_path = os.path.join(mac_data_path, f'pickle_{year}')
            input_filepath = os.path.join(pickle_path, input_filename)

            with open(input_filepath, 'rb') as pickle_file:
                loaded_map = pickle.load(pickle_file)

            # Build the coarse data
            center_points = instance.make_center_points(step_lat=step_lat, step_lon=step_lon)
            coarse_cen_map = instance.coarse_by_center(loaded_map, center_points)

            # Save the coarse data
            output_filename = f"coarse_cen_map{str(year)[2:]}_{month_str}.pkl"
            output_filepath = os.path.join(pickle_path, output_filename)

            with open(output_filepath, 'wb') as pickle_file:
                pickle.dump(coarse_cen_map, pickle_file)

            print(f"Successfully processed and saved data for year {str(year)[2:]} month {month_str}.")

        except FileNotFoundError:
            print(f"Warning: File {input_filename} not found. Skipping.")
        except Exception as e:
            # Best effort: report the failure and move on to the next month.
            print(f"Error processing file {input_filename}: {e}")

# ==========================================
# 3. Coarsening without latitude adjustment (function version)
# ==========================================
def process_and_save_coarse_data(base_path, years, months, lat_lon_bounds, step_sizes, base_csv_path):
    """
    Coarsen monthly orbit maps onto uncalibrated center points and pickle them.

    For each (year, month) pair the function reads ``orbit_map{YY}_{MM}.pkl``
    from ``<base_path>/pickle_<year>``, builds center points with
    ``make_center_points_wo_calibration``, runs ``coarse_by_center`` and writes
    ``coarse_cen_map_without_decrement_latitude{YY}_{MM}.pkl`` into the same
    folder. ``YY`` is ``str(year)[2:]`` and ``MM`` the zero-padded month.

    Args:
        base_path: Directory that contains the ``pickle_<year>`` folders.
        years: Iterable of years to process.
        months: Iterable of integer months to process.
        lat_lon_bounds: Four values unpacked as
            (lat_start, lat_end, lon_start, lon_end).
        step_sizes: Two values unpacked as (step_lat, step_lon).
        base_csv_path: CSV read with pandas and passed to
            ``dmbh.center_matching_hour``.

    Returns:
        None. Errors are printed rather than raised: a failure while reading
        the CSV or building the matcher ends the run, a failure inside one
        (year, month) only skips that pair.
    """
    print("\n--- Starting Data Coarsening Process ---")

    lat_from, lat_to, lon_from, lon_to = lat_lon_bounds
    lat_step, lon_step = step_sizes

    # Nothing can be processed without the matcher, so bail out early.
    try:
        print(f"Loading base dataframe from: {base_csv_path}")
        base_frame = pd.read_csv(base_csv_path)
        matcher = dmbh.center_matching_hour(base_frame, lat_from, lat_to, lon_from, lon_to)
    except Exception as e:
        print(f"Error initializing: {e}")
        return

    for year, month in ((y, m) for y in years for m in months):
        month_str = f"{month:02d}"
        print(f"Processing: Year {year}, Month {month_str}")

        try:
            year_suffix = str(year)[2:]
            month_dir = os.path.join(base_path, f'pickle_{year}')
            source_name = f"orbit_map{year_suffix}_{month_str}.pkl"
            target_name = f"coarse_cen_map_without_decrement_latitude{year_suffix}_{month_str}.pkl"
            source_file = os.path.join(month_dir, source_name)
            target_file = os.path.join(month_dir, target_name)

            print(f"  Loading: {source_name}")
            with open(source_file, 'rb') as src:
                dense_map = pickle.load(src)

            print("  Generating center points (without calibration)...")
            centers = matcher.make_center_points_wo_calibration(step_lat=lat_step, step_lon=lon_step)

            print("  Coarsening data by center...")
            coarse_map = matcher.coarse_by_center(dense_map, centers)

            os.makedirs(month_dir, exist_ok=True)
            print(f"  Saving: {target_name}")
            with open(target_file, 'wb') as dst:
                pickle.dump(coarse_map, dst)

            print(f"  Successfully processed and saved data for {year}-{month_str}.")

        except Exception as e:
            # Report and continue with the next (year, month) pair.
            print(f"  An error occurred: {e}")

# Entry point
if __name__ == '__main__':
    BASE_PATH = config.mac_data_load_path

    # The base CSV has to describe the same region as LAT_LON_BOUNDS below.
    # The bounds were widened to (-5, 7, 118, 135), so use the matching
    # N-57_E118135 file instead of the earlier N05_E123133 one.
    #BASE_CSV_PATH = "/Users/joonwonlee/Documents/GEMS_DATA/data_2024/data_24_07_0131_N05_E123133.csv"
    BASE_CSV_PATH = "/Users/joonwonlee/Documents/GEMS_DATA/data_2024/data_24_07_0131_N-57_E118135.csv"

    YEARS_TO_PROCESS = [2024]
    MONTHS_TO_PROCESS = [7]

    # (lat_start, lat_end, lon_start, lon_end); the earlier N05_E123133
    # region is kept commented out for reference.
    #LAT_LON_BOUNDS = (0, 5, 123, 133)
    LAT_LON_BOUNDS = (-5, 7, 118, 135)
    # (step_lat, step_lon)
    STEP_SIZES = (0.044, 0.063)

    process_and_save_coarse_data(
        base_path=BASE_PATH,
        years=YEARS_TO_PROCESS,
        months=MONTHS_TO_PROCESS,
        lat_lon_bounds=LAT_LON_BOUNDS,
        step_sizes=STEP_SIZES,
        base_csv_path=BASE_CSV_PATH
    )


Loading Pickle: /Users/joonwonlee/Documents/GEMS_DATA/pickle_2024/orbit_map24_07.pkl
--- Data Head Check ---
   Latitude  Longitude                 Time  ColumnAmountO3  \
0  6.988798  134.98330  2024-07-01 00:53:00       263.67792   
1  6.988811  134.92009  2024-07-01 00:53:00       269.09198   
2  6.988795  134.85590  2024-07-01 00:53:00       270.52588   
3  6.988876  134.79294  2024-07-01 00:53:00       271.30637   
4  6.988755  134.72989  2024-07-01 00:53:00       271.05127   

   FinalAlgorithmFlags  Hours_elapsed             Orbit  
0                  2.0  477720.883333  2024-07-01 00:53  
1                  2.0  477720.883333  2024-07-01 00:53  
2                  2.0  477720.883333  2024-07-01 00:53  
3                  2.0  477720.883333  2024-07-01 00:53  
4                  2.0  477720.883333  2024-07-01 00:53  
Loading Base CSV: /Users/joonwonlee/Documents/GEMS_DATA/data_2024/data_24_07_0131_N-57_E118135.csv
Successfully processed and saved data for year 24 month 07.

--- 

In [18]:
import sys
# Add your custom path
gems_tco_path = "/Users/joonwonlee/Documents/GEMS_TCO-1/src"
sys.path.append(gems_tco_path)
from GEMS_TCO import kernels_reparam_space_time_gpu as kernels_reparam_space_time
from GEMS_TCO import orderings as _orderings 
from GEMS_TCO import alg_optimization, alg_opt_Encoder

from typing import Optional, List, Tuple
from pathlib import Path
import typer
import json
from json import JSONEncoder
from GEMS_TCO import configuration as config
from GEMS_TCO.data_loader import load_data2, exact_location_filter
from GEMS_TCO import debiased_whittle
from torch.nn import Parameter
from GEMS_TCO.data_loader import load_data2, exact_location_filter

# Resolution is given as strings and converted to ints for the loader.
space: List[str] = ['1', '1']
lat_lon_resolution = list(map(int, space))
mm_cond_number: int = 8
years = ['2024']
month_range = [7]

# Input and output paths refer to the same estimates directory.
input_path = Path(config.mac_estimates_day_path)
output_path = input_path
data_load_instance = load_data2(config.mac_data_load_path)

# Spatial window passed to the loader.
# A narrower window tried earlier: lat [1, 3], lon [125.0, 129.0].
lat_range_input = [0, 5]
lon_range_input = [123, 133.0]

# Load the requested month(s); the call returns the data map together with
# two companions that are bound to ord_mm and nns_map.
df_map, ord_mm, nns_map = data_load_instance.load_maxmin_ordered_data_bymonthyear(
    lat_lon_resolution=lat_lon_resolution,
    mm_cond_number=mm_cond_number,
    years_=years,
    months_=month_range,
    lat_range=lat_range_input,
    lon_range=lon_range_input,
)

import torch

Subsetting data to lat: [0, 5], lon: [123, 133.0]


In [19]:
# Per-day results loaded with ord_mm=None and keep_ori=True.
daily_aggregated_tensors_dw = []
daily_hourly_maps_dw = []

# Per-day results loaded with the ord_mm ordering and keep_ori=False.
daily_aggregated_tensors_vecc = []
daily_hourly_maps_vecc = []

# 31 days, each mapped to a block of 8 consecutive hour indices.
for day_index in range(31):
    hour_start_index = 8 * day_index
    hour_end_index = 8 * (day_index + 1)
    # For a single-hour run, hour_end_index = day_index*8 + 1 was used instead.
    hour_indices = [hour_start_index, hour_end_index]

    # First pass: no ordering applied, keep_ori=True.
    day_hourly_map, day_aggregated_tensor = data_load_instance.load_working_data(
        df_map,
        hour_indices,
        ord_mm=None,
        dtype=torch.float64,
        keep_ori=True,
    )
    daily_aggregated_tensors_dw.append(day_aggregated_tensor)
    daily_hourly_maps_dw.append(day_hourly_map)

    # Second pass: same hours, with the ord_mm ordering and keep_ori=False.
    day_hourly_map, day_aggregated_tensor = data_load_instance.load_working_data(
        df_map,
        hour_indices,
        ord_mm=ord_mm,
        dtype=torch.float64,
        keep_ori=False,
    )
    daily_aggregated_tensors_vecc.append(day_aggregated_tensor)
    daily_hourly_maps_vecc.append(day_hourly_map)

# Report the shape of the first day's ordered tensor and keep its row count.
first_day_shape = daily_aggregated_tensors_vecc[0].shape
print(first_day_shape)
nn = first_day_shape[0]

torch.Size([145008, 4])


In [22]:
# Inspect one hourly tensor from the keep_ori=True maps: list index 2
# (third day appended), entry keyed '2024_07_y24m07day03_hm06:49'.
daily_hourly_maps_dw[2]['2024_07_y24m07day03_hm06:49']

tensor([[4.9913e+00, 1.3170e+02, 2.6778e+02, 7.5000e+01],
        [4.9913e+00, 1.3170e+02, 2.6778e+02, 7.5000e+01],
        [4.9913e+00, 1.3170e+02, 2.6778e+02, 7.5000e+01],
        ...,
        [9.8304e-03, 1.2316e+02, 2.6370e+02, 7.5000e+01],
        [9.9527e-03, 1.2310e+02, 2.6461e+02, 7.5000e+01],
        [9.9826e-03, 1.2304e+02, 2.6473e+02, 7.5000e+01]], dtype=torch.float64)