In [None]:

# Station coordinate table (columns: id, latitude, longitude).
# NOTE(review): not referenced by any code visible in this notebook export.
observation_coord = "active_stations_lat_lon.csv"
# JSONL annotation file: one record per line with `source_filename`,
# `ground_truth_labels` and `generated_qas`.
annotation = "generated_weather_dataset_gemini.jsonl"
# Input image directories; both are indexed by the record's `source_filename`.
ir_images="./ir"
rgb_images="./rgb"

# active_stations_lat_lon.csv sample line
"""
id,latitude,longitude
00FAY,53.19,-112.25
"""
# generated_weather_dataset_gemini.jsonl sample line
"""
{"sample_id": "65344", "source_filename": "65344_terra_day_coco25_23100311_lh11_5.093901_0.147878_14.319126_9.507087.png", "ground_truth_labels": {"id": "65344", "time": "2023-10-03 11:00:00", "lat": 6.35, "lon": 2.3833, "coco": 25, "local_hour": 11, "temp": 28.4, "dwpt": 24.6, "rhum": 80.0, "prcp": 0.1, "snow": NaN, "wdir": 223.0, "wspd": 14.8, "wpgt": NaN, "pres": 1011.9, "tsun": NaN, "filename": "65344_terra_day_coco25_23100311_lh11_5.093901_0.147878_14.319126_9.507087.png", "satellite": "terra", "day_night": "day", "utc_yymmddhh": "23100311", "local_hour_filename": 11, "bbox_lat1": 5.093901, "bbox_lon1": 0.147878, "bbox_lat2": 14.319126, "bbox_lon2": 9.507087}, "generated_qas": [{"theme": "Theme 0: Basic Observation", "question": "Describe the weather conditions in the observation region based on the images provided.", "answer": "The observation region is dominated by highly unstable weather, characterized by intense, vertically developed cloud masses. The RGB image shows very bright, towering cumuliform clouds, indicative of deep convection, which suggests conditions conducive to heavy rain and strong winds. The LST/IR panel is largely black across the cloudy regions, confirming the presence of very high-altitude, cold cloud tops associated with mature convective cells, typical of an active storm environment."}, {"theme": "Theme 1: Holistic Analysis", "question": "What is the overall synoptic situation indicated by the cloud structures visible across this region?", "answer": "The overall synoptic situation suggests a highly unstable atmosphere characterized by widespread deep moist convection. The presence of numerous, massive cumulonimbus clouds, highly reflective in the visible spectrum, points to strong uplift and significant latent heat release. 
This pattern is characteristic of a highly energetic environment where moisture and instability fuel organized storm development across a broad area, often leading to thunderstorms."}, {"theme": "Theme 2: Qualitative Inference", "question": "Based on the appearance of the cloud structures, what qualitative assessment can be made regarding the wind intensity associated with this system?", "answer": "The visual evidence suggests that the wind intensity, particularly associated with the storm dynamics, is qualitatively high. The turbulent, rapidly developing nature of the towering convective cells in the RGB image indicates strong vertical wind shear and significant horizontal winds feeding the storm system. High wind speeds are necessary to maintain the vigorous uplift and deep structure seen in these powerful convective clouds."}, {"theme": "Theme 3: Causal Reasoning", "question": "What primary meteorological factors are likely causing the formation and rapid growth of these large, vertically developed convective clouds?", "answer": "The primary cause of this intense cloud formation is a combination of atmospheric instability and abundant moisture. The towering height and exceptional brightness of the clouds in the RGB image signify strong surface heating or convergence leading to vigorous, persistent uplift. This rapid vertical transport of moist air through the deep troposphere allows substantial latent heat release, fueling the massive development characteristic of severe convective storms."}, {"theme": "Theme 4: Anomaly & Mismatch", "question": "Does the satellite imagery indicate a stable, clear, or generally calm weather regime over the observed area?", "answer": "No, the imagery strongly contradicts the notion of a stable or calm regime. The RGB panel is dominated by deep, turbulent, and highly reflective cumulonimbus clouds, which are unmistakable signatures of extreme atmospheric instability. 
This intense convective activity suggests vigorous vertical motion, a high potential for heavy rainfall, and associated severe phenomena, indicating a highly active storm system."}, {"theme": "Theme 5: Cross-Modal Comparison", "question": "How does the LST/IR data (right panel) corroborate the visual interpretation of cloud height derived from the reflectivity in the RGB image (left panel)?", "answer": "The LST/IR image strongly corroborates the interpretation of extremely high cloud tops derived from the RGB image. The massive, bright structures seen in the RGB image, which indicate high reflectivity and vertical extent, correspond to large areas of black in the LST/IR panel. This black coloring signifies cloud tops that are very cold, confirming the exceptional vertical development that penetrates high into the troposphere, a key characteristic of severe convective storms."}, {"theme": "Theme 6: Counterfactual Reasoning", "question": "If the environment surrounding the main convective cells showed clear skies and strong land surface heating, how might that influence the longevity and intensity of the existing storms?", "answer": "If the surrounding environment were characterized by clear skies and strong land surface heating, the intensity and longevity of the existing storms would likely be enhanced. Strong heating would increase the temperature and moisture contrast near the storm boundaries, generating more buoyant air that could be ingested into the storm system. This continuous supply of energy and moisture would help sustain the deep convection and potentially increase the storm's severity and size."}]}
"""


# Directory that receives the per-sample `rgb_*` / `lst_*` images.
output_images="./seperated"
# JSON file (a single array of items) describing every saved image.
output_json="./stage1_preprocessed_items.json"
# output json sample item
"""
    {
        "image": "/home/agi592/kse/ClimateToText/data/WeatherQA/WeatherQA_MD_2014-2019/md_image/2018/ttd/md0022_20180112_13_ttd.gif",
        "annotation": "Areas affected...Middle....",
        "cond_name": "wqa:ttd"
    },
"""

In [None]:
import json
import os
import re
from PIL import Image, ImageDraw
from typing import Dict, Optional, Tuple
from tqdm import tqdm

def parse_full_filename_metadata(filename: str) -> Dict:
    """
    Parse every metadata field encoded in an image file name.

    Expected layout:
    ``{id}_{satellite}_{day|night}_coco{N}_{yymmddhh}_lh{H}_{lat1}_{lon1}_{lat2}_{lon2}.png``

    Returns a dict with the keys ``filename``, ``id``, ``satellite``,
    ``day_night``, ``coco``, ``utc_yymmddhh``, ``local_hour_filename`` and
    ``bbox_lat1``/``bbox_lon1``/``bbox_lat2``/``bbox_lon2``, or an empty dict
    when the name does not start with that layout.
    """
    matched = re.match(
        r'([^_]+)_([^_]+)_(day|night)_coco(\d+)_(\d{8})_lh(\d+)_([-\d.]+)_([-\d.]+)_([-\d.]+)_([-\d.]+)\.png',
        filename,
    )
    if matched is None:
        return {}

    (station, satellite, day_night, coco, utc_stamp,
     local_hour, lat1, lon1, lat2, lon2) = matched.groups()

    # Numeric fields are converted; everything else stays a string.
    return {
        'filename': filename,
        'id': station,
        'satellite': satellite,
        'day_night': day_night,
        'coco': int(coco),
        'utc_yymmddhh': utc_stamp,
        'local_hour_filename': int(local_hour),
        'bbox_lat1': float(lat1),
        'bbox_lon1': float(lon1),
        'bbox_lat2': float(lat2),
        'bbox_lon2': float(lon2),
    }

def calculate_dot_position(
    metadata: Dict,
    img_width: int = 128,
    img_height: int = 128,
) -> Tuple[Optional[float], Optional[float], bool]:
    """
    Compute the (x, y) pixel position of the observation point on the image.

    The point (``lat``/``lon``) is linearly interpolated inside the bounding
    box (``bbox_lat1``/``bbox_lon1``/``bbox_lat2``/``bbox_lon2``):
    ``bbox_lon1`` maps to x = 0, ``bbox_lon2`` to x = img_width - 1,
    ``bbox_lat2`` to y = 0 (top row) and ``bbox_lat1`` to y = img_height - 1.

    Args:
        metadata: Mapping holding the six keys above; values may be numbers
            or numeric strings.
        img_width: Image width in pixels (defaults to 128, the size the
            original implementation hard-coded).
        img_height: Image height in pixels (defaults to 128).

    Returns:
        ``(pixel_x, pixel_y, True)`` when the point can be plotted, otherwise
        ``(None, None, False)`` (missing key, non-numeric value, zero-sized
        bounding box, or a point that falls outside the image).
    """
    required = ('lat', 'lon', 'bbox_lat1', 'bbox_lon1', 'bbox_lat2', 'bbox_lon2')
    if any(key not in metadata for key in required):
        return None, None, False

    try:
        lat_target, lon_target, lat_min, lon_min, lat_max, lon_max = (
            float(metadata[key]) for key in required
        )
    except (ValueError, TypeError):
        # Empty CSV cells, None, or otherwise non-numeric values.
        return None, None, False

    lon_span = lon_max - lon_min
    lat_span = lat_max - lat_min
    if lon_span == 0 or lat_span == 0:
        # Degenerate bounding box: interpolation would divide by zero.
        return None, None, False

    pixel_x = (lon_target - lon_min) / lon_span * (img_width - 1)
    # Image rows grow downwards, so measure latitude from the top edge.
    pixel_y = (lat_max - lat_target) / lat_span * (img_height - 1)

    # NaN coordinates fail both comparisons and are rejected here as well.
    if not (0 <= pixel_x < img_width and 0 <= pixel_y < img_height):
        return None, None, False

    return pixel_x, pixel_y, True

def add_purple_dot_to_image(img: Image.Image, pixel_x: float, pixel_y: float) -> Image.Image:
    """
    Draw a purple dot centred on (pixel_x, pixel_y).

    The dot is drawn on the result of ``img.convert("RGB")``, and that
    converted image is what gets returned.
    """
    radius = 2
    canvas = img.convert("RGB")
    bounding_box = (
        pixel_x - radius,
        pixel_y - radius,
        pixel_x + radius,
        pixel_y + radius,
    )
    ImageDraw.Draw(canvas).ellipse(bounding_box, fill='purple', outline='purple')
    return canvas

def combine_images_horizontally(rgb_img: Image.Image, ir_img: Image.Image) -> Image.Image:
    """
    Place the two images side by side (RGB on the left, IR on the right).

    Both inputs are first resized to the smaller of the two heights, scaling
    the width by the same factor, and then pasted onto a new RGB canvas.
    """
    target_height = min(rgb_img.height, ir_img.height)

    panels = []
    for panel in (rgb_img, ir_img):
        scaled_width = int(panel.width * target_height / panel.height)
        panels.append(panel.resize((scaled_width, target_height)))
    left, right = panels

    canvas = Image.new('RGB', (left.width + right.width, target_height))
    canvas.paste(left, (0, 0))
    # The right panel starts where the left panel ends.
    canvas.paste(right, (left.width, 0))
    return canvas

# Create the output directory.
os.makedirs(output_images, exist_ok=True)

# One JSON item is collected here for every image that gets saved.
output_items = []

# Read the annotation JSONL file (one JSON record per line).
print(f"Processing {annotation}...")
with open(annotation, 'r', encoding='utf-8') as f:
    lines = f.readlines()

failed_count = 0
success_count = 0

for line in tqdm(lines, desc="Processing annotations"):
    if not line.strip():
        continue

    try:
        data = json.loads(line)

        # Pull out the fields this stage needs.
        source_filename = data.get('source_filename')
        ground_truth = data.get('ground_truth_labels', {})
        generated_qas = data.get('generated_qas', [])

        if not source_filename or not generated_qas:
            failed_count += 1
            continue

        # Resolve the annotation text before touching the disk so that a
        # malformed QA entry cannot leave orphan images behind.
        annotation_text = generated_qas[0]['answer']

        # Values parsed from the file name override the ground-truth labels.
        filename_metadata = parse_full_filename_metadata(source_filename)
        merged_metadata = {**ground_truth, **filename_metadata}

        rgb_path = os.path.join(rgb_images, source_filename)
        ir_path = os.path.join(ir_images, source_filename)

        # Both modalities must exist for the sample to be usable.
        if not os.path.exists(rgb_path) or not os.path.exists(ir_path):
            failed_count += 1
            continue

        # Pixel position of the observation point (may be unavailable).
        pixel_x, pixel_y, can_plot_dot = calculate_dot_position(merged_metadata)

        rgb_output_path = os.path.join(output_images, f"rgb_{source_filename}")
        lst_output_path = os.path.join(output_images, f"lst_{source_filename}")

        # Context managers close the source files even if saving fails.
        with Image.open(rgb_path) as rgb_src, Image.open(ir_path) as ir_src:
            rgb_img, ir_img = rgb_src, ir_src

            # Mark the observation point on both images when it is plottable.
            if can_plot_dot:
                rgb_img = add_purple_dot_to_image(rgb_src, pixel_x, pixel_y)
                ir_img = add_purple_dot_to_image(ir_src, pixel_x, pixel_y)

            rgb_img.save(rgb_output_path)
            ir_img.save(lst_output_path)

        # One output item per modality, sharing the same annotation text.
        output_items.append({
            "image": os.path.abspath(rgb_output_path),
            "annotation": annotation_text,
            "cond_name": "modis:rgb"
        })
        output_items.append({
            "image": os.path.abspath(lst_output_path),
            "annotation": annotation_text,
            "cond_name": "modis:lst"
        })
        success_count += 1

    except Exception as e:
        # Per-record boundary: report the problem and keep going.
        print(f"\nError processing line: {e}")
        failed_count += 1
        continue

# Write all collected items as a single JSON array.
with open(output_json, 'w', encoding='utf-8') as f:
    json.dump(output_items, f, ensure_ascii=False, indent=2)

print(f"\nProcessing complete!")
print(f"Success: {success_count} items")
print(f"Failed: {failed_count} items")
print(f"Output JSON: {output_json}")
print(f"Output images directory: {output_images}")

In [None]:
# Convert every PNG in ./rgb and ./ir into ./seperated_all (saved as rgb_*/lst_*, with the observation dot drawn when CSV metadata is found)
import json
import os
import re
import csv
from PIL import Image, ImageDraw
from typing import Dict, Optional, Tuple
from tqdm import tqdm

# Settings
# Ground-truth CSV; its header and one sample row are shown below.
annotation_file = "./gee_weather_ground_truth_balanced.csv"
# id,time,lat,lon,coco,local_hour,temp,dwpt,rhum,prcp,snow,wdir,wspd,wpgt,pres,tsun
# 71917,2023-11-21 07:00:00,79.99,-85.81,1,1,-23.0,-26.0,76.0,0.0,,290.0,20.5,,1002.3,

# Sample image file name:
# KFLY0_terra_day_coco3_23072517_lh10_38.353755_-106.820302_42.966368_-100.739783.png
# Key used to join a file name with a CSV row:
# pk: {id, yymmddhh, lh{localhour}}
rgb_input_dir = "./rgb"
ir_input_dir = "./ir"
output_dir = "./seperated_all"

# Create the output directory
os.makedirs(output_dir, exist_ok=True)

def parse_full_filename_metadata(filename: str) -> Dict:
    """Parse all metadata encoded in the file name; return {} when it does not match."""
    matched = re.match(
        r'([^_]+)_([^_]+)_(day|night)_coco(\d+)_(\d{8})_lh(\d+)_([-\d.]+)_([-\d.]+)_([-\d.]+)_([-\d.]+)\.png',
        filename,
    )
    if not matched:
        return {}

    # Output key and converter for each regex group, in group order.
    field_specs = (
        ('id', str),
        ('satellite', str),
        ('day_night', str),
        ('coco', int),
        ('utc_yymmddhh', str),
        ('local_hour_filename', int),
        ('bbox_lat1', float),
        ('bbox_lon1', float),
        ('bbox_lat2', float),
        ('bbox_lon2', float),
    )

    parsed = {'filename': filename}
    for (key, convert), raw_value in zip(field_specs, matched.groups()):
        parsed[key] = convert(raw_value)
    return parsed

def calculate_dot_position(
    metadata: Dict,
    img_width: int = 128,
    img_height: int = 128,
) -> Tuple[Optional[float], Optional[float], bool]:
    """Compute the (x, y) pixel position of the observation point on the image.

    ``lat``/``lon`` are linearly interpolated inside the bounding box:
    ``bbox_lon1`` maps to x = 0, ``bbox_lon2`` to x = img_width - 1,
    ``bbox_lat2`` to y = 0 (top row) and ``bbox_lat1`` to y = img_height - 1.

    Args:
        metadata: Mapping holding ``lat``, ``lon`` and the four ``bbox_*``
            keys; values may be numbers or numeric strings (e.g. CSV cells).
        img_width: Image width in pixels (defaults to 128, the size the
            original implementation hard-coded).
        img_height: Image height in pixels (defaults to 128).

    Returns:
        ``(pixel_x, pixel_y, True)`` when the point can be plotted, otherwise
        ``(None, None, False)`` (missing key, non-numeric value, zero-sized
        bounding box, or a point that falls outside the image).
    """
    required = ('lat', 'lon', 'bbox_lat1', 'bbox_lon1', 'bbox_lat2', 'bbox_lon2')
    if any(key not in metadata for key in required):
        return None, None, False

    try:
        lat_target, lon_target, lat_min, lon_min, lat_max, lon_max = (
            float(metadata[key]) for key in required
        )
    except (ValueError, TypeError):
        # Empty CSV cells, None, or otherwise non-numeric values.
        return None, None, False

    lon_span = lon_max - lon_min
    lat_span = lat_max - lat_min
    if lon_span == 0 or lat_span == 0:
        # Degenerate bounding box: interpolation would divide by zero.
        return None, None, False

    pixel_x = (lon_target - lon_min) / lon_span * (img_width - 1)
    # Image rows grow downwards, so measure latitude from the top edge.
    pixel_y = (lat_max - lat_target) / lat_span * (img_height - 1)

    # NaN coordinates fail both comparisons and are rejected here as well.
    if not (0 <= pixel_x < img_width and 0 <= pixel_y < img_height):
        return None, None, False

    return pixel_x, pixel_y, True

def add_purple_dot_to_image(img: Image.Image, pixel_x: float, pixel_y: float) -> Image.Image:
    """Draw a purple dot centred on (pixel_x, pixel_y) and return the RGB-converted image."""
    half_size = 2
    rgb_image = img.convert("RGB")
    top_left = (pixel_x - half_size, pixel_y - half_size)
    bottom_right = (pixel_x + half_size, pixel_y + half_size)
    painter = ImageDraw.Draw(rgb_image)
    painter.ellipse((*top_left, *bottom_right), fill='purple', outline='purple')
    return rgb_image

def make_composite_key(station_id: str, yymmddhh: str, local_hour: int) -> str:
    """Build the composite lookup key ``{id}_{yymmddhh}_{lh}``."""
    key_parts = (station_id, yymmddhh, local_hour)
    return "_".join(str(part) for part in key_parts)

# Build the metadata lookup from the CSV file.
# Composite key {id, yymmddhh, local_hour} -> full CSV row (lat, lon, ...).
composite_key_to_metadata = {}
skipped_rows = 0  # rows with missing or malformed id/time/local_hour
print(f"Loading metadata from {annotation_file}...")
# newline='' lets the csv module handle line endings itself.
with open(annotation_file, 'r', encoding='utf-8', newline='') as f:
    for row in csv.DictReader(f):
        station_id = row.get('id')
        time_str = row.get('time')  # "2023-11-21 07:00:00"
        local_hour_str = row.get('local_hour')

        if not (station_id and time_str and local_hour_str):
            skipped_rows += 1
            continue

        try:
            # Derive yymmddhh from the timestamp:
            # "2023-11-21 07:00:00" -> "23112107" (format YYYY-MM-DD HH:MM:SS).
            date_part, clock_part = time_str.split(' ')[:2]
            year, month, day = date_part.split('-')
            hour = clock_part.split(':')[0]
            yymmddhh = f"{year[2:]}{month}{day}{hour}"
            local_hour = int(local_hour_str)
        except (IndexError, ValueError):
            # Unexpected timestamp layout or non-integer local_hour.
            skipped_rows += 1
            continue

        composite_key = make_composite_key(station_id, yymmddhh, local_hour)
        composite_key_to_metadata[composite_key] = row

print(f"Loaded metadata for {len(composite_key_to_metadata)} records")
if skipped_rows:
    # Surface rows that were previously dropped silently.
    print(f"Skipped {skipped_rows} CSV rows with missing or malformed id/time/local_hour")

# Derive the composite key from a file name and join it with the CSV data.
def get_metadata_for_file(filename: str) -> Optional[Dict]:
    """Return the CSV row merged with the file-name metadata, or None.

    None is returned when the file name cannot be parsed or when no CSV row
    exists for its composite key. In the merged dict, values parsed from the
    file name take precedence over CSV columns of the same name.
    """
    parsed = parse_full_filename_metadata(filename)
    if not parsed:
        return None

    station_id = parsed.get('id')
    yymmddhh = parsed.get('utc_yymmddhh')
    local_hour = parsed.get('local_hour_filename')
    if not station_id or not yymmddhh or local_hour is None:
        return None

    lookup_key = make_composite_key(station_id, yymmddhh, local_hour)
    if lookup_key not in composite_key_to_metadata:
        return None

    # Merge the CSV row with the file-name metadata.
    return {**composite_key_to_metadata[lookup_key], **parsed}

def _convert_images(filenames, input_dir: str, prefix: str, desc: str) -> Tuple[int, int, int, int]:
    """Copy images into ``output_dir`` as ``{prefix}_{name}``, adding the dot when possible.

    Args:
        filenames: Image file names located in ``input_dir``.
        input_dir: Directory the images are read from.
        prefix: Prefix for the output file name ("rgb" or "lst").
        desc: Label shown on the progress bar.

    Returns:
        ``(success, failed, dots_added, no_metadata)`` counters.
    """
    success = failed = dots_added = no_metadata = 0

    for filename in tqdm(filenames, desc=desc):
        input_path = os.path.join(input_dir, filename)
        output_path = os.path.join(output_dir, f"{prefix}_{filename}")
        try:
            # The context manager closes the source file even if saving fails.
            with Image.open(input_path) as img:
                out_img = img

                # Draw the purple dot only when CSV metadata is available.
                metadata = get_metadata_for_file(filename)
                if metadata:
                    pixel_x, pixel_y, can_plot = calculate_dot_position(metadata)
                    if can_plot:
                        out_img = add_purple_dot_to_image(img, pixel_x, pixel_y)
                        dots_added += 1
                else:
                    no_metadata += 1

                out_img.save(output_path)
            success += 1
        except Exception as e:
            # Per-file boundary: report the failure instead of hiding it.
            print(f"\nError processing {input_path}: {e}")
            failed += 1

    return success, failed, dots_added, no_metadata


# Process the RGB images.
print("\nProcessing RGB images...")
rgb_files = [f for f in os.listdir(rgb_input_dir) if f.endswith('.png')]
success_rgb, failed_rgb, dot_added_rgb, no_metadata_rgb = _convert_images(
    rgb_files, rgb_input_dir, "rgb", "RGB images"
)

print(f"RGB - Success: {success_rgb}, Failed: {failed_rgb}, Dots added: {dot_added_rgb}, No metadata: {no_metadata_rgb}")

# Process the IR images (saved with the "lst_" prefix).
print("\nProcessing IR images...")
ir_files = [f for f in os.listdir(ir_input_dir) if f.endswith('.png')]
success_ir, failed_ir, dot_added_ir, no_metadata_ir = _convert_images(
    ir_files, ir_input_dir, "lst", "IR images"
)

print(f"IR - Success: {success_ir}, Failed: {failed_ir}, Dots added: {dot_added_ir}, No metadata: {no_metadata_ir}")
print(f"\nAll images saved to {output_dir}")


In [None]:
# Pick 1000 RGB images and 1000 LST images from ./seperated_all and copy them to ./seperated_eval
# Note: the picked images must NOT already exist in ./seperated (the training set)

import os
import random
import shutil
from tqdm import tqdm

source_dir = "./seperated_all"
eval_dir = "./seperated_eval"
train_dir = "./seperated"
num_samples_per_type = 1000

# Create the output directory.
os.makedirs(eval_dir, exist_ok=True)

# List every PNG in the source directory. os.listdir returns names in
# arbitrary order, so sort them to make the seeded sampling reproducible.
all_images = sorted(f for f in os.listdir(source_dir) if f.endswith('.png'))
print(f"Total images in {source_dir}: {len(all_images)}")

# Split into RGB and LST images. These lists deliberately do not reuse the
# name `rgb_images`: in the shared notebook namespace that would overwrite
# the "./rgb" path setting used by the annotation-processing cell.
rgb_candidates = [f for f in all_images if f.startswith('rgb_')]
lst_candidates = [f for f in all_images if f.startswith('lst_')]
print(f"RGB images: {len(rgb_candidates)}, LST images: {len(lst_candidates)}")

# File names already present in the training directory (to be excluded).
if os.path.exists(train_dir):
    train_images = set(os.listdir(train_dir))
    print(f"Images in {train_dir} (to exclude): {len(train_images)}")
else:
    train_images = set()
    print(f"{train_dir} does not exist, no images to exclude")

# Keep only images that are not part of the training set.
available_rgb = [img for img in rgb_candidates if img not in train_images]
available_lst = [img for img in lst_candidates if img not in train_images]
print(f"Available RGB images for eval: {len(available_rgb)}")
print(f"Available LST images for eval: {len(available_lst)}")

# Never request more samples than are available.
actual_rgb_samples = min(num_samples_per_type, len(available_rgb))
actual_lst_samples = min(num_samples_per_type, len(available_lst))
print(f"Sampling {actual_rgb_samples} RGB images and {actual_lst_samples} LST images")

# Random sampling with a fixed seed for reproducibility.
random.seed(42)
selected_rgb = random.sample(available_rgb, actual_rgb_samples)
selected_lst = random.sample(available_lst, actual_lst_samples)

# Copy the selected RGB images first, then the LST images.
for label, selected in (("RGB", selected_rgb), ("LST", selected_lst)):
    print(f"\nCopying {label} images to {eval_dir}...")
    for img_name in tqdm(selected, desc=f"Copying {label} images"):
        src_path = os.path.join(source_dir, img_name)
        dst_path = os.path.join(eval_dir, img_name)
        shutil.copy2(src_path, dst_path)

print(f"\nSuccessfully copied {len(selected_rgb)} RGB and {len(selected_lst)} LST images to {eval_dir}")
print(f"Total: {len(selected_rgb) + len(selected_lst)} images")


In [None]:
# Sample 100 RGB and 100 LST images and write a JSON file (a single array of items) in the format below
"""
{
    "image": "/home/agi592/yjju/EarthData/seperated/rgb_KAIK0_aqua_day_coco3_23090819_lh14_32.924186_-82.100136_34.077339_-80.717257.png",
    "annotation": "",
    "cond_name": "modis:rgb"
},
"""

import os
import random
import json

source_dir = "./seperated_eval"
# NOTE: despite the variable name, the output is one JSON array, not JSONL.
output_jsonl = "./stage1_preprocessed_items_sep_eval.json"
num_samples_per_type = 100

# List every PNG in the eval directory. os.listdir returns names in
# arbitrary order, so sort them to make the seeded sampling reproducible.
all_images = sorted(f for f in os.listdir(source_dir) if f.endswith('.png'))
print(f"Total images in {source_dir}: {len(all_images)}")

# Split into RGB and LST images. These lists deliberately do not reuse the
# name `rgb_images`: in the shared notebook namespace that would overwrite
# the "./rgb" path setting used by the annotation-processing cell.
rgb_candidates = [f for f in all_images if f.startswith('rgb_')]
lst_candidates = [f for f in all_images if f.startswith('lst_')]
print(f"RGB images: {len(rgb_candidates)}, LST images: {len(lst_candidates)}")

# Never request more samples than are available.
actual_rgb_samples = min(num_samples_per_type, len(rgb_candidates))
actual_lst_samples = min(num_samples_per_type, len(lst_candidates))
print(f"Sampling {actual_rgb_samples} RGB images and {actual_lst_samples} LST images")

# Random sampling with a fixed seed for reproducibility.
random.seed(42)
selected_rgb = random.sample(rgb_candidates, actual_rgb_samples)
selected_lst = random.sample(lst_candidates, actual_lst_samples)

# Build the JSON items: all RGB items first, then all LST items.
# Annotations are left empty.
output_items = []
for cond_name, selected in (("modis:rgb", selected_rgb), ("modis:lst", selected_lst)):
    output_items.extend(
        {
            "image": os.path.abspath(os.path.join(source_dir, img_name)),
            "annotation": "",
            "cond_name": cond_name
        }
        for img_name in selected
    )

# Save everything as a single JSON array.
with open(output_jsonl, 'w', encoding='utf-8') as f:
    json.dump(output_items, f, ensure_ascii=False, indent=2)

print(f"\nGenerated {len(output_items)} items ({len(selected_rgb)} RGB + {len(selected_lst)} LST)")
print(f"Output saved to: {output_jsonl}")
