### checking

사용 python 버전 확인

In [1]:
import sys
print(sys.version)

3.10.19 (main, Oct 21 2025, 16:43:05) [GCC 11.2.0]


CPU/GPU 사용 확인

In [2]:
import torch

# CPU/GPU 사용 여부 확인
if torch.cuda.is_available():
    device = torch.device("cuda")
    print("현재 실행 장치: GPU (CUDA)")
    print("GPU 이름:", torch.cuda.get_device_name(0))
else:
    device = torch.device("cpu")
    print("현재 실행 장치: CPU")

현재 실행 장치: GPU (CUDA)
GPU 이름: NVIDIA RTX 6000 Ada Generation


### LIBRARY

In [3]:
import os
import pandas as pd
import ee
import re
from datetime import timedelta



### CSV

호소 chla 자료

In [4]:
# 최상위 폴더 경로
folder = r"/home/khs/data/csv/hoso chla/"

# 결과 저장
df_list = []

# 모든 하위 폴더 포함하여 .csv 찾기
for root, dirs, files in os.walk(folder):
    for file in files:
        if file.lower().endswith('.csv'):
            file_path = os.path.join(root, file)
            try:
                df = pd.read_csv(file_path, encoding='cp949')  # 또는 utf-8-sig
                df_list.append(df)
            except Exception as e:
                print(f"[오류] {file_path} 읽기 실패: {e}")

# 결측값 제거 후 전체 병합
if df_list:
    total_df = pd.concat(df_list, ignore_index=True).dropna()
    print(f"\n 총 병합된 파일 수: {len(df_list)}개")
    print(f" 총 병합된 행 수: {len(total_df)} rows")
    display(total_df.head())
else:
    print(" 병합할 수 있는 CSV 파일이 없습니다.")


 총 병합된 파일 수: 8개
 총 병합된 행 수: 47776 rows


Unnamed: 0,분류번호,측정소명,년/월/일,회차,경도,위도,채수시각,클로로필 a(㎎/㎥)
0,3008B60,대청댐2,2020/01/02,1회차 상층부,"127°29'5.4""","36°28'37""",15:05,1.2
1,3008B50,대청댐3,2020/01/02,1회차 상층부,"127°30'23.8""","36°30'43.2""",15:20,1.4
2,3008B10,대청댐4,2020/01/02,1회차 상층부,"127°38'8.4""","36°22'18.5""",14:00,1.8
3,3008B30,대청댐5(대청호),2020/01/02,1회차 상층부,"127°33'9.3""","36°25'59.2""",14:40,1.2
4,3008B20,대청댐6,2020/01/02,1회차 상층부,"127°33'25.1""","36°23'18.6""",14:20,2.0


댐 목록 추출

In [5]:
# 측정소명에서 숫자 + 괄호 제거 → 댐 이름만 추출
total_df['댐명'] = (
    total_df['측정소명']
    .str.replace(r'\d+', '', regex=True)        # 숫자 제거
    .str.replace(r'\(.*?\)', '', regex=True)    # 괄호 내용 제거
    .str.strip()                                # 공백 제거
)

# 중복 없는 댐 목록 추출
dam_list = total_df['댐명'].unique()

print(dam_list)

['대청댐' '섬진강댐' '임하댐' '충주조정지댐' '보령댐' '부안댐' '영산호' '용담댐' '장성댐' '주암조정지댐' '충주댐'
 '간월호' '부남호' '팔당댐' '낙동강하구' '보현산댐' '성덕댐' '수어댐' '안계댐' '안동댐' '영천댐' '운문댐'
 '의암댐' '광포호' '봉포호' '송지호' '천진호' '화진포호' '소양강댐' '주암댐' '감포댐' '군위댐' '밀양댐' '평림댐'
 '횡성댐' '광동댐' '김천부항댐' '달방댐' '예당지' '나주댐' '영암호' '가창댐' '회야호' '광주댐' '구천댐' '남강댐'
 '연초댐' '영주댐' '청평댐' '경포호' '금호호' '대곡댐' '대암댐' '사연댐' '선암댐' '장흥댐' '합천댐' '고삼지'
 '괴산댐' '이동지' '주남저수지' '담양댐' '보성강댐' '남양호' '보문호' '서호' '아산호' '춘천댐' '매호' '영랑호'
 '청초호' '향호' '경천지' '대아지' '동화호' '화천댐' '금강하구' '탑정지' '대호' '삽교호' '신갈지' '광교지'
 '원천지' '평화의댐' '동복댐' '한탄강댐' '도암댐' '군남댐']


수역 리스트 정의

In [6]:
# ---------------------
# 대형 댐호 (dam)
# ---------------------
dam_list = [
    '대청댐1','대청댐2','대청댐3','대청댐4','대청댐5(대청호)','대청댐6',
    '섬진강댐1(옥정호)','섬진강댐2(옥정호)','섬진강댐3(옥정호)',
    '임하댐1','임하댐2','임하댐3',

    '보령댐1','보령댐2','보령댐3',
    '부안댐1','부안댐2','부안댐3',
    '용담댐1','용담댐2','용담댐3','용담댐4',
    '장성댐1','장성댐2',

    '충주댐1','충주댐2','충주댐3','충주댐4',
    '수어댐1','수어댐2',
    '안계댐',

    '안동댐1','안동댐2','안동댐3',

    '영천댐1(영천호)','영천댐2(영천호)',
    '운문댐1','운문댐2',

    '의암댐1','의암댐2','의암댐3',

    '팔당댐1','팔당댐2','팔당댐3','팔당댐4','팔당댐5',

    '소양강댐1','소양강댐2','소양강댐3','소양강댐4','소양강댐5',

    '주암댐1','주암댐2','주암댐3',

    '군위댐1','군위댐2',
    '밀양댐1','밀양댐2',

    '횡성댐1','횡성댐2','횡성댐3',

    '광동댐','김천부항댐','달방댐',

    '나주댐1','나주댐2',
    '가창댐1','가창댐2',
    '구천댐',

    '남강댐1(진양호)','남강댐2(진양호)','남강댐3(진양호)',

    '연초댐1','연초댐2',

    '영주댐1','영주댐2','영주댐3','영주댐4',

    '청평댐1','청평댐2','청평댐3',

    '대곡댐1','대곡댐2',
    '대암댐1','대암댐2',
    '사연댐1','사연댐2',

    '선암댐',

    '장흥댐1','장흥댐2','장흥댐3','장흥댐4',

    '합천댐1','합천댐2','합천댐3',

    '괴산댐1','괴산댐2','괴산댐3',

    '담양댐1','담양댐2',

    '보성강댐1','보성강댐2',

    '화천댐1(파로호)','화천댐2(파로호)','화천댐3(파로호)',

    '춘천댐1','춘천댐2','춘천댐3',

    '한탄강댐','도암댐','군남댐','보현산댐','감포댐',

    '광주댐1','광주댐2',
    '회야호1','회야호2'
]


# ---------------------
# 조정지댐 (regulating)
# ---------------------
regulating_list = [
    '충주조정지댐1','충주조정지댐2',
    '주암조정지댐1(상사호)','주암조정지댐3(상사호)'
]


# ---------------------
# 자연호 (natural)
# ---------------------
natural_list = [
    '광포호','봉포호','송지호','천진호','화진포호',
    '영랑호','경포호1','경포호2','청초호',
    '서호1','서호2','서호3'
]


# ---------------------
# 인공저수지 (artificial)
# ---------------------
artificial_list = [
    '예당지1','예당지2','예당지3',
    '고삼지1','고삼지2','고삼지3',

    '경천지1','경천지2',
    '대아지1','대아지2','대아지3',

    '이동지1','이동지2',

    '향호','매호',

    '평림댐',

    '금호호1','금호호2','금호호3',

    '주남저수지',

    '보문호1','보문호2',

    '원천지1','원천지2','원천지3',
    '광교지1','광교지2',

    '아산호1(평택호)','아산호2(평택호)','아산호3(평택호)',

    '탑정지1(논산지)','탑정지2(논산지)',

    '동화호'
]


# ---------------------
# 간척호 (lagoon)
# ---------------------
lagoon_list = [
    '영암호1','영암호2','영암호3',
    '부남호1','부남호2','부남호3',
    '간월호1','간월호2','간월호3',
    '남양호1','남양호2','남양호3',
    '대호1','대호2','대호3',
    '삽교호1','삽교호2','삽교호3'
]


# ---------------------
# 하구호 (estuary)
# ---------------------
estuary_list = [
    '금강하구1','금강하구2','금강하구3',
    '낙동강하구1','낙동강하구2','낙동강하구3',
    '영산호1','영산호2','영산호3'
]

측정소 선택

In [7]:
# 호소 유형 분류 함수
def classify_lake(name):
    if name in dam_list:
        return 'dam'
    elif name in natural_list:
        return 'natural'
    elif name in regulating_list:
        return 'regulating'
    elif name in artificial_list:
        return 'artificial'
    elif name in lagoon_list:
        return 'lagoon'
    elif name in estuary_list:
        return 'estuary'
    else:
        return 'unknown'

# total_df에 새로운 컬럼 추가
total_df["호소유형"] = total_df["측정소명"].apply(classify_lake)

# 결과 확인
print(total_df["호소유형"].value_counts())

호소유형
dam           35307
artificial     5118
lagoon         2604
estuary        1792
natural        1062
unknown         965
regulating      928
Name: count, dtype: int64


In [8]:
selected_types = ["dam", "regulating"]
total_df = total_df[ total_df["호소유형"].isin(selected_types) ].copy()

In [9]:
print("최종 선택된 호소유형 분포:")
print(total_df["호소유형"].value_counts())
print("최종 선택된 측정소 개수:", total_df["측정소명"].nunique())

최종 선택된 호소유형 분포:
호소유형
dam           35307
regulating      928
Name: count, dtype: int64
최종 선택된 측정소 개수: 121


In [10]:
# 측정소명에서 숫자 + 괄호 제거 → 댐 이름만 추출
total_df['댐명'] = (
    total_df['측정소명']
    .str.replace(r'\d+', '', regex=True)        # 숫자 제거
    .str.replace(r'\(.*?\)', '', regex=True)    # 괄호 내용 제거
    .str.strip()                                # 공백 제거
)

# 중복 없는 댐 목록 추출
dam_list = total_df['댐명'].unique()

print(dam_list)

['대청댐' '섬진강댐' '임하댐' '충주조정지댐' '보령댐' '부안댐' '용담댐' '장성댐' '주암조정지댐' '충주댐' '팔당댐'
 '보현산댐' '수어댐' '안계댐' '안동댐' '영천댐' '운문댐' '의암댐' '소양강댐' '주암댐' '감포댐' '군위댐' '밀양댐'
 '횡성댐' '광동댐' '김천부항댐' '달방댐' '나주댐' '가창댐' '회야호' '광주댐' '구천댐' '남강댐' '연초댐' '영주댐'
 '청평댐' '대곡댐' '대암댐' '사연댐' '선암댐' '장흥댐' '합천댐' '괴산댐' '담양댐' '보성강댐' '춘천댐' '화천댐'
 '한탄강댐' '도암댐' '군남댐']


### GEE

In [11]:
# 서버 사용 시 직접 터미널로 인증 진행

# 기존 인증 파일 삭제(꼬였을 수 있으니 정리)
# rm ~/.config/earthengine/credentials

# 터미널에서 인증 실행
# earthengine authenticate --auth_mode=notebook

In [12]:
# 최초 1회만 필요 (브라우저 인증)
ee.Authenticate()

ee.Initialize()

### matching

In [13]:
 # DMS->DD (이미 DD면 통과)
def dms_to_dd(v):
    try:
        if isinstance(v, (int, float)):
            return float(v)
        s = str(v).strip()
        if re.match(r'^\d+(\.\d+)?$', s):
            return float(s)
        parts = re.split('[°\'"]+', s)
        parts = [p for p in parts if p]
        deg, minute, sec = map(float, parts[:3])
        return deg + minute/60 + sec/3600
    except Exception:
        return None

ImageCollection 정의

In [14]:
# sentinel-2
S2 = ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED")

# landsat
L8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2")
L9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2")
Landsat = L8.merge(L9)

# 결과 저장용 리스트
results = []

불러오기 (날짜별)

In [35]:
for date, group in total_df.groupby("년/월/일"):

    obs_date = pd.to_datetime(date)
    start = obs_date.strftime("%Y-%m-%d")
    end   = (obs_date + timedelta(days=1)).strftime("%Y-%m-%d")

    # -----------------------------------
    # 관측 지점 → FeatureCollection
    # -----------------------------------
    features = []
    for idx, row in group.iterrows():
        lon = dms_to_dd(row["경도"])
        lat = dms_to_dd(row["위도"])

        pt = ee.Geometry.Point([lon, lat])
        feat = ee.Feature(pt, {"idx": idx})
        features.append(feat)

    fc = ee.FeatureCollection(features)

    # -----------------------------------
    # Sentinel-2
    # -----------------------------------
    s2_img = (S2
              .filterBounds(fc)
              .filterDate(start, end)
              .sort("system:time_start", False)
              .first())

    if s2_img.getInfo() is None:
        print(date, "→ Sentinel-2 없음")
        continue

    s2_selected = s2_img.select([
        "SCL",
        "B1","B2","B3","B4","B5","B6","B7",
        "B8","B8A","B9","B11","B12"
    ])

    s2_reduced = s2_selected.reduceRegions(
        collection = fc,
        reducer = ee.Reducer.first(),
        scale = 10
    )

    s2_rows = s2_reduced.getInfo()["features"]

    # -----------------------------------
    # Landsat
    # -----------------------------------
    lst_img = (Landsat
               .filterBounds(fc)
               .filterDate(start, end)
               .sort("system:time_start", False)
               .first())

    # Landsat 없으면 해당 날짜 전체 제거
    if lst_img.getInfo() is None:
        print(date, "→ Landsat 없음")
        continue

    # ST_B10 → 섭씨 변환
    lst_selected = (
        lst_img.select("ST_B10")
        .multiply(0.00341802)
        .add(149.0)
        .subtract(273.15)
        .rename("LST")
    )

    lst_reduced = lst_selected.reduceRegions(
        collection = fc,
        reducer = ee.Reducer.first(),
        scale = 30
    )

    # LST가 있는 지점만 저장
    lst_rows = {
        f["properties"]["idx"]: f["properties"]["LST"]
        for f in lst_reduced.getInfo()["features"]
        if "LST" in f["properties"]
    }
    # lst_rows = {
    #     f["properties"]["idx"]: f["properties"]["LST"]
    #     for f in lst_reduced.getInfo()["features"]
    # }

    # -----------------------------------
    # Sentinel + Landsat 병합
    # -----------------------------------
    for item in s2_rows:
        props = item["properties"]
        idx = props["idx"]

        # SCL 기반 수질 픽셀 필터
        scl = props.get("SCL", None)
        if scl not in [2, 6, 7]:
            continue

        # Landsat LST 없는 지점 제거
        if idx not in lst_rows:
            continue

        out = total_df.loc[idx].to_dict()

        # Sentinel-2 밴드 추가
        out.update(props)

        # Landsat LST 추가
        out["LST"] = lst_rows.get(idx, None)

        # 메타정보
        out["image_id_S2"] = s2_img.get("system:index").getInfo()
        out["image_id_Landsat"] = lst_img.get("system:index").getInfo()

        results.append(out)

df_out = pd.DataFrame(results)

2018/01/02 → Sentinel-2 없음
2018/01/03 → Sentinel-2 없음
2018/01/04 → Sentinel-2 없음
2018/01/05 → Sentinel-2 없음
2018/01/08 → Sentinel-2 없음
2018/01/09 → Sentinel-2 없음
2018/01/10 → Sentinel-2 없음
2018/01/12 → Sentinel-2 없음
2018/01/15 → Sentinel-2 없음
2018/01/19 → Sentinel-2 없음
2018/01/22 → Sentinel-2 없음
2018/01/23 → Sentinel-2 없음
2018/01/29 → Sentinel-2 없음
2018/02/01 → Sentinel-2 없음
2018/02/02 → Sentinel-2 없음
2018/02/05 → Sentinel-2 없음
2018/02/06 → Sentinel-2 없음
2018/02/08 → Sentinel-2 없음
2018/02/09 → Sentinel-2 없음
2018/02/12 → Sentinel-2 없음
2018/02/14 → Sentinel-2 없음
2018/02/19 → Sentinel-2 없음
2018/02/20 → Sentinel-2 없음
2018/02/22 → Landsat 없음
2018/02/26 → Sentinel-2 없음
2018/02/27 → Sentinel-2 없음
2018/02/28 → Sentinel-2 없음
2018/03/02 → Sentinel-2 없음
2018/03/05 → Sentinel-2 없음
2018/03/06 → Sentinel-2 없음
2018/03/07 → Sentinel-2 없음
2018/03/08 → Sentinel-2 없음
2018/03/09 → Landsat 없음
2018/03/12 → Sentinel-2 없음
2018/03/13 → Sentinel-2 없음
2018/03/14 → Sentinel-2 없음
2018/03/15 → Sentinel-2 없음
2018/03

In [39]:
results = []

for date, group in total_df.groupby("년/월/일"):

    obs_date = pd.to_datetime(date)
    start = obs_date.strftime("%Y-%m-%d")
    end   = (obs_date + timedelta(days=1)).strftime("%Y-%m-%d")

    # -----------------------------------
    # 관측 지점 → FeatureCollection
    # (Landsat용 buffer 적용)
    # -----------------------------------
    s2_features  = []
    lst_features = []

    for idx, row in group.iterrows():
        lon = dms_to_dd(row["경도"])
        lat = dms_to_dd(row["위도"])

        # Sentinel-2: point
        pt_s2 = ee.Geometry.Point([lon, lat])
        s2_features.append(
            ee.Feature(pt_s2, {"idx": idx})
        )

        # Landsat: buffer (60 m)
        pt_lst = ee.Geometry.Point([lon, lat]).buffer(60)
        lst_features.append(
            ee.Feature(pt_lst, {"idx": idx})
        )

    fc_s2  = ee.FeatureCollection(s2_features)
    fc_lst = ee.FeatureCollection(lst_features)

    # -----------------------------------
    # Sentinel-2
    # -----------------------------------
    s2_img = (S2
              .filterBounds(fc_s2)
              .filterDate(start, end)
              .sort("system:time_start", False)
              .first())

    if s2_img.getInfo() is None:
        print(date, "→ Sentinel-2 없음")
        continue

    s2_selected = s2_img.select([
        "SCL",
        "B1","B2","B3","B4","B5","B6","B7",
        "B8","B8A","B9","B11","B12"
    ])

    s2_reduced = s2_selected.reduceRegions(
        collection = fc_s2,
        reducer    = ee.Reducer.first(),
        scale      = 10
    )

    s2_rows = s2_reduced.getInfo()["features"]

    # -----------------------------------
    # Landsat LST
    # -----------------------------------
    lst_img = (Landsat
               .filterBounds(fc_lst)
               .filterDate(start, end)
               .sort("system:time_start", False)
               .first())

    if lst_img.getInfo() is None:
        print(date, "→ Landsat 없음")
        continue

    # ST_B10 → 섭씨
    lst_selected = (
        lst_img.select("ST_B10")
        .multiply(0.00341802)
        .add(149.0)
        .subtract(273.15)
        .rename("LST")
    )

    # buffer 영역 평균값 사용
    lst_reduced = lst_selected.reduceRegions(
        collection = fc_lst,
        reducer    = ee.Reducer.mean(),
        scale      = 30
    )

    # idx → LST 매핑
    lst_rows = {
        f["properties"]["idx"]: f["properties"]["LST"]
        for f in lst_reduced.getInfo()["features"]
        if "LST" in f["properties"] and f["properties"]["LST"] is not None
    }

    # -----------------------------------
    # Sentinel + Landsat 병합
    # -----------------------------------
    for item in s2_rows:
        props = item["properties"]
        idx   = props["idx"]

        # Sentinel-2 SCL 필터 (수체)
        scl = props.get("SCL", None)
        if scl not in [2, 6, 7]:
            continue

        # Landsat LST 없는 지점 제거
        if idx not in lst_rows:
            continue

        out = total_df.loc[idx].to_dict()

        # Sentinel-2 밴드
        out.update(props)

        # Landsat LST
        out["LST"] = lst_rows[idx]

        # 메타정보
        out["image_id_S2"]       = s2_img.get("system:index").getInfo()
        out["image_id_Landsat"]  = lst_img.get("system:index").getInfo()

        results.append(out)

df_out = pd.DataFrame(results)

2018/01/02 → Sentinel-2 없음
2018/01/03 → Sentinel-2 없음
2018/01/04 → Sentinel-2 없음
2018/01/05 → Sentinel-2 없음
2018/01/08 → Sentinel-2 없음
2018/01/09 → Sentinel-2 없음
2018/01/10 → Sentinel-2 없음
2018/01/12 → Sentinel-2 없음
2018/01/15 → Sentinel-2 없음
2018/01/19 → Sentinel-2 없음
2018/01/22 → Sentinel-2 없음
2018/01/23 → Sentinel-2 없음
2018/01/29 → Sentinel-2 없음
2018/02/01 → Sentinel-2 없음
2018/02/02 → Sentinel-2 없음
2018/02/05 → Sentinel-2 없음
2018/02/06 → Sentinel-2 없음
2018/02/08 → Sentinel-2 없음
2018/02/09 → Sentinel-2 없음
2018/02/12 → Sentinel-2 없음
2018/02/14 → Sentinel-2 없음
2018/02/19 → Sentinel-2 없음
2018/02/20 → Sentinel-2 없음
2018/02/22 → Landsat 없음
2018/02/26 → Sentinel-2 없음
2018/02/27 → Sentinel-2 없음
2018/02/28 → Sentinel-2 없음
2018/03/02 → Sentinel-2 없음
2018/03/05 → Sentinel-2 없음
2018/03/06 → Sentinel-2 없음
2018/03/07 → Sentinel-2 없음
2018/03/08 → Sentinel-2 없음
2018/03/09 → Landsat 없음
2018/03/12 → Sentinel-2 없음
2018/03/13 → Sentinel-2 없음
2018/03/14 → Sentinel-2 없음
2018/03/15 → Sentinel-2 없음
2018/03

In [43]:
import numpy as np
from datetime import timedelta

#### 이걸로 진행
results = []

for date, group in total_df.groupby("년/월/일"):

    obs_date = pd.to_datetime(date)

    # 날짜는 당일
    start = obs_date.strftime("%Y-%m-%d")
    end   = (obs_date + timedelta(days=1)).strftime("%Y-%m-%d")

    # -----------------------------------
    # 관측 지점 → FeatureCollection
    # Sentinel-2 / Landsat 분리
    # -----------------------------------
    s2_features  = []
    lst_features = []

    for idx, row in group.iterrows():
        lon = dms_to_dd(row["경도"])
        lat = dms_to_dd(row["위도"])

        # Sentinel-2: point
        pt_s2 = ee.Geometry.Point([lon, lat])
        s2_features.append(
            ee.Feature(pt_s2, {"idx": idx})
        )

        # Landsat: buffer (60 m)
        pt_lst = ee.Geometry.Point([lon, lat]).buffer(60)
        lst_features.append(
            ee.Feature(pt_lst, {"idx": idx})
        )

    fc_s2  = ee.FeatureCollection(s2_features)
    fc_lst = ee.FeatureCollection(lst_features)

    # -----------------------------------
    # Sentinel-2
    # -----------------------------------
    s2_img = (S2
              .filterBounds(fc_s2)
              .filterDate(start, end)
              .sort("system:time_start", False)
              .first())

    if s2_img.getInfo() is None:
        print(date, "→ Sentinel-2 없음")
        continue

    s2_selected = s2_img.select([
        "SCL",
        "B1","B2","B3","B4","B5","B6","B7",
        "B8","B8A","B9","B11","B12"
    ])

    s2_reduced = s2_selected.reduceRegions(
        collection = fc_s2,
        reducer    = ee.Reducer.first(),
        scale      = 10
    )

    s2_rows = s2_reduced.getInfo()["features"]

    # -----------------------------------
    # Landsat LST
    # -----------------------------------
    lst_img = (Landsat
               .filterBounds(fc_lst)
               .filterDate(start, end)
               .sort("system:time_start", False)
               .first())

    if lst_img.getInfo() is None:
        print(date, "→ Landsat 없음")
        continue

    # ST_B10 → 섭씨
    lst_selected = (
        lst_img.select("ST_B10")
        .multiply(0.00341802)
        .add(149.0)
        .subtract(273.15)
        .rename("LST")
    )

    # buffer 평균
    lst_reduced = lst_selected.reduceRegions(
        collection = fc_lst,
        reducer    = ee.Reducer.mean(),
        scale      = 30
    )

    # idx → LST 매핑 (null 제거)
    lst_rows = {
        f["properties"]["idx"]: f["properties"]["LST"]
        for f in lst_reduced.getInfo()["features"]
        if f["properties"].get("LST") is not None
    }

    # -----------------------------------
    # Sentinel + Landsat 병합
    # -----------------------------------
    for item in s2_rows:
        props = item["properties"]
        idx   = props["idx"]

        # Sentinel-2 수체 필터
        scl = props.get("SCL", None)
        if scl not in [6, 7]:   # 필요하면 [2,6,7]
            continue

        out = total_df.loc[idx].to_dict()

        # Sentinel-2 밴드
        out.update(props)

        # Landsat LST (있으면 쓰고, 없으면 NaN)
        if idx in lst_rows:
            out["LST"] = lst_rows[idx]
            out["LST_valid"] = 1
        else:
            out["LST"] = np.nan
            out["LST_valid"] = 0

        # 메타정보
        out["image_id_S2"]      = s2_img.get("system:index").getInfo()
        out["image_id_Landsat"] = lst_img.get("system:index").getInfo()

        results.append(out)

df_out = pd.DataFrame(results)


2018/01/02 → Sentinel-2 없음
2018/01/03 → Sentinel-2 없음
2018/01/04 → Sentinel-2 없음
2018/01/05 → Sentinel-2 없음
2018/01/08 → Sentinel-2 없음
2018/01/09 → Sentinel-2 없음
2018/01/10 → Sentinel-2 없음
2018/01/12 → Sentinel-2 없음
2018/01/15 → Sentinel-2 없음
2018/01/19 → Sentinel-2 없음
2018/01/22 → Sentinel-2 없음
2018/01/23 → Sentinel-2 없음
2018/01/29 → Sentinel-2 없음
2018/02/01 → Sentinel-2 없음
2018/02/02 → Sentinel-2 없음
2018/02/05 → Sentinel-2 없음
2018/02/06 → Sentinel-2 없음
2018/02/08 → Sentinel-2 없음
2018/02/09 → Sentinel-2 없음
2018/02/12 → Sentinel-2 없음
2018/02/14 → Sentinel-2 없음
2018/02/19 → Sentinel-2 없음
2018/02/20 → Sentinel-2 없음
2018/02/22 → Landsat 없음
2018/02/26 → Sentinel-2 없음
2018/02/27 → Sentinel-2 없음
2018/02/28 → Sentinel-2 없음
2018/03/02 → Sentinel-2 없음
2018/03/05 → Sentinel-2 없음
2018/03/06 → Sentinel-2 없음
2018/03/07 → Sentinel-2 없음
2018/03/08 → Sentinel-2 없음
2018/03/09 → Landsat 없음
2018/03/12 → Sentinel-2 없음
2018/03/13 → Sentinel-2 없음
2018/03/14 → Sentinel-2 없음
2018/03/15 → Sentinel-2 없음
2018/03

In [44]:
print(df_out.head())
print(len(df_out))
print(df_out.columns)

      분류번호       측정소명       년/월/일       회차            경도            위도   채수시각  \
0  1010B50       춘천댐1  2018/12/19      1회차   127°39'.28"  37°58'44.33"  12:43   
1  2018B10  남강댐3(진양호)  2019/03/11  1회차 상층부  127°58'38.4"   35°12'24.1"  10:30   
2  2018B10  남강댐3(진양호)  2019/03/11  1회차 중층부  127°58'38.4"   35°12'24.1"  10:30   
3  2018B10  남강댐3(진양호)  2019/03/11  1회차 하층부  127°58'38.4"   35°12'24.1"  10:30   
4  5101B10       장흥댐1  2019/04/03  1회차 상층부    126°52'57"    34°45'4.2"  10:00   

   클로로필 a(㎎/㎥)   댐명 호소유형  ...     B7   B8  B8A     B9  SCL    idx  LST  \
0          4.5  춘천댐  dam  ...  230.0  280  227  407.0    6  44900  NaN   
1          5.2  남강댐  dam  ...  120.0  136  126  116.0    6   7948  NaN   
2          5.6  남강댐  dam  ...  120.0  136  126  116.0    6   7953  NaN   
3          5.1  남강댐  dam  ...  120.0  136  126  116.0    6   7958  NaN   
4          9.2  장흥댐  dam  ...   70.0   66   68  101.0    6   8298  NaN   

   LST_valid                             image_id_S2        image_id

In [46]:
df_out

Unnamed: 0,분류번호,측정소명,년/월/일,회차,경도,위도,채수시각,클로로필 a(㎎/㎥),댐명,호소유형,...,B7,B8,B8A,B9,SCL,idx,LST,LST_valid,image_id_S2,image_id_Landsat
0,1010B50,춘천댐1,2018/12/19,1회차,"127°39'.28""","37°58'44.33""",12:43,4.5,춘천댐,dam,...,230.0,280,227,407.0,6,44900,,0,20181219T022109_20181219T022640_T52SCH,1_LC08_116034_20181219
1,2018B10,남강댐3(진양호),2019/03/11,1회차 상층부,"127°58'38.4""","35°12'24.1""",10:30,5.2,남강댐,dam,...,120.0,136,126,116.0,6,7948,,0,20190311T020651_20190311T020648_T52SCD,1_LC08_114036_20190311
2,2018B10,남강댐3(진양호),2019/03/11,1회차 중층부,"127°58'38.4""","35°12'24.1""",10:30,5.6,남강댐,dam,...,120.0,136,126,116.0,6,7953,,0,20190311T020651_20190311T020648_T52SCD,1_LC08_114036_20190311
3,2018B10,남강댐3(진양호),2019/03/11,1회차 하층부,"127°58'38.4""","35°12'24.1""",10:30,5.1,남강댐,dam,...,120.0,136,126,116.0,6,7958,,0,20190311T020651_20190311T020648_T52SCD,1_LC08_114036_20190311
4,5101B10,장흥댐1,2019/04/03,1회차 상층부,"126°52'57""","34°45'4.2""",10:00,9.2,장흥댐,dam,...,70.0,66,68,101.0,6,8298,,0,20190403T021651_20190403T021647_T52SBD,1_LC08_115036_20190403
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
188,4007B10,보성강댐1,2025/10/28,1회차 상층부,"127°8'41.64""","34°48'21.53""",11:25,11.8,보성강댐,dam,...,97.0,81,40,385.0,6,18575,,0,20251028T021821_20251028T022504_T52SCD,1_LC08_115036_20251028
189,4007B20,보성강댐2,2025/10/28,1회차 상층부,"127°7'38.64""","34°48'39.02""",11:00,5.8,보성강댐,dam,...,120.0,97,111,637.0,6,18576,,0,20251028T021821_20251028T022504_T52SCD,1_LC08_115036_20251028
190,4007B70,주암댐1,2025/10/28,3회차 상층부,"127°14'26.74""","35°3'23.78""",10:10,6.0,주암댐,dam,...,52.0,49,62,69.0,6,18577,,0,20251028T021821_20251028T022504_T52SCD,1_LC08_115036_20251028
191,4007B70,주암댐1,2025/10/28,3회차 중층부,"127°14'26.74""","35°3'23.78""",10:10,5.8,주암댐,dam,...,52.0,49,62,69.0,6,18578,,0,20251028T021821_20251028T022504_T52SCD,1_LC08_115036_20251028


In [37]:
# DataFrame을 pickle로 저장
df_out.to_csv("/home/khs/data/csv/hoso_reservoir_fast.csv")

In [38]:
df = pd.read_csv("/home/khs/data/csv/hoso_reservoir_fast.csv")
df

Unnamed: 0.1,Unnamed: 0,분류번호,측정소명,년/월/일,회차,경도,위도,채수시각,클로로필 a(㎎/㎥),댐명,...,B6,B7,B8,B8A,B9,SCL,idx,LST,image_id_S2,image_id_Landsat
0,0,1010B50,춘천댐1,2018/12/19,1회차,"127°39'.28""","37°58'44.33""",12:43,4.5,춘천댐,...,250,230.0,280,227,407.0,6,44900,,20181219T022109_20181219T022640_T52SCH,1_LC08_116034_20181219
1,1,2018B10,남강댐3(진양호),2019/03/11,1회차 상층부,"127°58'38.4""","35°12'24.1""",10:30,5.2,남강댐,...,117,120.0,136,126,116.0,6,7948,,20190311T020651_20190311T020648_T52SCD,1_LC08_114036_20190311
2,2,2018B10,남강댐3(진양호),2019/03/11,1회차 중층부,"127°58'38.4""","35°12'24.1""",10:30,5.6,남강댐,...,117,120.0,136,126,116.0,6,7953,,20190311T020651_20190311T020648_T52SCD,1_LC08_114036_20190311
3,3,2018B10,남강댐3(진양호),2019/03/11,1회차 하층부,"127°58'38.4""","35°12'24.1""",10:30,5.1,남강댐,...,117,120.0,136,126,116.0,6,7958,,20190311T020651_20190311T020648_T52SCD,1_LC08_114036_20190311
4,4,5101B10,장흥댐1,2019/04/03,1회차 상층부,"126°52'57""","34°45'4.2""",10:00,9.2,장흥댐,...,76,70.0,66,68,101.0,6,8298,,20190403T021651_20190403T021647_T52SBD,1_LC08_115036_20190403
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
147,147,2015B20,합천댐2,2024/06/10,1회차 중층부,"128°2'22.4""","35°33'42""",10:30,0.9,합천댐,...,0,0.0,0,0,0.0,6,48023,,20240610T021539_20240610T021535_T52SDE,1_LC08_116034_20240610
148,148,2015B10,합천댐3,2024/06/10,1회차 중층부,"128°1'58.8""","35°36'4.6""",10:00,0.9,합천댐,...,444,414.0,328,356,209.0,6,48024,,20240610T021539_20240610T021535_T52SDE,1_LC08_116034_20240610
149,149,2015B30,합천댐1,2024/06/10,1회차 하층부,"128°1'35.4""","35°32'2.2""",11:00,0.9,합천댐,...,0,0.0,0,0,0.0,6,48041,,20240610T021539_20240610T021535_T52SDE,1_LC08_116034_20240610
150,150,2015B20,합천댐2,2024/06/10,1회차 하층부,"128°2'22.4""","35°33'42""",10:30,0.9,합천댐,...,0,0.0,0,0,0.0,6,48042,,20240610T021539_20240610T021535_T52SDE,1_LC08_116034_20240610


임시 매칭

In [47]:
import pandas as pd
import numpy as np
import ee
from datetime import timedelta

# Earth Engine 초기화 (이미 했으면 생략)
ee.Initialize()

# --------------------------------------------------
# 1. CSV 불러오기
# --------------------------------------------------
df = pd.read_csv("/home/khs/data/csv/hoso_reservoir_fast.csv")

# 날짜 컬럼 datetime 변환
df["년/월/일"] = pd.to_datetime(df["년/월/일"])

# 결과 저장용 리스트
results = []

# --------------------------------------------------
# 2. Landsat Collection (예시: Landsat 8/9 C2 L2)
# --------------------------------------------------
Landsat = (
    ee.ImageCollection("LANDSAT/LC08/C02/T1_L2")
    .merge(ee.ImageCollection("LANDSAT/LC09/C02/T1_L2"))
)

# --------------------------------------------------
# 3. 날짜별 그룹 처리
# --------------------------------------------------
for date, group in df.groupby("년/월/일"):

    start = date.strftime("%Y-%m-%d")
    end   = (date + timedelta(days=1)).strftime("%Y-%m-%d")

    # ----------------------------------------------
    # 3-1. FeatureCollection 생성 (buffer 적용)
    # ----------------------------------------------
    features = []

    for idx, row in group.iterrows():
        lon = dms_to_dd(row["경도"])
        lat = dms_to_dd(row["위도"])

        geom = ee.Geometry.Point([lon, lat]).buffer(60)
        feat = ee.Feature(geom, {"idx": idx})
        features.append(feat)

    fc = ee.FeatureCollection(features)

    # ----------------------------------------------
    # 3-2. Landsat 영상 선택 (당일 only)
    # ----------------------------------------------
    lst_img = (
        Landsat
        .filterBounds(fc)
        .filterDate(start, end)
        .sort("system:time_start", False)
        .first()
    )

    if lst_img.getInfo() is None:
        print(date.date(), "→ Landsat 없음")
        # 날짜에 해당하는 행 전부 NaN 처리
        for idx in group.index:
            out = df.loc[idx].to_dict()
            out["LST"] = np.nan
            out["LST_valid"] = 0
            out["image_id_Landsat"] = None
            results.append(out)
        continue

    # ----------------------------------------------
    # 3-3. ST_B10 → 섭씨 변환
    # ----------------------------------------------
    lst_img = (
        lst_img.select("ST_B10")
        .multiply(0.00341802)
        .add(149.0)
        .subtract(273.15)
        .rename("LST")
    )

    # ----------------------------------------------
    # 3-4. reduceRegions (buffer 평균)
    # ----------------------------------------------
    reduced = lst_img.reduceRegions(
        collection=fc,
        reducer=ee.Reducer.mean(),
        scale=30
    ).getInfo()["features"]

    # idx → LST 매핑
    lst_map = {
        f["properties"]["idx"]: f["properties"].get("LST")
        for f in reduced
    }

    # ----------------------------------------------
    # 3-5. 결과 병합
    # ----------------------------------------------
    for idx in group.index:
        out = df.loc[idx].to_dict()

        lst_val = lst_map.get(idx)

        if lst_val is not None:
            out["LST"] = lst_val
            out["LST_valid"] = 1
        else:
            out["LST"] = np.nan
            out["LST_valid"] = 0

        out["image_id_Landsat"] = lst_img.get("system:index").getInfo()

        results.append(out)

# --------------------------------------------------
# 4. DataFrame 생성
# --------------------------------------------------
df_out = pd.DataFrame(results)

2019-03-11 → Landsat 없음
2019-09-17 → Landsat 없음
2019-11-04 → Landsat 없음
2020-06-08 → Landsat 없음
2021-12-13 → Landsat 없음
2022-02-21 → Landsat 없음
2022-04-12 → Landsat 없음
2022-05-04 → Landsat 없음
2022-06-21 → Landsat 없음
2022-07-01 → Landsat 없음
2022-10-11 → Landsat 없음
2023-03-13 → Landsat 없음
2023-03-20 → Landsat 없음
2023-07-26 → Landsat 없음
2024-01-04 → Landsat 없음
2024-04-01 → Landsat 없음
2024-06-10 → Landsat 없음


In [48]:
df_out

Unnamed: 0.1,Unnamed: 0,분류번호,측정소명,년/월/일,회차,경도,위도,채수시각,클로로필 a(㎎/㎥),댐명,...,B7,B8,B8A,B9,SCL,idx,LST,image_id_S2,image_id_Landsat,LST_valid
0,0,1010B50,춘천댐1,2018-12-19,1회차,"127°39'.28""","37°58'44.33""",12:43,4.5,춘천댐,...,230.0,280,227,407.0,6,44900,,20181219T022109_20181219T022640_T52SCH,,0
1,1,2018B10,남강댐3(진양호),2019-03-11,1회차 상층부,"127°58'38.4""","35°12'24.1""",10:30,5.2,남강댐,...,120.0,136,126,116.0,6,7948,,20190311T020651_20190311T020648_T52SCD,,0
2,2,2018B10,남강댐3(진양호),2019-03-11,1회차 중층부,"127°58'38.4""","35°12'24.1""",10:30,5.6,남강댐,...,120.0,136,126,116.0,6,7953,,20190311T020651_20190311T020648_T52SCD,,0
3,3,2018B10,남강댐3(진양호),2019-03-11,1회차 하층부,"127°58'38.4""","35°12'24.1""",10:30,5.1,남강댐,...,120.0,136,126,116.0,6,7958,,20190311T020651_20190311T020648_T52SCD,,0
4,4,5101B10,장흥댐1,2019-04-03,1회차 상층부,"126°52'57""","34°45'4.2""",10:00,9.2,장흥댐,...,70.0,66,68,101.0,6,8298,,20190403T021651_20190403T021647_T52SBD,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
147,147,2015B20,합천댐2,2024-06-10,1회차 중층부,"128°2'22.4""","35°33'42""",10:30,0.9,합천댐,...,0.0,0,0,0.0,6,48023,,20240610T021539_20240610T021535_T52SDE,,0
148,148,2015B10,합천댐3,2024-06-10,1회차 중층부,"128°1'58.8""","35°36'4.6""",10:00,0.9,합천댐,...,414.0,328,356,209.0,6,48024,,20240610T021539_20240610T021535_T52SDE,,0
149,149,2015B30,합천댐1,2024-06-10,1회차 하층부,"128°1'35.4""","35°32'2.2""",11:00,0.9,합천댐,...,0.0,0,0,0.0,6,48041,,20240610T021539_20240610T021535_T52SDE,,0
150,150,2015B20,합천댐2,2024-06-10,1회차 하층부,"128°2'22.4""","35°33'42""",10:30,0.9,합천댐,...,0.0,0,0,0.0,6,48042,,20240610T021539_20240610T021535_T52SDE,,0
