In [1]:
# 必要なライブラリのインポート
import sys
import numpy as np
import pandas as pd
from pathlib import Path

In [2]:
# Load the project settings: make <two levels above the CWD>/config importable.
base_dir = Path.cwd().parent.parent
config_path = base_dir.joinpath("config")
sys.path.append(str(config_path))

# RANDOM_SEED    : seed for the shared random generator below
# FEATURE_RANGES : per-class temperature range definitions
# NOISE          : noise / spillover settings for each statistic
from config import RANDOM_SEED, FEATURE_RANGES, NOISE

# Single generator shared by every sampling helper in this notebook.
rng = np.random.default_rng(RANDOM_SEED)

In [3]:
# Resolve the input/output paths and load the CSV data.
# NOTE(review): the output filename contains 葉音 ("leaf sound") while the columns
# added later are leaf_temp_* — possibly a typo for 葉温; confirm before renaming,
# since other code may read this exact path.
csv_dir = base_dir.joinpath("assets", "csv_data")
input_path = csv_dir.joinpath("八王子霊園_航空写真からVARI値検出結果.csv")
output_path = csv_dir.joinpath("八王子霊園_葉音・VARI・座標結合済み.csv")
df = pd.read_csv(filepath_or_buffer=input_path)

In [4]:
# ========= Normalise class labels (expected: 危険/要注意/健康/N/A) =========
# The labels must match the keys of config.FEATURE_RANGES.
CLASS_COL = "class"
valid_classes = set(FEATURE_RANGES)  # e.g. {"危険", "要注意", "健康", "N/A"}

# Unexpected labels do not drop the row: anything unknown is folded into "N/A".
class_labels = df[CLASS_COL].astype(str).str.strip()
is_known_class = class_labels.isin(valid_classes)
df[CLASS_COL] = class_labels.where(is_known_class, "N/A")

# ========= 乱数生成ユーティリティ =========
def sample_uniform(size, lo, hi):
    """Draw `size` values uniformly between `lo` and `hi` from the shared `rng`."""
    draws = rng.uniform(low=lo, high=hi, size=size)
    return draws

def add_gaussian_noise(arr, sigma):
    """Return `arr` plus zero-mean Gaussian noise with standard deviation `sigma`.

    When `sigma` is falsy (None, 0) or not strictly positive, `arr` itself is
    returned untouched and no random numbers are drawn from the shared `rng`.
    """
    if not sigma or not (sigma > 0):
        return arr
    jitter = rng.normal(loc=0.0, scale=sigma, size=len(arr))
    return arr + jitter

def apply_spillover(arr, rate, sigma):
    """Add extra Gaussian noise to a random subset of samples (simulated disturbance).

    Parameters
    ----------
    arr : array-like
        Values to perturb. The input is never modified.
    rate : float
        Fraction of samples to perturb. ``int(rate * len(arr))`` samples are
        picked without replacement; values above 1 perturb every sample.
    sigma : float
        Standard deviation of the extra zero-mean noise.

    Returns
    -------
    `arr` itself when nothing is perturbed (falsy/non-positive `rate`, zero
    selected samples, or falsy/non-positive `sigma`); otherwise a new array
    holding the perturbed values (non-float input is converted to float so the
    noise is not truncated).
    """
    if not rate or rate <= 0:
        return arr
    total = len(arr)
    # Clamp the subset size: without this, rate > 1 asks rng.choice for more
    # unique indices than exist and it raises.
    k = min(int(rate * total), total)
    if k <= 0:
        return arr
    # The indices are drawn before the sigma check on purpose: this keeps the
    # sequence of draws from the shared `rng` the same as before, so seeded
    # runs keep producing the same data.
    idx = rng.choice(total, size=k, replace=False)
    if not sigma or not (sigma > 0):
        return arr
    # Work on a copy so the caller's array is not silently mutated.
    out = np.array(arr, copy=True)
    if not np.issubdtype(out.dtype, np.floating):
        out = out.astype(float)
    out[idx] = out[idx] + rng.normal(0.0, sigma, size=k)
    return out

def clip_physical(arr, lo, hi):
    """Clamp `arr` to [lo, hi]; a bound given as None is treated as unbounded.

    When both bounds are None, `arr` is returned as-is without calling np.clip.
    """
    if lo is None and hi is None:
        return arr
    lower = -np.inf if lo is None else lo
    upper = np.inf if hi is None else hi
    return np.clip(arr, lower, upper)

# ========= Generate the leaf-temperature statistics per class =========
n = len(df)  # not used below in the visible cells; kept in case other cells rely on it
out_cols = ["leaf_temp_mean", "leaf_temp_min", "leaf_temp_max", "leaf_temp_median", "leaf_temp_std"]
for c in out_cols:
    df[c] = np.nan  # start empty (NaN); rows that are never filled stay NaN


def _noise_setting(key, default=0.0):
    """Read a numeric NOISE entry; a missing, None or zero entry yields `default`."""
    return float(NOISE.get(key, default) or default)


# Physical safety range for the temperature columns (taken from config when
# present). A None bound is passed through and treated as "no bound" by clip_physical.
phys_lo = NOISE.get("physical_clip_lo", 15.0)
phys_hi = NOISE.get("physical_clip_hi", 50.0)

# Settings that do not depend on the class are read once, outside the class loop.
noise_sigma = {col: _noise_setting(col) for col in out_cols}
spill_rate = _noise_setting("spillover_rate")
spill_sig = _noise_setting("leaf_temp_spill_sigma")
# std has its own bounds: the lower bound is never below 0 and the upper bound
# comes from config (default 10.0).
std_lo_phys = max(0.0, _noise_setting("std_clip_lo"))
std_hi_phys = _noise_setting("std_clip_hi", 10.0)

# Columns clipped to the physical temperature range (std is handled separately).
temp_cols = ("leaf_temp_mean", "leaf_temp_min", "leaf_temp_max", "leaf_temp_median")

for cls_name, cls_range in FEATURE_RANGES.items():
    # "N/A" rows are intentionally left empty (nothing is filled in).
    if cls_name == "N/A":
        continue
    mask = (df[CLASS_COL] == cls_name)
    if not mask.any():
        continue

    # ---- Sampling ranges: per-statistic entries when configured, otherwise
    #      derived from the range of the mean ----
    mean_lo, mean_hi = cls_range["leaf_temp_mean"]
    fallback_ranges = {
        "leaf_temp_min": (mean_lo - 3.0, mean_lo - 0.5),
        "leaf_temp_max": (mean_hi + 0.5, mean_hi + 3.0),
        "leaf_temp_median": (mean_lo - 0.5, mean_hi + 0.5),
        "leaf_temp_std": (0.1, 2.5),  # naive assumption for the spread
    }
    ranges = {"leaf_temp_mean": (mean_lo, mean_hi)}
    for col, fallback in fallback_ranges.items():
        range_lo, range_hi = cls_range.get(col, fallback)
        ranges[col] = (range_lo, range_hi)

    # ---- Sampling pipeline ----
    # Each stage runs over the columns in `out_cols` order before the next
    # stage starts; this keeps the order of draws from the shared rng stable
    # (uniform for all columns, then Gaussian noise, then spillover).
    size = int(mask.sum())
    vals = {col: sample_uniform(size, ranges[col][0], ranges[col][1]) for col in out_cols}
    vals = {col: add_gaussian_noise(vals[col], noise_sigma[col]) for col in out_cols}
    vals = {col: apply_spillover(vals[col], spill_rate, spill_sig) for col in out_cols}

    # ---- Clip to the physical ranges ----
    for col in temp_cols:
        vals[col] = clip_physical(vals[col], phys_lo, phys_hi)
    vals["leaf_temp_std"] = clip_physical(vals["leaf_temp_std"], std_lo_phys, std_hi_phys)

    # ---- Enforce min <= median <= max (simple consistency pass) ----
    # Noise can invert min and max, so order them element-wise first.
    lower_env = np.minimum(vals["leaf_temp_min"], vals["leaf_temp_max"])
    upper_env = np.maximum(vals["leaf_temp_min"], vals["leaf_temp_max"])
    vals["leaf_temp_min"], vals["leaf_temp_max"] = lower_env, upper_env
    vals["leaf_temp_median"] = np.clip(vals["leaf_temp_median"], lower_env, upper_env)
    # The mean is kept inside [min, max] as well.
    vals["leaf_temp_mean"] = np.clip(vals["leaf_temp_mean"], lower_env, upper_env)

    # ---- Write the generated values into the rows of this class ----
    for col in out_cols:
        df.loc[mask, col] = vals[col]

In [5]:
# ========= Output (every column of the source CSV + the 5 new columns) =========
# "utf-8-sig" prepends a BOM to the written file.
df.to_csv(path_or_buf=output_path, encoding="utf-8-sig", index=False)
print(f"CSVを出力完了: {output_path}")
preview = df.head()
display(preview)

CSVを出力完了: c:\Users\kyous\OneDrive\デスクトップ\ハッカソン\tokyo-tree-doctor_program\tokyo-tree-doctor\ml\assets\csv_data\八王子霊園_葉音・VARI・座標結合済み.csv


Unnamed: 0,park_name,緯度,経度,class,VARI,VARI_mean,VARI_std,VARI_min,VARI_max,R_med,...,G_mean,B_mean,veg_ratio,n_mask,n_veg,leaf_temp_mean,leaf_temp_min,leaf_temp_max,leaf_temp_median,leaf_temp_std
0,八王子霊園,35.659925,139.260968,危険,0.042204,0.055463,0.033118,-0.019776,0.120428,174.0,...,178.283035,155.141891,1.0,1332,1332,34.242799,29.44264,36.562385,32.715195,0.67775
1,八王子霊園,35.659745,139.260969,危険,0.045946,0.077839,0.107718,-0.038145,0.914504,173.0,...,173.113892,152.587799,0.994156,1369,1361,32.58938,31.810642,37.11204,36.25503,1.091892
2,八王子霊園,35.659565,139.260971,健康,0.29592,0.437962,0.388074,-0.02111,2.0,110.5,...,126.561729,117.544754,1.0,1296,1296,26.95626,20.546631,28.636001,25.878957,0.942152
3,八王子霊園,35.660287,139.261186,危険,0.040918,0.054562,0.02428,0.020024,0.102625,173.0,...,178.306061,153.948135,1.0,1369,1369,34.94496,31.761909,34.94496,33.884481,1.856382
4,八王子霊園,35.660106,139.261188,危険,0.03187,0.032898,0.006193,0.020024,0.053582,181.0,...,182.154938,162.531219,0.9497,1332,1265,33.638312,30.208159,39.941651,32.069328,1.887114
