In [None]:
# ============================================================
# ライブラリのインストール（Colab なら必要に応じて）
# ============================================================
# ＊ローカル環境の場合は、事前に `pip install lightgbm` を実行してください。
# !pip -q install lightgbm


In [None]:
# ============================================================
# 1) 準備：ライブラリ読み込みと乱数シード設定
# ============================================================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from lightgbm import LGBMRegressor
import os

# Seed NumPy's global random generator so reruns draw the same samples.
RANDOM_STATE = 42
np.random.seed(seed=RANDOM_STATE)

# Directory that receives the saved figures; an existing one is reused.
os.makedirs(name="fig", exist_ok=True)


In [None]:
# ============================================================
# 2) Ground truth: grows faster than linearly at high temperature
#    (shaped like a chemical reaction rate)
# ============================================================
def build_true_function(a=8.0, b=0.03):
    """Build the ground-truth response curve, min-max scaled to 0-100.

    The unscaled curve is ``x + a * (exp(b * (x - 60)) - 1)``: a linear term
    plus a weak exponential term that starts to matter from about 60 C.
    The scaling bounds are the minimum and maximum of that curve on a
    1001-point grid over [0, 100].

    Parameters
    ----------
    a : float
        Weight of the exponential term.
    b : float
        Growth rate inside the exponent.

    Returns
    -------
    callable
        ``f(x)`` accepting a scalar or array-like and returning the scaled
        value(s) as a NumPy array.
    """
    def raw_curve(t):
        # Unscaled response; used both for the bounds and for evaluation.
        return t + a * (np.exp(b * (t - 60.0)) - 1.0)

    grid_values = raw_curve(np.linspace(0, 100, 1001))
    lo = grid_values.min()
    hi = grid_values.max()

    def f(x):
        return 100.0 * (raw_curve(np.asarray(x)) - lo) / (hi - lo)

    return f

# Instantiate the ground-truth curve and tabulate it on a dense 0-100 grid
# (501 points) that the later cells use for prediction and scoring.
true_func = build_true_function(a=8.0, b=0.03)
x_val = np.linspace(start=0, stop=100, num=501)
y_val_true = true_func(x_val)


In [None]:
# ============================================================
# 3) Training data (20-40 C only) with small Gaussian noise
# ============================================================
n_train = 80
noise_std = 1.0

# Both draws use NumPy's global generator, so their order (temperatures
# first, then noise) determines the sampled data for a given seed.
x_train = np.random.uniform(20, 40, size=n_train)
y_train = true_func(x_train) + np.random.normal(0.0, noise_std, size=n_train)

# Optional DataFrames for inspection. pandas is already imported in the
# import cell at the top of the notebook, so it is not re-imported here.
train_df = pd.DataFrame({"x_C": x_train, "Y_observed": y_train})
val_df = pd.DataFrame({"x_C": x_val, "Y_true": y_val_true})
train_df.head()


In [None]:
# ============================================================
# 4) Training: linear regression and LightGBM
#    (temperature x is the only feature)
# ============================================================
# Reshape the 1-D temperature arrays into single-column 2-D feature matrices.
X_train = x_train.reshape(-1, 1)
X_val = x_val.reshape(-1, 1)

lin = LinearRegression().fit(X_train, y_train)

# Reuse the notebook-wide RANDOM_STATE rather than a second hard-coded 42,
# so changing the seed in the setup cell also changes it here.
lgbm = LGBMRegressor(
    n_estimators=500,
    learning_rate=0.05,
    random_state=RANDOM_STATE,
).fit(X_train, y_train)

# Predict over the full 0-100 grid (inside and outside the training range).
y_pred_lin = lin.predict(X_val)
y_pred_lgbm = lgbm.predict(X_val)


In [None]:
# ============================================================
# 5) Compare MSE inside the training range (20-40 C) and outside it
# ============================================================
in_mask = (x_val >= 20) & (x_val <= 40)
out_mask = ~in_mask


def _masked_mse(y_pred, mask):
    """Return the MSE of y_pred against y_val_true on the points selected by mask."""
    return mean_squared_error(y_val_true[mask], y_pred[mask])


mse_lin_in = _masked_mse(y_pred_lin, in_mask)
mse_lin_out = _masked_mse(y_pred_lin, out_mask)
mse_lgb_in = _masked_mse(y_pred_lgbm, in_mask)
mse_lgb_out = _masked_mse(y_pred_lgbm, out_mask)

# One row per (model, region) pair. pandas is already imported in the
# import cell at the top of the notebook, so it is not re-imported here.
result_df = pd.DataFrame({
    "Model": ["LinearRegression", "LightGBM", "LinearRegression", "LightGBM"],
    "Region": ["In-domain (20-40C)", "In-domain (20-40C)", "Out-of-domain", "Out-of-domain"],
    "MSE": [mse_lin_in, mse_lgb_in, mse_lin_out, mse_lgb_out],
})
result_df


In [None]:
# ============================================================
# 6) Figure 1: ground-truth function and training data
# ============================================================
# matplotlib.pyplot is already imported in the import cell at the top of the
# notebook, so it is not re-imported here. The explicit Figure/Axes handles
# avoid relying on pyplot's implicit "current figure".
fig, ax = plt.subplots(figsize=(7, 4.5))
ax.plot(x_val, y_val_true, label="True function", linewidth=2)
ax.scatter(x_train, y_train, label="Train (noisy)", alpha=0.7)
# Shade the temperature range the models were trained on.
ax.axvspan(20, 40, alpha=0.12, label="Train domain [20, 40]")
ax.set_title("Ground truth and training data")
ax.set_xlabel("Temperature (C)")
ax.set_ylabel("Reaction rate (arbitrary, ~0-100)")
ax.legend()
ax.grid(True)
fig.savefig("fig/fig1_ground_truth.png", dpi=150, bbox_inches="tight")
plt.show()


In [None]:
# ============================================================
# 7) Figure 2: predictions over 0-100 C (interpolation vs extrapolation)
# ============================================================
fig, ax = plt.subplots(figsize=(7, 4.5))
ax.plot(x_val, y_val_true, label="True function", linewidth=2)
# Overlay each model's prediction with its own line style.
for curve, name, style in (
    (y_pred_lin, "Linear Regression", "--"),
    (y_pred_lgbm, "LightGBM", ":"),
):
    ax.plot(x_val, curve, label=name, linestyle=style)
# Shade the temperature range the models were trained on.
ax.axvspan(20, 40, alpha=0.12, label="Train domain [20, 40]")
ax.set_title("Interpolation vs Extrapolation")
ax.set_xlabel("Temperature (C)")
ax.set_ylabel("Predicted reaction rate")
ax.legend()
ax.grid(True)
fig.savefig("fig/fig2_interp_vs_extrap.png", dpi=150, bbox_inches="tight")
plt.show()


In [None]:
# ============================================================
# 8) Figure 3: absolute error of each model across temperature
# ============================================================
abs_err_lin = np.abs(y_pred_lin - y_val_true)
abs_err_lgbm = np.abs(y_pred_lgbm - y_val_true)

fig, ax = plt.subplots(figsize=(7, 4.5))
for err, name, style in (
    (abs_err_lin, "Abs error: Linear Regression", "--"),
    (abs_err_lgbm, "Abs error: LightGBM", ":"),
):
    ax.plot(x_val, err, label=name, linestyle=style)
# Shade the temperature range the models were trained on.
ax.axvspan(20, 40, alpha=0.12, label="Train domain [20, 40]")
ax.set_title("Absolute error across temperature")
ax.set_xlabel("Temperature (C)")
ax.set_ylabel("Absolute error")
ax.legend()
ax.grid(True)
fig.savefig("fig/fig3_abs_error.png", dpi=150, bbox_inches="tight")
plt.show()


In [None]:
# ============================================================
# 9) Summary (text output)
# ============================================================
# The takeaways are printed one per line, in this order.
summary_lines = (
    "【まとめ】",
    "・学習は20–40℃のみに限定。真の関数は高温ほど超線形に増加。",
    "・内挿域では LightGBM も線形回帰も良好。",
    "・外挿域では LightGBM は値が飽和しやすく、線形回帰は直線外挿で高温を過小予測。",
    "・“内挿に強く外挿に弱い”が誤差と図で確認できる。",
)
for line in summary_lines:
    print(line)
