In [14]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

In [5]:
# Fit the OLS yield model on the "US" rows of the feature table for 1988-2012.
csv_path = "data/processed/waob_features_states.csv"  # adjust the path if needed
df = pd.read_csv(csv_path)

# Keep only the estimation window and the "US" aggregate rows.
in_window = df["year"].between(1988, 2012)
is_us = df["state"] == "US"
df = df.loc[in_window & is_us]

target_col = "yield_bu_acre"
feature_cols = ["trend", "jun_shortfall", "temp_JA", "prec_JA", "prec_JA_sq"]
keep = [target_col, *feature_cols]

# Rows with a missing target or regressor are excluded from the fit.
df_model = df.loc[:, keep].dropna().copy()
y, X = df_model[target_col], df_model[feature_cols]

# has_constant="add" forces an intercept column to be added.
X = sm.add_constant(X, has_constant="add")
model = sm.OLS(y, X).fit()

print("\n=== Summary model (year ≤ 2012) ===")
print(model.summary())

# Headline goodness-of-fit numbers; rmse is the square root of the residual MSE.
metrics = dict(
    n_obs=int(model.nobs),
    r2=model.rsquared,
    r2_adj=model.rsquared_adj,
    rmse=np.sqrt(model.mse_resid),
)
# Optional export, currently disabled:
#pd.Series(metrics).to_csv("waob_ols_metrics_upto_2013.csv")


=== Summary model (year ≤ 2012) ===
                            OLS Regression Results                            
Dep. Variable:          yield_bu_acre   R-squared:                       0.859
Model:                            OLS   Adj. R-squared:                  0.822
Method:                 Least Squares   F-statistic:                     23.17
Date:                Wed, 08 Oct 2025   Prob (F-statistic):           1.77e-07
Time:                        14:55:59   Log-Likelihood:                -49.265
No. Observations:                  25   AIC:                             110.5
Df Residuals:                      19   BIC:                             117.8
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
const    

In [9]:

# Re-fit the OLS yield model on the longer 1988-2019 window, this time with
# the extra "dummy_2003" regressor.
df = pd.read_csv(csv_path)

# Keep only the estimation window and the "US" aggregate rows.
in_window = df["year"].between(1988, 2019)
is_us = df["state"] == "US"
df = df.loc[in_window & is_us]

target_col = "yield_bu_acre"
feature_cols = ["trend", "jun_shortfall", "temp_JA", "prec_JA", "prec_JA_sq", "dummy_2003"]
keep = [target_col, *feature_cols]

# Rows with a missing target or regressor are excluded from the fit.
df_model = df.loc[:, keep].dropna().copy()
y, X = df_model[target_col], df_model[feature_cols]

# has_constant="add" forces an intercept column to be added.
X = sm.add_constant(X, has_constant="add")
model = sm.OLS(y, X).fit()

print("\n=== Summary model (year ≤ 2019) ===")
print(model.summary())

# Headline goodness-of-fit numbers; rmse is the square root of the residual MSE.
metrics = dict(
    n_obs=int(model.nobs),
    r2=model.rsquared,
    r2_adj=model.rsquared_adj,
    rmse=np.sqrt(model.mse_resid),
)
# Optional export, currently disabled:
#pd.Series(metrics).to_csv("waob_ols_metrics_upto_2013.csv")


=== Summary model (year ≤ 2019) ===
                            OLS Regression Results                            
Dep. Variable:          yield_bu_acre   R-squared:                       0.920
Model:                            OLS   Adj. R-squared:                  0.900
Method:                 Least Squares   F-statistic:                     47.60
Date:                Wed, 08 Oct 2025   Prob (F-statistic):           1.76e-12
Time:                        14:56:38   Log-Likelihood:                -64.874
No. Observations:                  32   AIC:                             143.7
Df Residuals:                      25   BIC:                             154.0
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
const    

In [10]:
# Side-by-side table of observed and fitted yields for the rows used in the fit.
# The fitted values are aligned to the rows through the shared index.
df_out = (
    df.loc[df_model.index, ["year", target_col]]
    .assign(fitted=model.fittedvalues)
    .sort_values("year")
)
# Optional export, currently disabled:
#df_out.to_csv("waob_fitted_vs_actual_upto_2013.csv", index=False)

print("\nfitted vs actual :")
last_ten = df_out.tail(10)
print(last_ten.to_string(index=False))


fitted vs actual :
 year  yield_bu_acre    fitted
 2010      49.342615 48.470373
 2011      46.647672 47.103504
 2012      40.166289 40.663494
 2013      45.624100 45.711230
 2014      50.448293 51.065069
 2015      51.397611 52.233955
 2016      56.114128 52.695390
 2017      53.909850 52.167776
 2018      57.331183 53.727259
 2019      50.740308 53.709997


In [15]:

# Forecast the yield for YEAR_TO_PRED with the coefficients of the `model`
# fitted in an earlier cell, using that year's own feature values.
YEAR_TO_PRED = 2020
# Regressors fed to predict(). NOTE(review): the column order must match the
# regressors `model` was fitted on (the 1988-2019 fit with dummy_2003) -- confirm
# the kernel still holds that fit before running this cell.
Xcols = ["trend", "jun_shortfall", "temp_JA", "prec_JA", "prec_JA_sq", "dummy_2003"]

# Reload the full table: the earlier cells truncated `df` to their fit window.
df = pd.read_csv(csv_path)
df = df[df["state"] == "US"]

row20 = df[df["year"] == YEAR_TO_PRED]
if row20.empty:
    raise ValueError(f"Aucune ligne {YEAR_TO_PRED} dans {csv_path}.")
if len(row20) > 1:
    # A scalar forecast is only meaningful for exactly one input row.
    raise ValueError(
        f"{len(row20)} lignes pour {YEAR_TO_PRED} dans {csv_path} ; une seule attendue."
    )

missing = [col for col in Xcols if row20[col].isna().any()]
if missing:
    # The fitting cells drop incomplete rows; predicting from NaN inputs would
    # only yield a silent NaN forecast.
    raise ValueError(f"Variables manquantes pour {YEAR_TO_PRED} : {missing}.")

# has_constant="add" forces the intercept column to be added to the single row.
X20 = sm.add_constant(row20[Xcols], has_constant="add")

# predict() hands back a one-element array-like (a pandas Series for DataFrame
# input); calling float() directly on a Series is deprecated in pandas >= 2.1,
# so pull the scalar out positionally instead.
yhat20_ols = float(np.asarray(model.predict(X20)).ravel()[0])
print(f"\nForecast Y={YEAR_TO_PRED} with find coefs (2020 weather) : {yhat20_ols:.2f} bu/ac")





FileNotFoundError: [Errno 2] No such file or directory: 'data/processed/waob_features_national.csv'

In [None]:
# Coefficients taken from the reference paper (hard-coded, not estimated here).
coef_paper = dict(
    const=59.173,
    trend=0.523,
    dummy_2003=-5.884,
    jun_shortfall=-0.867,
    temp_JA=-0.503,
    prec_JA=4.142,
    prec_JA_sq=-0.407,
)

# Mean weather over 1988-2019 stands in for the forecast year's weather.
in_sample = df["year"].between(1988, 2019)
sample = df.loc[in_sample]
mu_temp = float(sample["temp_JA"].mean())
mu_prec = float(sample["prec_JA"].mean())
# Square of the mean precipitation (not the mean of the squares).
mu_prec_sq = mu_prec ** 2
# NOTE(review): presumably trend is counted from 1988 = 1 -- confirm against the CSV.
trend_2020 = YEAR_TO_PRED - 1987

# Accumulate the linear predictor term by term; jun_shortfall is set to 0.0 and
# the dummy_2003 term is left out (dummy_2003 = 0).
yhat20_paper_means = coef_paper["const"]
yhat20_paper_means += coef_paper["trend"] * trend_2020
yhat20_paper_means += coef_paper["jun_shortfall"] * 0.0
yhat20_paper_means += coef_paper["temp_JA"] * mu_temp
yhat20_paper_means += coef_paper["prec_JA"] * mu_prec
yhat20_paper_means += coef_paper["prec_JA_sq"] * mu_prec_sq

inputs_report = (
    f"\n=== Variables utilisées pour la prédiction 2020 ===\n"
    f"Intercept = 1\n"
    f"trend = {trend_2020}\n"
    f"jun_shortfall = 0.0\n"
    f"temp_JA = {mu_temp:.2f}\n"
    f"prec_JA = {mu_prec:.2f}\n"
    f"prec_JA_sq = {mu_prec_sq:.2f}\n"
    f"dummy_2003 = 0\n"
)
print(inputs_report)
print(f"Prédiction {YEAR_TO_PRED} COEFFS PAPIER avec MOYENNES (sans ajustement non-linéaire) : {yhat20_paper_means:.1f} bu/ac")

# Optional: WAOB non-linear adjustment (about -0.12 bu/ac for 2020 according to farmdoc).
WAOB_NONLINEAR_ADJ = -0.117
yhat20_paper_means_adj = yhat20_paper_means + WAOB_NONLINEAR_ADJ
print(f"… avec ajustement non-linéaire WAOB ≈ {yhat20_paper_means_adj:.1f} bu/ac")
