In [4]:
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [8]:
data = {
    "Year": np.arange(2005, 2023),
    "Wheat_Yield": [4,3.9, 3.7,4.3, 4.1,3.4, 4.2,4,4.3,3.9,4.1,4.8,4.1,4.6,4.4,4.9,5.7,4.9],
    "Corn_Yield": [7.1,6.3,3.9,5.9,6.4,7.1,6.2,3.6,6,7.5,5.4,7.3,4,7.7,7.6,7.9,5.9,4.5],
    "Soy_Yield": [2.8,2.7,2.1,2.4,2.4, 3.2,2.7,1.7,2.4,3.5, 2.5, 3.2, 2.3, 3.3, 3.1, 3.2, 2.3,1.7]
}
df = pd.DataFrame(data)

In [9]:
df

Unnamed: 0,Year,Wheat_Yield,Corn_Yield,Soy_Yield
0,2005,4.0,7.1,2.8
1,2006,3.9,6.3,2.7
2,2007,3.7,3.9,2.1
3,2008,4.3,5.9,2.4
4,2009,4.1,6.4,2.4
5,2010,3.4,7.1,3.2
6,2011,4.2,6.2,2.7
7,2012,4.0,3.6,1.7
8,2013,4.3,6.0,2.4
9,2014,3.9,7.5,3.5


In [10]:
# Feature Engineering: Add previous year's yields as features
df["Wheat_Lag1"] = df["Wheat_Yield"].shift(1)
df["Corn_Lag1"] = df["Corn_Yield"].shift(1)
df["Soy_Lag1"] = df["Soy_Yield"].shift(1)

# Drop rows with NaN (first row due to shifting)
df = df.dropna()

# Define Features (X) and Target (y)
X = df[["Wheat_Lag1", "Corn_Lag1", "Soy_Lag1"]]
y = df[["Wheat_Yield", "Corn_Yield", "Soy_Yield"]]

# Train-test split (2005–2021 for training, 2022 for testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.7, shuffle=False)

# Scale Data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train XGBoost Models for Each Commodity
models = {}
for crop in ["Wheat_Yield", "Corn_Yield", "Soy_Yield"]:
    model = xgb.XGBRegressor(objective="reg:squarederror", n_estimators=100, learning_rate=0.1, max_depth=5)
    model.fit(X_train_scaled, y_train[crop])
    models[crop] = model

# Predict Prices for 2024–2026
future_years = [2024, 2025, 2026]
future_predictions = []

# Start with last known prices
last_known_prices = df.iloc[-1][["Wheat_Yield", "Corn_Yield", "Soy_Yield"]].values

for year in future_years:
    # Prepare input for model
    future_input = np.array(last_known_prices).reshape(1, -1)
    future_input_scaled = scaler.transform(future_input)

    # Predict for each crop
    wheat_pred = models["Wheat_Yield"].predict(future_input_scaled)[0]
    corn_pred = models["Corn_Yield"].predict(future_input_scaled)[0]
    soy_pred = models["Soy_Yield"].predict(future_input_scaled)[0]

    # Append results
    future_predictions.append([year, wheat_pred, corn_pred, soy_pred])

    # Update last known prices for the next iteration
    last_known_prices = [wheat_pred, corn_pred, soy_pred]

# Convert predictions to DataFrame
future_df = pd.DataFrame(future_predictions, columns=["Year", "Wheat_Yield_Pred", "Corn_Yield_Pred", "Soy_Yield_Pred"])

print("\nPredicted Prices for 2024–2026:")
print(future_df)


Predicted Prices for 2024–2026:
   Year  Wheat_Yield_Pred  Corn_Yield_Pred  Soy_Yield_Pred
0  2024          4.099801         6.398462        2.400131
1  2025          3.408430         6.298754        2.102802
2  2026          4.297269         5.900118        2.400131


