In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, LSTM, Dense, Dropout
import keras_tuner as kt 
import matplotlib.pyplot as plt

In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from statsmodels.tsa.arima.model import ARIMA
import warnings

warnings.filterwarnings("ignore")

# === 1. Dataset ===
data = {
    'Year': list(range(2008, 2023)),
    'Land_used(ha)': [
        380543, 391291, 389471, 308104, 338626,
        329303, 375216, 376530, 447204, 453470,
        442958, 396397, 366426, 362320, 336003
    ]
}
df = pd.DataFrame(data)

# === 2. Create lag features ===
def create_lag_features(series, window=3):
    X, y, years = [], [], []
    for i in range(window, len(series)):
        X.append(series[i-window:i])
        y.append(series[i])
        years.append(df['Year'][i])
    return np.array(X), np.array(y), years

land_used_series = df['Land_used(ha)'].values
X_lag, y_lag, target_years = create_lag_features(land_used_series, window=3)
X_train, y_train = X_lag[:10], y_lag[:10]
X_test, y_test = X_lag[10:], y_lag[10:]
test_years = target_years[10:]

# === 3. Random Forest ===
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
rf_preds = rf_model.predict(X_test)

# === 4. XGBoost ===
xgb_model = XGBRegressor(n_estimators=100, random_state=42)
xgb_model.fit(X_train, y_train)
xgb_preds = xgb_model.predict(X_test)

# === 5. ARIMA ===
arima_preds = []
full_series = list(land_used_series[:13])  # up to 2020

for i in range(2):  # predict 2021 & 2022
    model = ARIMA(full_series, order=(2, 1, 0))
    model_fit = model.fit()
    forecast = model_fit.forecast()[0]
    arima_preds.append(forecast)
    full_series.append(df['Land_used(ha)'][13 + i])

# === 6. Ensemble ===
ensemble_preds = (rf_preds + xgb_preds + arima_preds) / 3

# === 7. Evaluation ===
def evaluate_model(y_true, y_pred):
    return {
        "R2": r2_score(y_true, y_pred),
        "RMSE": np.sqrt(mean_squared_error(y_true, y_pred)),
        "MAE": mean_absolute_error(y_true, y_pred)
    }

eval_results = {
    "ARIMA": evaluate_model(y_test, arima_preds),
    "Random Forest": evaluate_model(y_test, rf_preds),
    "XGBoost": evaluate_model(y_test, xgb_preds),
    "Ensemble": evaluate_model(y_test, ensemble_preds)
}
eval_df = pd.DataFrame(eval_results).T
print("📊 Evaluation Metrics:\n", eval_df)

# === 8. Prediction Table ===
predictions_table = pd.DataFrame({
    "Year": test_years,
    "Actual": y_test,
    "ARIMA": arima_preds,
    "Random Forest": rf_preds,
    "XGBoost": xgb_preds,
    "Ensemble": ensemble_preds
})
print("\n📅 Year-wise Predictions:\n", predictions_table)


📊 Evaluation Metrics:
                      R2          RMSE           MAE
ARIMA         -0.918486  18225.764464  14124.846541
Random Forest  0.244827  11434.825969  11321.500000
XGBoost       -0.686944  17090.574712  13144.187500
Ensemble       0.154249  12101.177073  11994.604123

📅 Year-wise Predictions:
    Year  Actual          ARIMA  Random Forest       XGBoost       Ensemble
0  2021  362320  364926.721672      352604.39  338252.53125  351927.880974
1  2022  336003  361645.971410      348930.39  338223.90625  349600.089220


In [5]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import warnings
warnings.filterwarnings("ignore")

# === 1. Data ===
import pandas as pd

# === 1. Load from Excel ===
df = pd.read_excel("Australia.xlsx")

# === 2. Use only 'Year' and 'Land_used(ha)' columns ===
df = df[['Year', 'Land_used(ha)']].copy()

# (Optional) Sort by year just in case
df = df.sort_values('Year').reset_index(drop=True)

# Proceed with rest of the code unchanged
land_used = df['Land_used(ha)'].values
years = df['Year'].tolist()

df = pd.DataFrame(data)

# === 2. Create initial training data ===
def create_lag_features(series, window=3):
    X, y = [], []
    for i in range(window, len(series)):
        X.append(series[i-window:i])
        y.append(series[i])
    return np.array(X), np.array(y)

land_used = df['Land_used(ha)'].values
X_train, y_train = create_lag_features(land_used[:13], window=3)  # up to 2010 → predict till 2020

# === 3. Train models ===
rf = RandomForestRegressor(n_estimators=100, random_state=42)
xgb = XGBRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
xgb.fit(X_train, y_train)

# === 4. Recursive prediction from 2011 to 2022 ===
years = list(df['Year'][3:])  # 2011–2022
actual = list(df['Land_used(ha)'][3:])

rf_preds, xgb_preds, arima_preds = [], [], []
series_rf = list(land_used[:3])  # starting with 2008–2010
series_arima = list(land_used[:3])

for _ in range(len(years)):
    # Random Forest & XGBoost
    input_seq = np.array(series_rf[-3:]).reshape(1, -1)
    rf_pred = rf.predict(input_seq)[0]
    xgb_pred = xgb.predict(input_seq)[0]
    rf_preds.append(rf_pred)
    xgb_preds.append(xgb_pred)
    series_rf.append(rf_pred)  # update for next iteration

    # ARIMA
    model = ARIMA(series_arima, order=(2, 1, 0))
    model_fit = model.fit()
    arima_pred = model_fit.forecast()[0]
    arima_preds.append(arima_pred)
    series_arima.append(arima_pred)

# === 5. Ensemble ===
ensemble_preds = (np.array(rf_preds) + np.array(xgb_preds) + np.array(arima_preds)) / 3

# === 6. Evaluation ===
def evaluate(y_true, y_pred):
    return {
        "R2": r2_score(y_true, y_pred),
        "RMSE": np.sqrt(mean_squared_error(y_true, y_pred)),
        "MAE": mean_absolute_error(y_true, y_pred)
    }

results = {
    "ARIMA": evaluate(actual, arima_preds),
    "Random Forest": evaluate(actual, rf_preds),
    "XGBoost": evaluate(actual, xgb_preds),
    "Ensemble": evaluate(actual, ensemble_preds)
}
results_df = pd.DataFrame(results).T
print("\n📊 Evaluation Table:\n", results_df)

# === 7. Final Predictions ===
pred_table = pd.DataFrame({
    "Year": years,
    "Actual": actual,
    "ARIMA": arima_preds,
    "Random Forest": rf_preds,
    "XGBoost": xgb_preds,
    "Ensemble": ensemble_preds
})
print("\n📅 Year-wise Prediction Table:\n", pred_table)



📊 Evaluation Table:
                      R2          RMSE           MAE
ARIMA         -0.062817  47989.542793  42100.136708
Random Forest  0.098321  44202.159327  36002.921667
XGBoost        0.323965  38273.836749  22877.481771
Ensemble       0.311988  38611.377271  32357.659175

📅 Year-wise Prediction Table:
     Year  Actual          ARIMA  Random Forest       XGBoost       Ensemble
0   2011  308104  389356.285687      353366.65  308104.12500  350275.686896
1   2012  338626  389407.042522      344158.71  338223.90625  357263.219591
2   2013  329303  389465.251315      340942.37  330823.90625  353743.842522
3   2014  375216  389471.475497      377506.06  375472.21875  380816.584749
4   2015  376530  389469.808831      404145.11  378998.78125  390871.233360
5   2016  447204  389469.257512      413120.11  447028.37500  416539.247504
6   2017  453470  389469.250702      400107.38  428215.03125  405930.553984
7   2018  442958  389469.277655      354969.97  338252.53125  360897.259635
8 

In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, GRU
from tensorflow.keras.optimizers import Adam
import warnings
warnings.filterwarnings("ignore")

# === 1. Load and Prepare Data ===
df = pd.read_excel("Australia.xlsx")
df = df[['Year', 'Land_used(ha)']].sort_values('Year').reset_index(drop=True)

land_used = df['Land_used(ha)'].values
years = df['Year'].tolist()

# === 2. Create Lag Features ===                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   
def create_lag_features(series, window=3):
    X, y = [], []
    for i in range(window, len(series)):
        X.append(series[i-window:i])
        y.append(series[i])
    return np.array(X), np.array(y)

# 2008–2010 → predict up to 2020 (index 0 to 12), 2011–2022 are predictions
X_train, y_train = create_lag_features(land_used[:13], window=3)
actual = list(land_used[3:])  # Actual values for 2011–2022

# === 3. Train RF and XGB ===
rf = RandomForestRegressor(n_estimators=100, random_state=42)
xgb = XGBRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
xgb.fit(X_train, y_train)

# === 4. Train LSTM and GRU ===
scaler = MinMaxScaler()
scaled = scaler.fit_transform(land_used[:13].reshape(-1, 1)).flatten()
X_seq, y_seq = create_lag_features(scaled, 3)
X_seq = X_seq.reshape((X_seq.shape[0], 3, 1))

# LSTM
lstm = Sequential([LSTM(50, activation='relu', input_shape=(3,1)), Dense(1)])
lstm.compile(optimizer=Adam(0.01), loss='mse')
lstm.fit(X_seq, y_seq, epochs=200, verbose=0)

# GRU
gru = Sequential([GRU(50, activation='relu', input_shape=(3,1)), Dense(1)])
gru.compile(optimizer=Adam(0.01), loss='mse')
gru.fit(X_seq, y_seq, epochs=200, verbose=0)

# === 5. Recursive Prediction (2011–2022) ===
series_rf = list(land_used[:3])
series_arima = list(land_used[:3])
series_dl = list(scaled[:3])

rf_preds, xgb_preds, arima_preds, lstm_preds, gru_preds = [], [], [], [], []

for _ in range(len(actual)):
    # RF & XGB
    input_seq = np.array(series_rf[-3:]).reshape(1, -1)
    rf_pred = rf.predict(input_seq)[0]
    xgb_pred = xgb.predict(input_seq)[0]
    rf_preds.append(rf_pred)
    xgb_preds.append(xgb_pred)
    series_rf.append(rf_pred)

    # ARIMA
    model = ARIMA(series_arima, order=(2, 1, 0))
    model_fit = model.fit()
    arima_pred = model_fit.forecast()[0]
    arima_preds.append(arima_pred)
    series_arima.append(arima_pred)

    # LSTM
    input_lstm = np.array(series_dl[-3:]).reshape(1, 3, 1)
    pred_lstm_scaled = lstm.predict(input_lstm, verbose=0)[0][0]
    pred_lstm = scaler.inverse_transform([[pred_lstm_scaled]])[0][0]
    lstm_preds.append(pred_lstm)
    series_dl.append(scaler.transform([[pred_lstm]])[0][0])

    # GRU
    input_gru = np.array(series_dl[-3:]).reshape(1, 3, 1)
    pred_gru_scaled = gru.predict(input_gru, verbose=0)[0][0]
    pred_gru = scaler.inverse_transform([[pred_gru_scaled]])[0][0]
    gru_preds.append(pred_gru)
    series_dl.append(scaler.transform([[pred_gru]])[0][0])

# === 6. Ensemble ===
ensemble_preds = (np.array(rf_preds) + np.array(xgb_preds) + np.array(arima_preds) +
                  np.array(lstm_preds) + np.array(gru_preds)) / 5

# === 7. Evaluation ===
def evaluate(y_true, y_pred):
    return {
        "R2": r2_score(y_true, y_pred),
        "RMSE": np.sqrt(mean_squared_error(y_true, y_pred)),
        "MAE": mean_absolute_error(y_true, y_pred)
    }

results = {
    "ARIMA": evaluate(actual, arima_preds),
    "Random Forest": evaluate(actual, rf_preds),
    "XGBoost": evaluate(actual, xgb_preds),
    "LSTM": evaluate(actual, lstm_preds),
    "GRU": evaluate(actual, gru_preds),
    "Ensemble": evaluate(actual, ensemble_preds)
}

results_df = pd.DataFrame(results).T
print("\n📊 Evaluation Table:\n", results_df)

# === 8. Final Prediction Table ===
pred_table = pd.DataFrame({
    "Year": df['Year'][3:],
    "Actual": actual,
    "ARIMA": arima_preds,
    "Random Forest": rf_preds,
    "XGBoost": xgb_preds,
    "LSTM": lstm_preds,
    "GRU": gru_preds,
    "Ensemble": ensemble_preds
})
print("\n📅 Year-wise Prediction Table:\n", pred_table)


ValueError: could not convert string to float: '380,543\xa0'