In [1]:
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# ------------------------------------------------
from scipy.spatial.distance import cdist

# ------------------------------------------------
from sklearn.metrics import (
    mean_absolute_error,
    mean_absolute_percentage_error,
    root_mean_squared_error,
)
from sklearn.model_selection import train_test_split

In [2]:
from mgtwr.function import _compute_betas_gwr
from mgtwr.kernel import GTWRKernel
from mgtwr.model import MGTWR
from mgtwr.sel import SearchGTWRParameter, SearchMGTWRParameter

In [3]:
data = pd.read_csv("example.csv")
data.head(5)

Unnamed: 0,longitude,latitude,t,x1,x2,y
0,0,0,0,0.771321,0.895098,10.65655
1,1,0,0,0.020752,0.633729,5.692754
2,2,0,0,0.633648,0.462768,7.634701
3,3,0,0,0.748804,0.090788,7.755446
4,4,0,0,0.498507,0.982153,9.351118


In [4]:
# --------------------------------------------------------
# Step 1: Load and Prepare the Dataset
# --------------------------------------------------------

# Split the data into features (X) and target (y), including spatial and temporal coordinates
X = data[["longitude", "latitude", "x1", "x2", "t"]]
y = data["y"]

# Perform train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Extract spatial coordinates for calibration and validation datasets
coords_train = X_train[["longitude", "latitude"]]
coords_test = X_test[["longitude", "latitude"]]

# Extract temporal values
t_train = X_train[["t"]]
t_test = X_test[["t"]]

# Drop spatial and temporal coordinates from feature datasets
X_train = X_train.drop(columns=["longitude", "latitude", "t"])
X_test = X_test.drop(columns=["longitude", "latitude", "t"])

In [5]:
# ----------------------------------------------------------------
# Step 2: Find the global spatial & temporal bandwith using GTWR
# ----------------------------------------------------------------
sel = SearchGTWRParameter(
    coords=coords_train,
    t=t_train,
    X=X_train,
    y=y_train,
    kernel="gaussian",
    fixed=True,
    thread=8,
)
spatial_bw, temporal_bw = sel.search(
    tau_max=np.max(t_train), verbose=True, time_cost=True
)

print(spatial_bw, temporal_bw)

bw:  6.3 , tau:  10.8 , score:  12737.080005093756
bw:  4.1 , tau:  10.8 , score:  12453.513712070966
bw:  2.7 , tau:  10.8 , score:  11859.32387774656
bw:  1.9 , tau:  5.6 , score:  11158.94788787735
bw:  1.3 , tau:  2.4 , score:  10401.990007209211
bw:  1.0 , tau:  1.4 , score:  10077.714083254392
bw:  1.0 , tau:  1.4 , score:  10077.714083254392
bw:  1.0 , tau:  1.4 , score:  10077.714083254392
bw:  0.9 , tau:  1.0 , score:  10044.562649818177
bw:  0.9 , tau:  1.0 , score:  10044.562649818177
time cost: 0:00:16.655
0.9 1.0


In [6]:
# -----------------------------------------------------------------------------
# Step 3: Calibrate the MGTWR Model using global bandwidths as initial values
# -----------------------------------------------------------------------------
sel_multi = SearchMGTWRParameter(
    coords=coords_train,
    t=t_train,
    X=X_train,
    y=y_train,
    kernel="gaussian",
    fixed=True,
    thread=8,
)
bws = sel_multi.search(
    criterion="AICc",
    init_bw=spatial_bw,
    init_tau=temporal_bw,
    verbose=True,
    rss_score=True,
    tol_multi=1.0e-4,
    time_cost=True,
)

Current iteration: 1 ,SOC: 0.9980499
Bandwidths: 0.8, 0.8, 0.6
taus: 1.3,0.8,0.8
Current iteration: 2 ,SOC: 0.0094217
Bandwidths: 0.8, 0.8, 0.6
taus: 1.3,0.4,0.8
Current iteration: 3 ,SOC: 0.0066964
Bandwidths: 1.0, 0.8, 0.6
taus: 2.5,0.2,0.8
Current iteration: 4 ,SOC: 0.0062324
Bandwidths: 1.0, 0.8, 0.6
taus: 2.6,0.2,0.8
Current iteration: 5 ,SOC: 0.0050692
Bandwidths: 1.2, 0.8, 0.6
taus: 3.8,0.2,0.8
Current iteration: 6 ,SOC: 0.0043062
Bandwidths: 1.2, 0.8, 0.6
taus: 3.8,0.2,0.8
Current iteration: 7 ,SOC: 0.0036632
Bandwidths: 1.2, 0.8, 0.6
taus: 3.8,0.2,0.8
Current iteration: 8 ,SOC: 0.0030814
Bandwidths: 1.2, 0.8, 0.6
taus: 3.8,0.2,0.8
Current iteration: 9 ,SOC: 0.0025765
Bandwidths: 1.2, 0.8, 0.6
taus: 3.8,0.2,0.8
Current iteration: 10 ,SOC: 0.0021486
Bandwidths: 1.2, 0.8, 0.6
taus: 3.8,0.2,0.8
Current iteration: 11 ,SOC: 0.0017909
Bandwidths: 1.2, 0.8, 0.6
taus: 3.8,0.2,0.8
Current iteration: 12 ,SOC: 0.0014941
Bandwidths: 1.2, 0.8, 0.6
taus: 3.8,0.2,0.8
Current iteration: 13 ,SO

In [7]:
mgtwr = MGTWR(
    coords=coords_train,
    t=t_train,
    X=X_train,
    y=y_train,
    selector=sel_multi,
    kernel="gaussian",
    fixed=True,
    thread=8,
).fit(n_chunks=10)
print(mgtwr.R2)

0.9942568294311505


In [8]:
# Extract local coefficients for calibration points
calibration_betas = sel_multi.bws[5]  # Correct index for Betas (coefficients)

# Extract final spatial and temporal bandwidths
final_spatial_bandwidths = sel_multi.bws[0]  # Spatial bandwidths per covariate
final_temporal_bandwidths = sel_multi.bws[1]  # Temporal bandwidths per covariate

# Add intercept column to calibration and validation datasets
X_train2 = np.hstack([np.ones((X_train.shape[0], 1)), X_train])  # Calibration data
X_test2 = np.hstack([np.ones((X_test.shape[0], 1)), X_test])  # Validation data

# Initialize predictions storage
predictions = []

# Iterate through each validation point
for validation_idx, validation_features in enumerate(X_test2):
    weights_list = []

    # Compute weights for each covariate
    for covariate_idx, (spatial_bw, temporal_bw) in enumerate(
        zip(final_spatial_bandwidths, final_temporal_bandwidths)
    ):
        kernel = GTWRKernel(
            coords=coords_train.to_numpy(),
            t=t_train.to_numpy(),
            bw=spatial_bw,
            tau=temporal_bw,
            fixed=True,
            function="gaussian",
        )

        coords_combined = np.hstack(
            [
                coords_train.to_numpy(),
                np.sqrt(temporal_bw) * t_train.to_numpy().reshape(-1, 1),
            ]
        )
        validation_combined = np.hstack(
            [
                coords_test.to_numpy()[validation_idx],
                np.sqrt(temporal_bw) * t_test.to_numpy()[validation_idx],
            ]
        )
        distances = cdist([validation_combined], coords_combined).reshape(-1)
        weights = kernel.cal_kernel(distance=distances).reshape(
            -1, 1
        )  # Shape (n_calibration, 1)
        weights_list.append(weights)

    # Combine weights with coefficients
    weights_matrix = np.hstack(weights_list)  # Shape (n_calibration, n_covariates)
    weighted_betas = np.sum(
        weights_matrix * calibration_betas, axis=0
    )  # Weighted coefficients

    # Compute prediction
    predicted_value = np.dot(validation_features, weighted_betas)  # Dot product
    predictions.append(predicted_value)

# Convert predictions to a NumPy array
predictions = np.array(predictions)

In [9]:
test_df = pd.concat([coords_test, X_test, t_test, y_test], axis=1)
test_df["y_pred"] = np.array(predictions).flatten()
test_df = test_df.sort_values(by="t")
test_df = test_df.reset_index()
test_df

Unnamed: 0,index,longitude,latitude,x1,x2,t,y,y_pred
0,111,3,9,0.512334,0.092455,0,8.062570,8.760085
1,118,10,9,0.885827,0.311393,0,11.101959,50.358718
2,81,9,6,0.513467,0.124246,0,9.153901,22.208305
3,58,10,4,0.826123,0.302684,0,8.289727,53.652814
4,128,8,10,0.645072,0.789637,0,12.534033,62.560009
...,...,...,...,...,...,...,...,...
514,1678,10,7,0.685720,0.735882,11,57.265675,103.969558
515,1664,8,6,0.631222,0.503141,11,60.592880,81.884887
516,1617,9,2,0.931719,0.463042,11,34.816646,113.038754
517,1714,10,10,0.227871,0.115458,11,9.793239,-17.598757


In [10]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# 1. Global Metrics
print("\n--- Global Evaluation Metrics ---")
rmse_global = root_mean_squared_error(y_test, predictions)
r2_global = r2_score(y_test, predictions)
mae_global = mean_absolute_error(y_test, predictions)

print(f"Root Mean Squared Error (RMSE): {rmse_global:.4f}")
print(f"Coefficient of Determination (R^2): {r2_global:.4f}")
print(f"Mean Absolute Error (MAE): {mae_global:.4f}")


--- Global Evaluation Metrics ---
Root Mean Squared Error (RMSE): 147.3051
Coefficient of Determination (R^2): -3.7799
Mean Absolute Error (MAE): 114.6217


In [11]:
# Initialize a list to store temporal metrics
temporal_metrics = []

# Group data by the temporal component `t`
for t_value, group in test_df.groupby("t"):
    # Extract actual and predicted values for the current time point
    y_actual = group["y"]
    y_pred = group["y_pred"]

    # Compute evaluation metrics
    rmse = np.sqrt(mean_squared_error(y_actual, y_pred))
    r2 = r2_score(y_actual, y_pred)
    mae = mean_absolute_error(y_actual, y_pred)

    # Store metrics for the current time point
    temporal_metrics.append(
        {
            "time_point": t_value,
            "RMSE": rmse,
            "MAE": mae,
            "R^2": r2,
        }
    )

# Convert metrics to a DataFrame for better visualization
df_temporal_metrics = pd.DataFrame(temporal_metrics)
df_temporal_metrics

Unnamed: 0,time_point,RMSE,MAE,R^2
0,0,37.795155,31.069739,-423.495957
1,1,74.49793,64.338011,-7.353795
2,2,108.970111,94.022156,-6.826016
3,3,145.383871,121.290111,-5.242163
4,4,163.128774,134.411961,-3.384824
5,5,209.16183,172.649392,-4.594683
6,6,207.615261,179.140495,-4.023711
7,7,194.647624,168.670388,-7.648288
8,8,180.612522,151.440118,-4.201253
9,9,148.565929,120.509426,-4.658866
