In [1]:
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# ------------------------------------------------
from scipy.spatial.distance import cdist

# ------------------------------------------------
from sklearn.metrics import (
    mean_absolute_error,
    mean_absolute_percentage_error,
    mean_squared_error,
    r2_score,
    root_mean_squared_error,
)
from sklearn.model_selection import train_test_split

In [2]:
from mgtwr.model import GTWR
from mgtwr.kernel import GTWRKernel
from mgtwr.sel import SearchGTWRParameter
from mgtwr.function import _compute_betas_gwr

In [3]:
# Read the example dataset from the working directory and preview the first rows.
data = pd.read_csv("example.csv")
data.head()

Unnamed: 0,longitude,latitude,t,x1,x2,y
0,0,0,0,0.771321,0.895098,10.65655
1,1,0,0,0.020752,0.633729,5.692754
2,2,0,0,0.633648,0.462768,7.634701
3,3,0,0,0.748804,0.090788,7.755446
4,4,0,0,0.498507,0.982153,9.351118


In [4]:
# --------------------------------------------------------
# Step 1: Load and Prepare the Dataset
# --------------------------------------------------------

# Column groups: where each observation is, when it was taken, and both together.
spatial_cols = ["longitude", "latitude"]
temporal_cols = ["t"]
non_feature_cols = spatial_cols + temporal_cols

# Keep the coordinates alongside the predictors so that a single split keeps
# features, locations and times row-aligned.
X = data[["longitude", "latitude", "x1", "x2", "t"]]
y = data["y"]

# 70 % calibration / 30 % validation, with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Spatial coordinates of the calibration and validation observations.
coords_train, coords_test = X_train[spatial_cols], X_test[spatial_cols]

# Time stamps, kept as single-column DataFrames.
t_train, t_test = X_train[temporal_cols], X_test[temporal_cols]

# What remains after removing location and time are the predictors (x1, x2).
X_train = X_train.drop(columns=non_feature_cols)
X_test = X_test.drop(columns=non_feature_cols)

In [5]:
# --------------------------------------------------------
# Step 2: Calibrate the GTWR Model
# --------------------------------------------------------

# Upper bound of the temporal-bandwidth search: the largest calibration time stamp.
# The reduction is done on the underlying ndarray so the result is always a scalar;
# np.max on a DataFrame delegates to DataFrame.max(axis=None), which only reduces
# over both axes (instead of returning a per-column Series) from pandas 2.0 on.
tau_upper = t_train.to_numpy().max()

# Search the spatial bandwidth (bw) and temporal bandwidth (tau) on the calibration data.
sel = SearchGTWRParameter(
    coords=coords_train,
    t=t_train,
    X=X_train,
    y=y_train,
    kernel='gaussian',
    fixed=True,
    thread=8,
)
spatial_bw, temporal_bw = sel.search(tau_max=tau_upper, verbose=True, time_cost=True)

print(spatial_bw, temporal_bw)

bw:  6.3 , tau:  10.8 , score:  12737.080005093756
bw:  4.1 , tau:  10.8 , score:  12453.513712070966
bw:  2.7 , tau:  10.8 , score:  11859.32387774656
bw:  1.9 , tau:  5.6 , score:  11158.94788787735
bw:  1.3 , tau:  2.4 , score:  10401.990007209211
bw:  1.0 , tau:  1.4 , score:  10077.714083254392
bw:  1.0 , tau:  1.4 , score:  10077.714083254392
bw:  1.0 , tau:  1.4 , score:  10077.714083254392
bw:  0.9 , tau:  1.0 , score:  10044.562649818177
bw:  0.9 , tau:  1.0 , score:  10044.562649818177
time cost: 0:00:17.319
0.9 1.0


In [6]:
# Fit GTWR on the calibration set with the bandwidths selected above and
# print the R^2 reported by the fitted result.
gtwr_model = GTWR(
    coords=coords_train,
    t=t_train,
    X=X_train,
    y=y_train,
    bw=spatial_bw,
    tau=temporal_bw,
    kernel='gaussian',
    fixed=True,
    thread=8,
)
gtwr = gtwr_model.fit()
print(gtwr.R2)

0.9799868891567898


In [7]:
# --------------------------------------------------------
# Step 3: Perform Predictions for Validation Dataset
# --------------------------------------------------------

# Design matrices for the manual prediction step: a leading column of ones
# (the intercept) followed by the predictor columns.
X_train2 = np.column_stack([np.ones(X_train.shape[0]), X_train])  # Calibration data
X_test2 = np.column_stack([np.ones(X_test.shape[0]), X_test])  # Validation data

In [8]:
# Kernel object used to turn spatio-temporal distances into observation weights.
# It is configured with the calibration coordinates/times and the same bandwidths
# and Gaussian fixed-bandwidth settings used when fitting the model.
kernel_options = dict(
    coords=coords_train,
    t=t_train,
    bw=spatial_bw,
    tau=temporal_bw,
    fixed=True,
    function="gaussian",
)
gtwr_kernel = GTWRKernel(**kernel_options)

In [9]:
# Predict every validation point by fitting a locally weighted regression on the
# calibration data, with weights derived from the spatio-temporal distance between
# the validation point and each calibration observation.

# Calibration-side quantities do not depend on the validation point, so build them once.
time_scale = np.sqrt(temporal_bw)  # scales the time axis before it is mixed with space
coords_combined = np.hstack(
    [coords_train.to_numpy(), time_scale * t_train.to_numpy().reshape(-1, 1)]
)
y_train_col = y_train.to_numpy().reshape(-1, 1)

y_pred = []  # One plain float per validation point (NaN when the point is skipped)

# Iterate through each validation point
for coord_test, t_test_point, x_test in zip(coords_test.to_numpy(), t_test.to_numpy(), X_test2):
    # Place the validation point in the same (longitude, latitude, scaled time) space
    validation_combined = np.hstack([coord_test, time_scale * t_test_point])

    # Distance from this validation point to every calibration observation
    distances = cdist([validation_combined], coords_combined).reshape(-1)

    # Compute weights using GTWRKernel
    weights = gtwr_kernel.cal_kernel(distance=distances).reshape(-1, 1)

    # Skip points with zero weights: no calibration observation contributes
    if np.sum(weights) == 0:
        print(f"Warning: All weights are zero for validation point {coord_test} at time {t_test_point}. Skipping.")
        y_pred.append(np.nan)
        continue

    # Compute regression coefficients using `_compute_betas_gwr`
    betas, _ = _compute_betas_gwr(y=y_train_col, x=X_train2, wi=weights)

    # Predict the dependent variable. Store a scalar rather than a one-element array
    # so that NaN placeholders and real predictions form a homogeneous 1-D sequence.
    y_pred.append(float(np.dot(x_test, betas).item()))

In [10]:
# Assemble one validation table (coordinates, predictors, time, observed y),
# attach the predictions in validation-row order, then order the rows by time.
# reset_index() keeps the original row labels in an "index" column.
test_df = (
    pd.concat([coords_test, X_test, t_test, y_test], axis=1)
    .assign(y_pred=np.asarray(y_pred).ravel())
    .sort_values(by="t")
    .reset_index()
)

In [11]:
# Inspect observed vs. predicted values for the first time slice (t == 0).
test_df.loc[test_df["t"] == 0]

Unnamed: 0,index,longitude,latitude,x1,x2,t,y,y_pred
0,111,3,9,0.512334,0.092455,0,8.06257,13.032148
1,118,10,9,0.885827,0.311393,0,11.101959,16.041685
2,81,9,6,0.513467,0.124246,0,9.153901,19.824162
3,58,10,4,0.826123,0.302684,0,8.289727,24.493064
4,128,8,10,0.645072,0.789637,0,12.534033,29.613338
5,43,7,3,0.85685,0.384791,0,9.90618,23.45785
6,100,4,8,0.578136,0.869636,0,10.643051,64.596581
7,63,3,5,0.590201,0.819897,0,9.965492,41.808954
8,101,5,8,0.853934,0.980995,0,12.285061,80.459108
9,107,11,8,0.037094,0.835833,0,7.042336,4.934366


In [12]:
# Global evaluation over the whole validation set.
# The metric functions are imported in the notebook's top import cell.

# Flatten the predictions into a 1-D float array. Entries may be one-element arrays
# or bare NaN placeholders (appended when a point is skipped), so take the first
# element of each after ravel().
y_true_all = y_test.to_numpy()
y_pred_all = np.array([np.ravel(p)[0] for p in y_pred], dtype=float)

# Score only the points that actually received a prediction; the sklearn metrics
# raise on NaN input.
scored = ~np.isnan(y_pred_all)
if not scored.all():
    print(f"Note: {np.count_nonzero(~scored)} validation point(s) without a prediction were excluded.")

# 1. Global Metrics
print("\n--- Global Evaluation Metrics ---")
rmse_global = root_mean_squared_error(y_true_all[scored], y_pred_all[scored])
r2_global = r2_score(y_true_all[scored], y_pred_all[scored])
mae_global = mean_absolute_error(y_true_all[scored], y_pred_all[scored])

print(f"Root Mean Squared Error (RMSE): {rmse_global:.4f}")
print(f"Coefficient of Determination (R^2): {r2_global:.4f}")
print(f"Mean Absolute Error (MAE): {mae_global:.4f}")


--- Global Evaluation Metrics ---
Root Mean Squared Error (RMSE): 14.2223
Coefficient of Determination (R^2): 0.9554
Mean Absolute Error (MAE): 10.3695


In [13]:
# Per-time-point evaluation: one row of metrics for each distinct value of `t`.
temporal_metrics = []

# Group data by the temporal component `t`
for t_value, group in test_df.groupby("t"):
    # Use loop-local names here. Rebinding `y_pred` inside this loop would replace
    # the notebook-level prediction list with the last group's Series, so cells
    # that read `y_pred` would misbehave if re-run afterwards.
    y_true_t = group["y"]
    y_pred_t = group["y_pred"]

    # Same metric functions as the global evaluation, restricted to this time point
    temporal_metrics.append({
        "time_point": t_value,
        "RMSE": root_mean_squared_error(y_true_t, y_pred_t),
        "R^2": r2_score(y_true_t, y_pred_t),
        "MAE": mean_absolute_error(y_true_t, y_pred_t),
    })

# Convert metrics to a DataFrame for better visualization
df_temporal_metrics = pd.DataFrame(temporal_metrics)
df_temporal_metrics

Unnamed: 0,time_point,RMSE,R^2,MAE
0,0,19.921624,-116.937188,14.716938
1,1,8.573342,0.889364,6.574467
2,2,8.068578,0.957094,6.24673
3,3,11.692444,0.959625,8.774577
4,4,14.530044,0.965212,10.689114
5,5,17.614848,0.96032,12.34046
6,6,15.015361,0.973723,12.422648
7,7,15.265993,0.946804,10.799852
8,8,17.008926,0.953872,12.928261
9,9,14.702103,0.944582,11.153928
