In [5]:
import os
import json
import sys
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error


# Make the repository root importable so the project's `src` package resolves.
# The root is taken to be three directory levels above the current working
# directory, so this only works when the kernel's cwd is where the notebook
# expects it to be.
project_root = os.path.abspath(os.path.join(os.getcwd(), "../../.."))

# Re-running this cell must not stack duplicate entries on sys.path: drop an
# existing occurrence first, then put the root at the front so it keeps
# import priority.
if project_root in sys.path:
    sys.path.remove(project_root)
sys.path.insert(0, project_root)

# Project-local modules (resolvable only after the sys.path update above)
from src.models.baseline import train_dummy_regressor
from src.models.poisson import train_poisson_regressor
from src.utils.data_partition import temporal_split
from src.evaluation.spatial_metrics import pai, pei, pei_star
from src.utils.spatial_processing import predictions_to_grid, define_hotspot_by_crimes


# Load global config.
# The path is relative to the kernel's current working directory.
config_path = os.path.join("..", "config.json")
# JSON text is UTF-8; pass the encoding explicitly so decoding does not depend
# on the platform's locale default.
with open(config_path, "r", encoding="utf-8") as f:
    config = json.load(f)

# Hotspot settings from config; the definition method falls back to "by_cells"
# when the key is absent.
hotspot_percentage = config["evaluation"]["hotspot_percentage"]
hotspot_method = config["evaluation"].get("hotspot_definition", "by_cells")  # default

# Bind `define_hotspot` to the implementation selected in the config so the
# rest of the notebook can call it without caring which one is active.
if hotspot_method == "by_crimes":
    from src.utils.spatial_processing import define_hotspot_by_crimes as define_hotspot
elif hotspot_method == "by_cells":
    from src.utils.spatial_processing import define_hotspot_by_cells as define_hotspot
else:
    raise ValueError(f"Unknown hotspot_definition method: {hotspot_method}")

# Grid dimensions used when reshaping predictions into a spatial grid
rows = config["data_generation"]["rows"]
cols = config["data_generation"]["cols"]
grid_size = (rows, cols)

# Partition lengths: the config gives them in months; they are converted to
# days using a fixed 30-day month.
partition_config = config["data_partition"]
days_per_month = 30
train_days = partition_config["train_months"] * days_per_month
calibration_days = partition_config["calibration_months"] * days_per_month
test_days = partition_config["test_months"] * days_per_month


In [6]:
# Load data
# NOTE(review): the file name hard-codes a 40x40 grid while `grid_size` comes
# from config (rows, cols) -- confirm the two agree.
df = pd.read_csv("../data/examples/poisson_example_40x40.csv")

# Partition data.
# Month -> day conversion reuses `days_per_month` from the configuration cell
# instead of repeating the literal 30, so the two cells cannot drift apart.
partition = config["data_partition"]
train_days = partition["train_months"] * days_per_month
calib_days = partition["calibration_months"] * days_per_month
test_days = partition["test_months"] * days_per_month

# Cumulative day offsets marking the end of each consecutive window
train_end = train_days
calib_end = train_end + calib_days
test_end = calib_end + test_days
df_train, df_calib, df_test = temporal_split(df, train_end, calib_end, test_end)

# Fail early with a clear message rather than deep inside fit/predict
assert len(df_train) > 0 and len(df_test) > 0, (
    "temporal_split produced an empty train or test partition; "
    "check config['data_partition'] against the dataset length"
)

# Feature and target setup
features = ["timestep", "row", "col"]
target = "count"
X_train, y_train = df_train[features], df_train[target]
X_test, y_test = df_test[features], df_test[target]

# Train models
dummy_model = train_dummy_regressor(X_train, y_train)
poisson_model = train_poisson_regressor(X_train, y_train)

# Predict
dummy_pred = dummy_model.predict(X_test)
poisson_pred = poisson_model.predict(X_test)

# Metrics: RMSE and MAE (keyed by model name so label and data stay paired)
predictions = {"Dummy": dummy_pred, "Poisson": poisson_pred}
print("Numerical evaluation:\n")
for name, pred in predictions.items():
    rmse = np.sqrt(mean_squared_error(y_test, pred))
    mae = mean_absolute_error(y_test, pred)
    print(f"{name} - RMSE: {rmse:.3f} | MAE: {mae:.3f}")

# Transform to spatial grids; the observed grid is taken from the first call
# and the duplicate returned by the second call is discarded.
grid_true, grid_dummy = predictions_to_grid(X_test, y_test, dummy_pred, grid_size)
_, grid_poisson = predictions_to_grid(X_test, y_test, poisson_pred, grid_size)

# Hotspot masks: one per model, plus the one derived from the observed grid,
# which is passed to PEI as the reference hotspot.
hotspot_dummy = define_hotspot(grid_dummy, hotspot_percentage)
hotspot_poisson = define_hotspot(grid_poisson, hotspot_percentage)
hotspot_optimal = define_hotspot(grid_true, hotspot_percentage)

# Spatial metrics
hotspots = {"Dummy": hotspot_dummy, "Poisson": hotspot_poisson}
print("\nSpatial metrics evaluation:\n")
for name, hotspot in hotspots.items():
    print(f"{name}Regressor:")
    print(f"  PAI:     {pai(grid_true, hotspot):.3f}")
    print(f"  PEI:     {pei(grid_true, hotspot, hotspot_optimal):.3f}")
    print(f"  PEI*:    {pei_star(grid_true, hotspot):.3f}\n")

Numerical evaluation:

Dummy - RMSE: 2.383 | MAE: 1.479
Poisson - RMSE: 2.376 | MAE: 1.476

Spatial metrics evaluation:

DummyRegressor:
  PAI:     0.853
  PEI:     0.139
  PEI*:    0.360

PoissonRegressor:
  PAI:     1.194
  PEI:     0.195
  PEI*:    0.468

