Optimize the GWR bandwidth using a reinforcement learning approach (Proximal Policy Optimization, PPO).

In [None]:
from stable_baselines3 import PPO
import pandas as pd

from src.optimizer.reinforce.gwr_optimizer import GwrOptimizerRL
from src.dataset.interfaces.spatial_dataset import FieldInfo
from src.optimizer.reinforce.callback import EpisodeTracker
from src.dataset.spatial_dataset import SpatialDataset
from src.kernel.gwr_kernel import GwrKernel
from src.log.gwr_logger import GwrLogger
from src.model.gwr import GWR

Create a logger to record the GWR model's information.

In [None]:
# Single logger instance shared by every component in this notebook: it is
# passed to the dataset, kernel, GWR model, RL environment and training
# callback, and is used at the end to write the collected records out.
logger = GwrLogger()

Load the Georgia dataset and create a spatial dataset.

In [None]:
# Read the raw Georgia table from disk.
georgia_data = pd.read_csv(r'./data/GData_utm.csv')

# Column roles: three predictors, one response, and the x/y coordinate columns.
georgia_fields = FieldInfo(
	predictor_fields=['PctFB', 'PctBlack', 'PctRural'],
	response_field='PctBach',
	coordinate_x_field='Longitud',
	coordinate_y_field='Latitude'
)

# NOTE(review): isSpherical=True presumably marks the coordinates as
# longitude/latitude rather than projected units — confirm in SpatialDataset.
spatialDataset = SpatialDataset(georgia_data, georgia_fields, logger, isSpherical=True)

Create a GWR kernel and GWR model.

In [None]:
# Bisquare kernel with an adaptive bandwidth, built on the spatial dataset.
kernel = GwrKernel(spatialDataset, logger, kernel_type='bisquare', kernel_bandwidth_type='adaptive')

# The GWR model ties the dataset and the kernel together; the bandwidth itself
# is left for the optimizer to choose.
gwr = GWR(spatialDataset, kernel, logger)

Initialize the GWR Gym environment.

In [None]:
# Upper bandwidth limit is the first dimension of the design matrix
# (presumably the number of observations — confirm against SpatialDataset.X).
bandwidth_upper_bound = spatialDataset.X.shape[0]

# Environment wrapping the GWR model: the bandwidth search range is
# [10, bandwidth_upper_bound] and each action is bounded to [-10, 10].
env = GwrOptimizerRL(
	gwr,
	logger,
	min_bandwidth=10,
	max_bandwidth=bandwidth_upper_bound,
	min_action=-10,
	max_action=10,
)

Use PPO to optimize the bandwidth.

In [None]:
# Fixed seed so that PPO's network initialisation and action sampling are
# repeatable when the notebook is re-run from a fresh kernel.
SEED = 42
# Total number of environment steps PPO is allowed to collect during training.
TOTAL_TIMESTEPS = 5000

# Training callback; it receives the shared logger and the timestep budget.
episodeTracker = EpisodeTracker(
	logger,
	total_timesteps=TOTAL_TIMESTEPS
)

# PPO agent with the default MLP policy, trained on the CPU.
model = PPO(
	"MlpPolicy",
	env,
	verbose=1,
	seed=SEED,
	device='cpu'
)

# Run the training loop, reporting progress through the callback.
model.learn(
	total_timesteps=TOTAL_TIMESTEPS,
	callback=episodeTracker
)
logger.append_info("PPO: PPO finished training.")

Test the model

In [None]:
# Upper bound on evaluation steps in case the environment never signals
# termination or truncation.
MAX_TEST_STEPS = 100

# Throw-away values get explicit names instead of `_`: in a notebook `_` is
# IPython's "last output" variable and assigning to it shadows that feature.
obs, _reset_info = env.reset()
for _step in range(MAX_TEST_STEPS):
	# deterministic=True makes the trained policy return its most likely action
	# instead of sampling, so the evaluation rollout is repeatable.
	action, _state = model.predict(obs, deterministic=True)
	obs, reward, done, truncated, _step_info = env.step(action)
	logger.append_info(
		f"Bandwidth: {obs}, Reward (R2): {reward}"
	)
	# Stop as soon as the episode ends, whether by termination or truncation.
	if done or truncated:
		break

# Persist everything the logger has collected during the run.
logger.save_model_info_json()