## Spatiotemporal Forecasting and Interpolation with Gaussian Process Regression (GPR)

Using all observations collected so far, fit a spatiotemporal Gaussian Process and use it to predict P1 and P2 on a regular grid over Berlin at a future timestamp.

In [1]:
import pandas as pd
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

# Latitude/longitude bounds of the area of interest
# (presumably the Berlin grid mentioned in the notebook intro).
BBOX = dict(
    lat_min=52.3383,
    lat_max=52.6755,
    lon_min=13.0884,
    lon_max=13.7612,
)

In [2]:
# Load the hourly pollutant readings from the Parquet file
df = pd.read_parquet(path="../data/2024-citsci-pollutants-hourly.parquet")

# Preview the first five rows
df.head(5)

Unnamed: 0,PM2_5,lat,lon,timestamp_hour
0,10.93,52.508,13.422,2024-03-28
1,12.1,52.508,13.422,2024-03-28
2,12.1,52.508,13.422,2024-03-28
3,11.63,52.508,13.422,2024-03-28
4,11.7,52.508,13.422,2024-03-28


In [9]:
# Parse the hour column into real datetimes so time arithmetic works
df['timestamp_hour'] = pd.to_datetime(df['timestamp_hour'])

# Collapse repeated readings: one mean PM2_5 value per (lat, lon, hour)
df = (
    df.groupby(['lat', 'lon', 'timestamp_hour'])['PM2_5']
    .mean()
    .reset_index()
)

# Time feature: minutes elapsed since the earliest hour present in the data
start_time = df['timestamp_hour'].min()
df['minutes_since_start'] = (
    df['timestamp_hour'].sub(start_time).dt.total_seconds().div(60)
)

In [10]:
# Feature matrix: one row per aggregated reading, columns are
# latitude, longitude and elapsed minutes (in that order)
X = df.loc[:, ['lat', 'lon', 'minutes_since_start']].to_numpy()

# Regression target: the aggregated PM2_5 value for each row of X
y = df['PM2_5'].to_numpy()

In [13]:
# Show the dimensions of the feature matrix and the target vector
(X.shape, y.shape)

((1489017, 3), (1489017,))

In [None]:
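# Gaussian process -- fit on a GPU!
# TODO: the scikit-learn GaussianProcessRegressor imported above is an exact GP
# (roughly O(n^3) time and O(n^2) memory in the number of training rows) and,
# as far as we know, has no GPU backend. With ~1.49M rows (see X.shape above)
# this needs a GPU-capable / approximate GP library and/or subsampling.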