In [1]:
%matplotlib inline

In [2]:
!pip install pykrige

Collecting pykrige
  Downloading PyKrige-1.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (852 kB)
[K     |████████████████████████████████| 852 kB 27.5 MB/s eta 0:00:01
Installing collected packages: pykrige
Successfully installed pykrige-1.7.0
You should consider upgrading via the '/scratch/ab9738/cctv_pollution/env/bin/python -m pip install --upgrade pip' command.[0m



# Regression kriging

An example of regression kriging


In [3]:
import sys

from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR

from pykrige.rk import RegressionKriging

In [4]:
svr_model = SVR(C=0.1, gamma="auto")
rf_model = RandomForestRegressor(n_estimators=100)
lr_model = LinearRegression(normalize=True, copy_X=True, fit_intercept=False)

models = [svr_model, rf_model, lr_model]

In [5]:
try:
    housing = fetch_california_housing()
except PermissionError:
    # this dataset can occasionally fail to download on Windows
    sys.exit(0)

In [11]:
housing["target"]

array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894])

In [12]:
# take the first 5000 as Kriging is memory intensive
p = housing["data"][:5000, :-2]
x = housing["data"][:5000, -2:]
target = housing["target"][:5000]

In [13]:
p_train, p_test, x_train, x_test, target_train, target_test = train_test_split(
    p, x, target, test_size=0.3, random_state=42
)

In [14]:
for m in models:
    print("=" * 40)
    print("regression model:", m.__class__.__name__)
    m_rk = RegressionKriging(regression_model=m, n_closest_points=10)
    m_rk.fit(p_train, x_train, target_train)
    print("Regression Score: ", m_rk.regression_model.score(p_test, target_test))
    print("RK score: ", m_rk.score(p_test, x_test, target_test))

regression model: SVR
Finished learning regression model
Finished kriging residuals
Regression Score:  -0.03405385545698292
RK score:  0.6706182225388981
regression model: RandomForestRegressor
Finished learning regression model
Finished kriging residuals
Regression Score:  0.7020655527067068
RK score:  0.7427155571449324
regression model: LinearRegression
Finished learning regression model


If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LinearRegression())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




Finished kriging residuals
Regression Score:  0.5277968398381674
RK score:  0.6036605153133717
