# Air quality and house price model inference

Wrap each trained model into a custom class that takes care of the spatial lag computation.

In [1]:
import os
import pickle

import geopandas as gpd
import libpysal

from indicators import Model


import os
os.environ['USE_PYGEOS'] = '0'
import geopandas

In a future release, GeoPandas will switch to using Shapely by default. If you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).
  import geopandas as gpd


In [2]:
# Root folder of the shared project data. The default is the original author's
# local OneDrive mount; set the DEMOLAND_DATA_FOLDER environment variable to
# point the notebook at another copy without editing this cell.
data_folder = os.environ.get(
    "DEMOLAND_DATA_FOLDER",
    "/Users/martin/Library/CloudStorage/OneDrive-SharedLibraries-TheAlanTuringInstitute/Daniel Arribas-Bel - demoland_data",
)

Load the data

In [3]:
# Read the GeoParquet table that supplies both the geometries (used for the
# spatial weights below) and the columns used as explanatory variables.
data = gpd.read_parquet(
    f"{data_folder}/processed/interpolated/all_oa.parquet"
)

Filter only explanatory variables.

In [4]:
# Explanatory variables are whatever is left once the identifier, the geometry
# and the four index columns have been removed.
exvars = data.drop(
    columns=["geo_code", "geometry"]
    + [
        "air_quality_index",
        "house_price_index",
        "jobs_accessibility_index",
        "greenspace_accessibility_index",
    ]
)

## Air quality

Load the sklearn model

In [5]:
# Deserialize the fitted air quality model.
# NOTE: pickle.load can execute arbitrary code; only use it on trusted files.
with open(
    f"{data_folder}/models/air_quality_model.pickle", mode="rb"
) as model_file:
    air_quality = pickle.load(model_file)

Create spatial weights

In [6]:
# Neighbourhood definition for the lag: Queen contiguity merged with every
# observation inside a distance band of 2000 (in the units of the data's CRS,
# presumably metres - TODO confirm).
queen = libpysal.weights.Queen.from_dataframe(data)
_2k = libpysal.weights.DistanceBand.from_dataframe(data, threshold=2000)
W = libpysal.weights.w_union(queen, _2k)

# "r" asks libpysal for row-standardised weights.
W.transform = "r"

 There are 3 disconnected components.


Create object.

In [7]:
# Bundle the weights with the loaded model in the project's `Model` wrapper.
# `W` here is the Queen + distance-band union built in the preceding cell.
aqm = Model(W, air_quality)

Save the custom predictor class to a pickle.

In [8]:
# Persist the wrapped predictor next to the raw model so it can be reloaded
# later without rebuilding the weights.
with open(
    f"{data_folder}/models/air_quality_predictor.pickle", mode="wb"
) as predictor_file:
    pickle.dump(aqm, predictor_file)

### England-wide model

Load the sklearn model

In [6]:
# Deserialize the "_gb" variant of the air quality model; this rebinds
# `air_quality`, replacing the model loaded earlier.
# NOTE: pickle.load can execute arbitrary code; only use it on trusted files.
with open(
    f"{data_folder}/models/air_quality_model_gb.pickle", mode="rb"
) as model_file:
    air_quality = pickle.load(model_file)

Create spatial weights

In [7]:
# Rebuild Queen contiguity and expand it to order 5 with the lower orders
# included. This rebinds `W`, replacing any weights assigned earlier.
queen = libpysal.weights.Queen.from_dataframe(data)
W = libpysal.weights.higher_order(
    queen,
    k=5,
    lower_order=True,
    silence_warnings=True,
)

# "r" asks libpysal for row-standardised weights.
W.transform = "r"

 There are 3 disconnected components.


Create object.

In [8]:
# Wrap the "_gb" air quality model together with the weights in `W`.
# On a top-to-bottom run `W` is the order-5 Queen weights from the preceding cell.
aqm = Model(W, air_quality)

Save the custom predictor class to a pickle.

In [9]:
# Persist the wrapped "_gb" air quality predictor.
with open(
    f"{data_folder}/models/air_quality_predictor_gb.pickle", mode="wb"
) as predictor_file:
    pickle.dump(aqm, predictor_file)

## House price

Load the sklearn model

In [9]:
# Deserialize the fitted house price model.
# NOTE: pickle.load can execute arbitrary code; only use it on trusted files.
with open(
    f"{data_folder}/models/house_price_model.pickle", mode="rb"
) as model_file:
    house_price = pickle.load(model_file)

Create spatial weights

In [10]:
# Order-5 Queen contiguity with the lower orders included, derived from the
# `queen` weights created in an earlier cell.
q5 = libpysal.weights.higher_order(
    queen,
    k=5,
    lower_order=True,
)

# "r" asks libpysal for row-standardised weights.
q5.transform = "r"

Create the wrapper object that computes the spatial lag.

In [11]:
# Wrap the house price model together with the order-5 Queen weights `q5`.
hpm = Model(q5, house_price)

Save the custom predictor class to a pickle.

In [12]:
# Persist the wrapped house price predictor.
with open(
    f"{data_folder}/models/house_price_predictor.pickle", mode="wb"
) as predictor_file:
    pickle.dump(hpm, predictor_file)

### England-wide model

Load the sklearn model

In [10]:
# Deserialize the "_gb" variant of the house price model; this rebinds
# `house_price`, replacing the model loaded earlier.
# NOTE: pickle.load can execute arbitrary code; only use it on trusted files.
with open(
    f"{data_folder}/models/house_price_model_gb.pickle", mode="rb"
) as model_file:
    house_price = pickle.load(model_file)

Create the wrapper object that computes the spatial lag.

In [11]:
# Wrap the "_gb" house price model with the weights currently bound to `W`.
# `W` is assigned in two different cells of this notebook (a Queen +
# distance-band union, and later order-5 Queen weights); on a top-to-bottom
# run the order-5 Queen weights are the ones in effect here.
# NOTE(review): confirm that is the intended weights object for this model.
hpm = Model(W, house_price)

Save the custom predictor class to a pickle.

In [12]:
# Persist the wrapped "_gb" house price predictor.
with open(
    f"{data_folder}/models/house_price_predictor_gb.pickle", mode="wb"
) as predictor_file:
    pickle.dump(hpm, predictor_file)

## Using the class for prediction

To use the class for prediction, load the pickle and call `predict` on a data frame with explanatory variables (either default or reflecting a scenario).

In [5]:
# Reload the saved air quality predictor to show how it is consumed.
# NOTE: pickle.load can execute arbitrary code; only use it on trusted files.
with open(
    f"{data_folder}/models/air_quality_predictor.pickle", mode="rb"
) as predictor_file:
    aqm2 = pickle.load(predictor_file)

In [6]:
# Predict from the explanatory-variable frame; as the last expression in the
# cell, the result is displayed as the cell output.
aqm2.predict(exvars)

array([17.19278662, 16.43954378, 17.48423016, ..., 16.7559517 ,
       12.60627689, 17.31309272])

The house price model is used in exactly the same way.