# GreenValueNet

This notebook contains the code needed to execute the GreenValueNet hedonic pricing neural network. 

In [23]:
import os
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from shapely.geometry import Point
from pathlib import Path
from data_load_funcs import get_params, load_data_catalogue
from processing_funcs import process_data, normalise_values

cwd = Path.cwd()
params = get_params()
data_catalogue = load_data_catalogue()
print(params)

# check inputs func
# checks for processed file
# if not creates at highest level
# if not those goes back
# does again until raw files are checked

# in process_data, either load it in, or create it
# in process_housing_data, either load it in, or create it
# same in spatial data


{'chunksize': 100000, 'target_grid': 1000, 'land_use_legend': {1: 'broad_lf_wdlnd', 2: 'conif_wdlnd', 3: 'arable', 4: 'imp_grasslnd', 5: 'semi_nat_grasslnd', 6: 'calcereous_grassland', 7: 'calcereous_grassland', 8: 'calcereous_grassland', 9: 'mountain_hth_bog', 10: 'mountain_hth_bog', 11: 'mountain_hth_bog', 12: 'mountain_hth_bog', 13: 'saltwater', 14: 'freshwater', 15: 'coastal', 16: 'coastal', 17: 'coastal', 18: 'coastal', 19: 'coastal', 20: 'built_up', 21: 'built_up', 255: 'other'}, 'spatial_dict': {'coastline': {'folder': 'processed_inputs', 'file': 'coastline.shp'}, 'prim_school': {'folder': 'processed_inputs', 'file': 'primary_school.shp'}, 'sec_school': {'folder': 'processed_inputs', 'file': 'secondary_school.shp'}, 'roads': {'folder': 'processed_inputs', 'file': 'roads_c.shp'}, 'nat_park': {'folder': 'raw_inputs', 'file': 'National_Parks_(England)___Natural_England.shp'}, 'nat_trust': {'folder': 'raw_inputs', 'file': 'NT_Land_Always_Open.shp'}, 'rail_station': {'folder': 'raw_i

In [None]:
dataset = process_data(data_catalogue, params)

# print summary stats table here
# extract norm cols from params
for col in norm_cols:
    dataset[col] = normalise_values(dataset[col])

In [10]:
import rasterio
import numpy as np
from data_load_funcs import get_file_path
def calc_rast_props(raster_data, legend, target_grid_size, pixel_dim: dict):
    """
    This function takes in a raster as a numpy array and uses the legend and pixel_dim
    to calculate the proportion of each pixel in target grid size that is each legend
    category
    """
    pixel_w, pixel_h = pixel_dim['width'], pixel_dim['height']
    target_grid_pixels = int(target_grid_size / pixel_w)
    # Reshape the raster data into non-overlapping target grid cells
    reshaped_data = raster_data.reshape(
        raster_data.shape[0] // target_grid_pixels, target_grid_pixels,
        raster_data.shape[1] // target_grid_pixels, target_grid_pixels
    )

    # Count the occurrences of each land use category in each grid cell
    counts = np.zeros((len(legend), reshaped_data.shape[0], reshaped_data.shape[2]), dtype=int)
    for value, category in legend.items():
        counts[value - 1] = (reshaped_data == value).sum(axis=(1, 3))

 
    total_pixels = counts.sum(axis=0)
    proportions_grid_cell = counts / total_pixels
    proportions = proportions_grid_cell.sum(axis=(1, 2))
    total_cells = reshaped_data.shape[0] * reshaped_data.shape[2]
    proportions /= total_cells

    return proportions

In [19]:
catalogue = data_catalogue
lu_rast = cwd / "data" / get_file_path(catalogue, 'inputs', 'land_cover')
clean_path = cwd / "data" / "processed_inputs" / params['land_use']['processed_file']
if os.path.exists(clean_path):
    lu_props = pd.read_file(clean_path)
else:
    # TODO update and move to Params
    legend = params['land_use_legend']
    target_grid_size = params['target_grid']

    with rasterio.open(lu_rast) as src:
        land_use = src.read(1)
        pix_w, pix_h = src.transform.a, src.transform.e

    pixel_dim = {
        'height': pix_h,
        'width': pix_w
    }
    
    # lu_props = calc_rast_props(land_use, legend, target_grid_size, pixel_dim)

Extracting file path from catalogue: inputs/land_cover ...


In [24]:
pixel_w, pixel_h = pixel_dim['width'], pixel_dim['height']
target_grid_pixels = int(target_grid_size / pixel_w)
# Reshape the raster data into non-overlapping target grid cells
reshaped_data = land_use.reshape(
    land_use.shape[0] // target_grid_pixels, target_grid_pixels,
    land_use.shape[1] // target_grid_pixels, target_grid_pixels
)

print(reshaped_data)


[[[[255 255 255 ... 255 255 255]
   [255 255 255 ... 255 255 255]
   [255 255 255 ... 255 255 255]
   ...
   [255 255 255 ... 255 255 255]
   [255 255 255 ... 255 255 255]
   [255 255 255 ... 255 255 255]]

  [[255 255 255 ... 255 255 255]
   [255 255 255 ... 255 255 255]
   [255 255 255 ... 255 255 255]
   ...
   [255 255 255 ... 255 255 255]
   [255 255 255 ... 255 255 255]
   [255 255 255 ... 255 255 255]]

  [[255 255 255 ... 255 255 255]
   [255 255 255 ... 255 255 255]
   [255 255 255 ... 255 255 255]
   ...
   [255 255 255 ... 255 255 255]
   [255 255 255 ... 255 255 255]
   [255 255 255 ... 255 255 255]]

  ...

  [[255 255 255 ... 255 255 255]
   [255 255 255 ... 255 255 255]
   [255 255 255 ... 255 255 255]
   ...
   [255 255 255 ... 255 255 255]
   [255 255 255 ... 255 255 255]
   [255 255 255 ... 255 255 255]]

  [[255 255 255 ... 255 255 255]
   [255 255 255 ... 255 255 255]
   [255 255 255 ... 255 255 255]
   ...
   [255 255 255 ... 255 255 255]
   [255 255 255 ... 255 25

In [25]:
# Count the occurrences of each land use category in each grid cell
counts = np.zeros((len(legend), reshaped_data.shape[0], reshaped_data.shape[2]), dtype=int)
for value, category in legend.items():
    counts[value - 1] = (reshaped_data == value).sum(axis=(1, 3))


total_pixels = counts.sum(axis=0)
proportions_grid_cell = counts / total_pixels
proportions = proportions_grid_cell.sum(axis=(1, 2))
total_cells = reshaped_data.shape[0] * reshaped_data.shape[2]
proportions /= total_cells

  proportions_grid_cell = counts / total_pixels


In [28]:
total_pixels

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [29]:
values, counts = np.unique(total_pixels, return_counts=True)#
print(values)
print(counts)


[   0    1    2 ... 2498 2499 2500]
[666821     92     69 ...    130    368 227738]


In [None]:
subset_sz = 2000
subset_pxl = int(subset_sz / pixel_w)
sta