# Sample explanatory variables based on empirical data and custom scenario definition

This notebook contains code to generate explanatory variables for an output area based on 4 sliders:

- Level of urbanity
- Use
- Greenspace
- Job types

The data are either adapted from the originally observed values (if the level of urbanity does not change) or sampled from the distributions of the signature types across Great Britain (GB).

In [9]:
import pandas as pd
import geopandas as gpd
import numpy as np

In [10]:
# NOTE: absolute, machine-specific path to the shared data folder. Change it to
# the location of the local copy of the data before running the cells below.
data_folder = "/Users/martin/Library/CloudStorage/OneDrive-SharedLibraries-TheAlanTuringInstitute/Daniel Arribas-Bel - demoland_data"

Load the data informing the distributions.

In [11]:
# Median and interquartile-range tables. The sampling helpers below look them
# up as ``table.loc[signature_type, variable]``, i.e. one row per signature
# type and one column per variable.
median_form = pd.read_parquet(f"{data_folder}/sampling/median_form.parquet")
iqr_form = pd.read_parquet(f"{data_folder}/sampling/iqr_form.parquet")
median_function = pd.read_parquet(f"{data_folder}/sampling/median_function.parquet")
iqr_function = pd.read_parquet(f"{data_folder}/sampling/iqr_function.parquet")

# Observed values per output area (OA), indexed by OA code. The population
# column is renamed to ``population``, the name the code below uses.
oa = gpd.read_parquet(f"{data_folder}/processed/interpolated/all_oa.parquet")
oa = oa.set_index("geo_code")
oa = oa.rename(columns={"population_estimate": "population"})

# Lookup table whose ``primary_type`` column gives the signature type of each OA.
oa_key = pd.read_parquet(f"{data_folder}/sampling/oa_key.parquet")

Get OA areas for area-weighted variables.

In [12]:
# Area of every OA geometry, indexed like ``oa`` (by OA code). It is used to
# scale sampled population and job values by the area of the target OA.
# NOTE(review): units follow the CRS of ``oa`` - confirm it is a projected CRS.
oa_area = oa.area

Define a sampling method.

In [13]:
def _form(signature_type, variable, random_seed):
    """Sample a single form variable for a signature type

    The value is one draw from a normal distribution centred on the median
    of ``variable`` within ``signature_type`` (looked up in ``median_form``).
    The standard deviation is one fifth of the matching interquartile range
    (looked up in ``iqr_form``).

    A new random generator is created from ``random_seed`` on every call.
    """
    generator = np.random.default_rng(random_seed)
    centre = median_form.loc[signature_type, variable]
    spread = iqr_form.loc[signature_type, variable] / 5
    return generator.normal(loc=centre, scale=spread)


def _function(signature_type, variable, random_seed):
    """Sample a single function variable for a signature type

    The value is one draw from a normal distribution centred on the median
    of ``variable`` within ``signature_type`` (looked up in
    ``median_function``). The standard deviation is one fifth of the matching
    interquartile range (looked up in ``iqr_function``).

    A new random generator is created from ``random_seed`` on every call.
    """
    generator = np.random.default_rng(random_seed)
    centre = median_function.loc[signature_type, variable]
    spread = iqr_function.loc[signature_type, variable] / 5
    return generator.normal(loc=centre, scale=spread)


def _populations(defaults, index):
    """Balance residential and workplace population

    Workplace population and residential population are treated 1:1 and
    are re-allocated based on the index. The proportion of workplace categories
    is not changed.

    Parameters
    ----------
    defaults : Series
        Values of function variables. Must contain ``"population"`` and the
        eight workplace (job) categories listed below. It is not modified.
    index : float
        Value in a range -1...1. For negative values, ``abs(index)`` of all
        jobs is turned into residents. For positive values, ``index`` of the
        residential population is turned into jobs.

    Returns
    -------
    Series
        A copy of ``defaults`` with updated population and job counts.

    Raises
    ------
    ValueError
        If ``index`` is outside of the -1...1 range.
    """
    if not -1 <= index <= 1:
        raise ValueError(f"use index must be in a range -1...1. {index} given.")
    jobs = [
        "A, B, D, E. Agriculture, energy and water",
        "C. Manufacturing",
        "F. Construction",
        "G, I. Distribution, hotels and restaurants",
        "H, J. Transport and communication",
        "K, L, M, N. Financial, real estate, professional and administrative activities",
        "O,P,Q. Public administration, education and health",
        "R, S, T, U. Other",
    ]
    # Work on a copy so the Series passed in by the caller is never changed.
    result = defaults.copy()
    population = result["population"]
    n_jobs = result[jobs].sum()

    # Number of people moved from residents to jobs (negative: jobs to residents).
    if index < 0:
        difference = index * n_jobs
    else:
        difference = index * population
    new_n_jobs = n_jobs + difference
    result["population"] = population - difference

    if n_jobs == 0:
        # There is no existing mix of categories to preserve and scaling would
        # divide by zero, so the new jobs are spread evenly across categories.
        result[jobs] = new_n_jobs / len(jobs)
    else:
        result[jobs] = result[jobs] * (new_n_jobs / n_jobs)
    return result


def _greenspace(defaults, index):
    """Allocate greenspace to OA

    Sets the portion of the OA covered by publicly accessible formal
    greenspace (green urban areas) to ``index``. Realistic values are
    fairly low. Every other value in ``defaults`` is multiplied by one minus
    the newly allocated share (``index`` minus the original greenspace
    share), so populations and the other land cover classes change with it.

    Raises ``ValueError`` if ``index`` is outside of the 0...1 range.
    """
    within_bounds = 0 <= index <= 1
    if not within_bounds:
        raise ValueError(f"greenspace index must be in a range 0...1. {index} given.")

    green_key = "Land cover [Green urban areas]"
    added_share = index - defaults[green_key]
    rescaled = defaults * (1 - added_share)
    # The greenspace share itself is set directly rather than rescaled.
    rescaled[green_key] = index
    return rescaled


def _job_types(defaults, index):
    """Balance job types

    Balance job types between manual and white collar workplace categories.
    Index represents the proportion of white collar jobs in an area. The
    total sum of FTEs is not changed.

    The service category is not affected under an assumption that both white
    and blue collar workers need the same amount of services to provide food etc.

    Parameters
    ----------
    defaults : Series
        Values of function variables. Must contain the blue and white collar
        workplace categories listed below. It is not modified.
    index : float
        Value in a range 0...1, the target share of white collar jobs in the
        sum of blue and white collar jobs.

    Returns
    -------
    Series
        A copy of ``defaults`` with re-balanced blue and white collar jobs.

    Raises
    ------
    ValueError
        If ``index`` is outside of the 0...1 range.
    """
    if not 0 <= index <= 1:
        raise ValueError(f"job_types index must be in a range 0...1. {index} given.")
    blue = [
        "A, B, D, E. Agriculture, energy and water",
        "C. Manufacturing",
        "F. Construction",
        "H, J. Transport and communication",
    ]
    white = [
        "K, L, M, N. Financial, real estate, professional and administrative activities",
        "O,P,Q. Public administration, education and health",
    ]
    # Work on a copy so the Series passed in by the caller is never changed.
    result = defaults.copy()
    blue_collar = result[blue].sum()
    white_collar = result[white].sum()
    total = blue_collar + white_collar

    targets = (
        (blue, blue_collar, total * (1 - index)),
        (white, white_collar, total * index),
    )
    for columns, current, target in targets:
        if current == 0:
            # The group has no existing mix to preserve and scaling would
            # divide by zero, so its target is spread evenly across categories.
            result[columns] = target / len(columns)
        else:
            result[columns] = result[columns] * (target / current)

    return result


def get_signature_values(
    oa_code: str,
    signature_type: str = None,
    use: float = 0,
    greenspace: float = None,
    job_types: float = None,
    random_seed: int = None,
):
    """Generate explanatory variables based on a scenario

    Generates values for explanatory variables based on empirical data derived
    from the Urban Grammar project and a scenario definition based on a
    Urban Grammar signature type, land use balance, greenspace allocation
    and a job type balance.

    If the target ``signature_type`` differs from the one already allocated
    to OA, the data is sampled from the distribution from the whole GB. If
    they are equal, the existing values measured in place are used. That allows
    playing with other variables without changing the form.

    Parameters
    ----------
    oa_code : string
        String representing the OA code, e.g. ``"E00042707"``.

    signature_type : string, optional
        String representing signature type. If ``None`` (default), the
        signature type already allocated to the OA is kept. See below the
        possible options and their relationship to the level of urbanity.

            0: 'Wild countryside',
            1: 'Countryside agriculture',
            2: 'Urban buffer',
            3: 'Warehouse/Park land',
            4: 'Open sprawl',
            5: 'Disconnected suburbia',
            6: 'Accessible suburbia',
            7: 'Connected residential neighbourhoods',
            8: 'Dense residential neighbourhoods',
            9: 'Gridded residential quarters',
            10: 'Dense urban neighbourhoods',
            11: 'Local urbanity',
            12: 'Regional urbanity',
            13: 'Metropolitan urbanity',
            14: 'Concentrated urbanity',
            15: 'Hyper concentrated urbanity',

    use : float, optional
        Float in a range -1...1 reflecting the land use balance between
        fully residential (-1) and fully commercial (1). Defaults to 0,
        a value derived from signatures. For values < 0, we are allocating
        workplace population to residential population. For values > 0, we
        are allocating residential population to workplace population.
        Extremes are allowed but are not realistic, in most cases.
    greenspace : float, optional
        Float in a range 0...1 reflecting the amount of greenspace in the
        area. 0 represents no accessible greenspace, 1 represents whole
        area covered by a greenspace. This value will proportionally affect
        the amounts of jobs and population. If ``None`` (default), the
        greenspace is left as it is.
    job_types : float, optional
        Float in a range 0...1 reflecting the balance of job types in the
        area between entirely blue collar jobs (0) and entirely white collar
        jobs (1). If ``None`` (default), the balance is left as it is.
    random_seed : int, optional
        Random seed used when values are sampled. The same seed is passed to
        the sampling of every variable, each of which creates its own
        generator from it.

    Returns
    -------
    Series
        Form variables followed by function variables.
    """
    orig_type = oa_key.primary_type[oa_code]
    if signature_type is not None and orig_type != signature_type:
        # The signature type changes: sample every variable from the
        # distribution of the target type. Absolute values keep the samples
        # non-negative.
        form = pd.Series(
            [_form(signature_type, var, random_seed) for var in median_form.columns],
            index=median_form.columns,
            name=oa_code,
        ).abs()

        defaults = pd.Series(
            [
                _function(signature_type, var, random_seed)
                for var in median_function.columns
            ],
            index=median_function.columns,
            name=oa_code,
        ).abs()

        # Sampled population and job values are multiplied by the OA area to
        # get values for this OA.
        area_weighted = [
            "population",
            "A, B, D, E. Agriculture, energy and water",
            "C. Manufacturing",
            "F. Construction",
            "G, I. Distribution, hotels and restaurants",
            "H, J. Transport and communication",
            "K, L, M, N. Financial, real estate, professional and administrative activities",
            "O,P,Q. Public administration, education and health",
            "R, S, T, U. Other",
        ]
        defaults[area_weighted] = defaults[area_weighted] * oa_area[oa_code]

    else:
        # The signature type stays: use the values observed for this OA.
        form = oa.loc[oa_code][median_form.columns]
        defaults = oa.loc[oa_code][median_function.columns]

    # population
    if use != 0:
        defaults = _populations(defaults, index=use)

    # greenspace
    # ``None`` means "leave untouched". 0 is a valid request (no greenspace,
    # or only blue collar jobs below), so compare against ``None`` instead of
    # relying on truthiness, which would silently ignore 0.
    if greenspace is not None:
        defaults = _greenspace(defaults, greenspace)

    # job types
    if job_types is not None:
        defaults = _job_types(defaults, job_types)
    return pd.concat([form, defaults])

Example: 

Set the OA we are interested in.

In [14]:
oa_code = "E00042271"

Check the signature type of the OA. 

In [15]:
oa_key.primary_type[oa_code]

'Dense urban neighbourhoods'

These are the actual observed values with no changes.

In [16]:
get_signature_values(
    oa_code,
)

sdbAre                                                                               836.43386
sdbCoA                                                                               10.849734
ssbCCo                                                                                0.342091
ssbCor                                                                                5.918075
ssbSqu                                                                                5.799881
ssbERI                                                                                  0.8782
ssbCCM                                                                               28.454278
ssbCCD                                                                                3.243832
stbOri                                                                               14.042689
sdcAre                                                                              3387.82744
sscCCo                                            

Stay within the same signature type and change only use. 

1. More residential

In [17]:
get_signature_values(oa_code, use=-0.5)

sdbAre                                                                               836.43386
sdbCoA                                                                               10.849734
ssbCCo                                                                                0.342091
ssbCor                                                                                5.918075
ssbSqu                                                                                5.799881
ssbERI                                                                                  0.8782
ssbCCM                                                                               28.454278
ssbCCD                                                                                3.243832
stbOri                                                                               14.042689
sdcAre                                                                              3387.82744
sscCCo                                            

2. Less residential, more jobs

In [18]:
get_signature_values(oa_code, use=0.4)

sdbAre                                                                               836.43386
sdbCoA                                                                               10.849734
ssbCCo                                                                                0.342091
ssbCor                                                                                5.918075
ssbSqu                                                                                5.799881
ssbERI                                                                                  0.8782
ssbCCM                                                                               28.454278
ssbCCD                                                                                3.243832
stbOri                                                                               14.042689
sdcAre                                                                              3387.82744
sscCCo                                            

3. Less residential (more jobs) and more greenspace.

Check current greenspace first.

In [19]:
get_signature_values(
    oa_code,
)["Land cover [Green urban areas]"]

0.0

There is no greenspace at the moment. Allocate 20% of the area to it.

In [20]:
get_signature_values(oa_code, use=0.4, greenspace=0.2)

sdbAre                                                                               836.43386
sdbCoA                                                                               10.849734
ssbCCo                                                                                0.342091
ssbCor                                                                                5.918075
ssbSqu                                                                                5.799881
ssbERI                                                                                  0.8782
ssbCCM                                                                               28.454278
ssbCCD                                                                                3.243832
stbOri                                                                               14.042689
sdcAre                                                                              3387.82744
sscCCo                                            

Change job type allocation towards more blue collar jobs.

In [21]:
get_signature_values(
    oa_code,
    use=0.4,
    greenspace=0.2,
    job_types=0.2,
)

sdbAre                                                                               836.43386
sdbCoA                                                                               10.849734
ssbCCo                                                                                0.342091
ssbCor                                                                                5.918075
ssbSqu                                                                                5.799881
ssbERI                                                                                  0.8782
ssbCCM                                                                               28.454278
ssbCCD                                                                                3.243832
stbOri                                                                               14.042689
sdcAre                                                                              3387.82744
sscCCo                                            

Change the signature type (a proxy for a level of urbanity).

In [22]:
get_signature_values(
    oa_code,
    signature_type="Local urbanity",
    use=0.4,
    greenspace=0.2,
    job_types=0.2,
)

sdbAre                                                                              422.259877
sdbCoA                                                                                0.000000
ssbCCo                                                                                0.408337
ssbCor                                                                                3.769432
ssbSqu                                                                                0.403360
ssbERI                                                                                0.982311
ssbCCM                                                                               18.595822
ssbCCD                                                                                0.135386
stbOri                                                                                8.222212
sdcAre                                                                             1663.577623
sscCCo                                            