In [4]:
import pandas as pd
import geopandas as gpd
import acbm

from libpysal.weights import Queen


from acbm.assigning.primary_select import (
    select_facility,
)


In [5]:
activity_chains = pd.read_csv(
    acbm.root_path / "data/processed/activities_pam/legs.csv"
)

activity_chains = activity_chains.drop(columns=["Unnamed: 0", "freq"])

In [6]:
activity_chains.head(5)

Unnamed: 0,pid,hid,ozone,dzone,purp,origin activity,destination activity,mode,seq,tst,tet,duration
0,199,89,E00059031,E00057751,other,home,other,walk,1.0,1900-01-01 10:00:00,1900-01-01 10:45:00,0:45:00
1,199,89,E00057751,E00059031,home,other,home,walk,2.0,1900-01-01 10:45:00,1900-01-01 11:30:00,0:45:00
2,199,89,E00059031,E00057787,shop,home,shop,car,3.0,1900-01-01 12:00:00,1900-01-01 12:10:00,0:10:00
3,199,89,E00057787,E00059031,home,shop,home,car,4.0,1900-01-01 12:45:00,1900-01-01 12:55:00,0:10:00
4,199,89,E00059031,E00058564,work,home,work,car,5.0,1900-01-01 13:30:00,1900-01-01 13:50:00,0:20:00


In [7]:
people = pd.read_csv(
    acbm.root_path / "data/processed/activities_pam/people.csv"
)

people.head(5)

Unnamed: 0,pid,hid,freq,hzone,age_years
0,199,89,,E00059031,39
1,200,89,,E00059031,38
2,201,89,,E00059031,7
3,312,139,,E00059045,64
4,313,139,,E00059045,64


In [87]:
activity_chains_home = activity_chains[activity_chains["destination activity"] == "home"]
activity_chains_work = activity_chains[activity_chains["destination activity"] == "work"]
activity_chains_edu = activity_chains[activity_chains["destination activity"] == "education"]
# secondary activities
activities_to_exclude = ["home", "work", "education"]
activity_chains_other = activity_chains[~activity_chains["destination activity"].isin(activities_to_exclude)]


## POI data

In [9]:
osm_data_gdf = gpd.read_parquet(
    acbm.root_path / "data/external/boundaries/west-yorkshire_epsg_4326.parquet"
)

#osm_data_gdf = gpd.GeoDataFrame(osm_data_gdf, geometry="geometry", crs="EPSG:4326")

## Boundary data

In [10]:
where_clause = "MSOA21NM LIKE '%Leeds%'"

boundaries = gpd.read_file(
    acbm.root_path / "data/external/boundaries/oa_england.geojson", where=where_clause
)

boundaries = boundaries.to_crs(epsg=4326)

## Add zone column (OA21CD) to POI data

In [11]:
# ensure that osm_data_gdf and boundaries are in the same crs
osm_data_gdf = osm_data_gdf.to_crs(boundaries.crs)

osm_data_gdf = gpd.sjoin(
    osm_data_gdf, boundaries[["OA21CD", "geometry"]], how="inner", predicate="within"
)

osm_data_gdf.head(10)

Unnamed: 0,id,activities,geometry,units,levels,area,floor_area,distance_to_nearest_transit,distance_to_nearest_shop,distance_to_nearest_medical,index_right,OA21CD
0,643657,transit,POINT (-1.50849 53.76233),1.0,1.0,0,0.0,0.0,109.755337,1883.754159,983,E00057795
1,20621535,home,POINT (-1.56566 53.81375),1.0,2.0,0,0.0,198.493258,108.072976,255.853754,851,E00057657
2,21591858,home,POINT (-1.63279 53.87138),1.0,2.0,0,0.0,969.484065,509.809989,2318.285507,557,E00057349
3,27325240,transit,POINT (-1.66533 53.90971),1.0,1.0,0,0.0,0.0,578.769135,1673.801938,1504,E00058347
4,27348238,transit,POINT (-1.63022 53.90354),1.0,1.0,0,0.0,0.0,95.392235,3927.514412,1542,E00058385
5,27411630,home,POINT (-1.72353 53.90655),1.0,2.0,0,0.0,216.265895,986.041911,1716.667311,1524,E00058367
7,27470976,home,POINT (-1.56251 53.90203),1.0,2.0,0,0.0,873.984192,1799.502624,3250.842461,1478,E00058319
8,27475939,"work,shop",POINT (-1.38550 53.92917),1.0,1.0,0,0.0,13.691226,0.0,276.210958,2148,E00059036
9,27563268,transit,POINT (-1.71095 53.91335),1.0,1.0,0,0.0,0.0,521.696581,653.49953,1531,E00058374
10,27652981,"work,other",POINT (-1.69244 53.90651),1.0,2.0,0,0.0,105.789961,5.964285,75.32615,1506,E00058349


# Assign point locations

## 1. Get neighboring zones

Sometimes, an activity can be assigned to a zone, but there are no facilities
in the zone that match the activity type. In this case, we can search for matching
facilities in neighboring zones.

In [12]:
# get neighbors 
zone_neighbors = Queen.from_dataframe(boundaries, idVariable="OA21CD").neighbors



  zone_neighbors = Queen.from_dataframe(boundaries, idVariable="OA21CD").neighbors


## 2. Assign: Home

### Calculate a home location only once per household


In [48]:
# Keep one row per household and select only household and OA21CD columns
activity_chains_home_hh = activity_chains_home.drop_duplicates(subset=["hid"])
activity_chains_home_hh = activity_chains_home_hh[["hid", "destination activity", "dzone"]]
activity_chains_home_hh.head(10)

Unnamed: 0,hid,destination activity,dzone
1,89,home,E00059031
11,139,home,E00059045
17,197,home,E00059023
21,283,home,E00059012
31,342,home,E00059033
33,371,home,E00059016
41,621,home,E00059021
46,645,home,E00059015
52,892,home,E00059011
56,1156,home,E00059023


In [95]:
import pandas as pd
import geopandas as gpd
import numpy as np
import logging
from typing import Optional

logger = logging.getLogger(__name__)

def _select_facility(
    row: pd.Series,
    unique_id_col: str,
    facilities_gdf: gpd.GeoDataFrame,
    row_destination_zone_col: str,
    gdf_facility_zone_col: str,
    row_activity_type_col: str,
    gdf_facility_type_col: str,
    fallback_type: Optional[str] = None,
    fallback_to_random: bool = False,
    neighboring_zones: Optional[dict] = None,
    gdf_sample_col: Optional[str] = None,
) -> dict:
    """
    Select a suitable facility based on the activity type and a specific zone from a GeoDataFrame.
    Optionally:
     - looks in neighboring zones when there is no suitable facility in the initial zone
     - add a fallback type to search for a more general type of facility when no specific facilities are found
       (e.g. 'education' instead of 'education_university')
     - sample based on a specific column in the GeoDataFrame (e..g. floor_area)

    Parameters
    ----------
    selection_row : pandas.Series
        A row from the DataFrame indicating the selection criteria, including the destination zone and activity type.
    unique_id_col : str
        The column name in `selection_row` that indicates the unique id. It will be the key of the output dictionary.
    facilities_gdf : geopandas.GeoDataFrame
        GeoDataFrame containing facilities to sample from.
    row_destination_zone_col : str
        The column name in `selection_row` that indicates the destination zone.
    gdf_facility_zone_col : str
        The column name in `facilities_gdf` that indicates the facility zone.
    row_activity_type_col : str
        The column in `selection_row` indicating the type of activity (e.g., 'education', 'work').
    gdf_facility_type_col : str
        The column in `facilities_gdf` to filter facilities by type based on the activity type.
    fallback_type : Optional[str]
        A more general type of facility to fallback to if no specific facilities are found. By default None.
    fallback_to_random : bool
        If True, sample from all facilities in the zone if no specific facilities are found. By default False.
    neighboring_zones : Optional[dict]
        A dictionary mapping zones to their neighboring zones for fallback searches, by default None.
    gdf_sample_col : Optional[str]
        The column to sample from, by default None. The only feasible input is "floor_area". If "floor_area" is specified,
        uses this column's values as weights for sampling.

    Returns
    -------
    dict
        Dictionary containing the id and geometry of the chosen facility. Returns {'id': np.nan, 'geometry': np.nan} if no suitable facility is found.
    """
    # ----- Step 1. Find valid facilities in the destination zone

    # Extract the destination zone from the input row
    destination_zone = row[row_destination_zone_col]
    if pd.isna(destination_zone):
        logger.info(f"Activity {row.name}: Destination zone is NA")
        return {'id': np.nan, 'geometry': np.nan}

    # Filter facilities within the specified destination zone
    facilities_in_zone = facilities_gdf[
        facilities_gdf[gdf_facility_zone_col] == destination_zone
    ]
    # Attempt to find facilities matching the specific facility type
    facilities_valid = facilities_in_zone[
        facilities_in_zone[gdf_facility_type_col].apply(
            lambda x: row[row_activity_type_col] in x
        )
    ]
    logger.info(
        f"Activity {row.name}: Found {len(facilities_valid)} matching facilities in zone {destination_zone}"
    )

    # If no specific facilities found in the initial zone, and neighboring zones are provided, search in neighboring zones
    if facilities_valid.empty and neighboring_zones:
        logger.info(
            f"Activity {row.name}: No {row[row_activity_type_col]} facilities in {destination_zone}. Expanding search to neighboring zones"
        )
        neighbors = neighboring_zones.get(destination_zone, [])
        facilities_in_neighboring_zones = facilities_gdf[
            facilities_gdf[gdf_facility_zone_col].isin(neighbors)
        ]
        facilities_valid = facilities_in_neighboring_zones[
            facilities_in_neighboring_zones[gdf_facility_type_col].apply(
                lambda x: row[row_activity_type_col] in x
            )
        ]
        logger.info(
            f"Activity {row.name}: Found {len(facilities_valid)} matching facilities in neighboring zones"
        )

    # If no specific facilities found and a fallback type is provided, attempt to find facilities matching the fallback type
    if facilities_valid.empty and fallback_type:
        logger.info(
            f"Activity {row.name}: No {row[row_activity_type_col]} facilities in zone {destination_zone} or neighboring zones, trying with {fallback_type}"
        )
        # This should consider both the initial zone and neighboring zones if the previous step expanded the search
        facilities_valid = facilities_in_zone[
            facilities_in_zone[gdf_facility_type_col].apply(
                lambda x: fallback_type in x
            )
        ]
        logger.info(
            f"Activity {row.name}: Found {len(facilities_valid)} matching facilities with type: {fallback_type}"
        )

    # if no specific facilities found and fallback_to_random is True, take all facilities in the zone
    if facilities_valid.empty and fallback_to_random:
        logger.info(
            f"Activity {row.name}: No facilities in zone {destination_zone} with {gdf_facility_type_col} '{fallback_type or row[row_activity_type_col]}'. Sampling from all facilities in the zone"
        )
        facilities_valid = facilities_in_zone

    # If no facilities found after all attempts, log the failure and return NaN
    if facilities_valid.empty:
        logger.info(
            f"Activity {row.name}: No facilities in zone {destination_zone} with {gdf_facility_type_col} '{fallback_type or row[row_activity_type_col]}'"
        )
        return {row[unique_id_col]: (np.nan, np.nan)}
    
    # ----- Step 2. Sample a facility from the valid facilities

    # If "floor_area" is specified for sampling
    if (
        gdf_sample_col == "floor_area"
        and "floor_area" in facilities_valid.columns
        and facilities_valid["floor_area"].sum() != 0
    ):
        # Ensure floor_area is numeric
        facilities_valid["floor_area"] = pd.to_numeric(
            facilities_valid["floor_area"], errors="coerce"
        )
        facilities_valid = facilities_valid.dropna(subset=["floor_area"])
        facility = facilities_valid.sample(1, weights=facilities_valid["floor_area"])
        logger.info(f"Activity {row.name}: Sampled facility based on floor area)")
    else:
        # Otherwise, randomly sample one facility from the valid facilities
        facility = facilities_valid.sample(1)
        logger.info(f"Activity {row.name}: Sampled facility randomly")

    # Return the id and geometry of the selected facility
    return {row[unique_id_col]: (facility["id"].values[0], facility["geometry"].values[0])}


def select_facility_main(
    df: pd.DataFrame,
    unique_id_col: str,
    facilities_gdf: gpd.GeoDataFrame,
    row_destination_zone_col: str,
    gdf_facility_zone_col: str,
    row_activity_type_col: str,
    gdf_facility_type_col: str,
    gdf_sample_col: Optional[str] = None,
    neighboring_zones: Optional[dict] = None,
    fallback_type: Optional[str] = None,
    fallback_to_random: bool = False,
) -> pd.DataFrame:
    """
    Select facilities for each row in the DataFrame based on the provided logic.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame containing the activity chains.
    facilities_gdf : gpd.GeoDataFrame
        GeoDataFrame containing facilities to sample from.
    row_destination_zone_col : str
        The column name in `df` that indicates the destination zone.
    gdf_facility_zone_col : str
        The column name in `facilities_gdf` that indicates the facility zone.
    row_activity_type_col : str
        The column in `df` indicating the type of activity (e.g., 'education', 'work').
    gdf_facility_type_col : str
        The column in `facilities_gdf` to filter facilities by type based on the activity type.
    gdf_sample_col : Optional[str]
        The column to sample from, by default None. The only feasible input is "floor_area".
    neighboring_zones : Optional[dict]
        A dictionary mapping zones to their neighboring zones for fallback searches, by default None.

    Returns
    -------
    pd.DataFrame
        DataFrame containing the selected facilities for each row.
    """
    # Initialize a dictionary to store the selected facilities
    selected_facilities = {}

    # Select a facility for each row in the DataFrame
    for index, row in df.iterrows():
        selected_facility = _select_facility(
            row=row,
            unique_id_col=unique_id_col,
            facilities_gdf=facilities_gdf,
            row_destination_zone_col=row_destination_zone_col,
            row_activity_type_col=row_activity_type_col,
            gdf_facility_zone_col=gdf_facility_zone_col,
            gdf_facility_type_col=gdf_facility_type_col,
            gdf_sample_col=gdf_sample_col,
            neighboring_zones=neighboring_zones,
            fallback_type=fallback_type,
            fallback_to_random=fallback_to_random,
        )

        # Update the dictionary with the selected facility
        selected_facilities.update(selected_facility)

    return selected_facilities




In [74]:
def map_activity_locations(activity_chains_df, activity_locations_dict, id_col='pid'):
    """
    Map activity locations to the activity chains DataFrame.

    Parameters
    ----------
    activity_chains_df : pd.DataFrame
        DataFrame containing the activity chains.
    activity_locations_dict : dict
        Dictionary containing the activity locations.
    pid_col : str, optional
        The column name in `activity_chains_df` that contains the unique identifiers, by default 'pid'.

    Returns
    -------
    pd.DataFrame
        DataFrame with mapped activity locations.
    """
    activity_chains_df['end_location_id'] = activity_chains_df[id_col].map(
        lambda pid: activity_locations_dict[pid][0] if pid in activity_locations_dict else None
    )
    activity_chains_df['end_location_geometry'] = activity_chains_df[id_col].map(
        lambda pid: activity_locations_dict[pid][1] if pid in activity_locations_dict else None
    )
    return activity_chains_df

In [100]:
activity_locations_home = select_facility_main(
    df = activity_chains_home_hh,
    unique_id_col="hid",
    facilities_gdf=osm_data_gdf,
    row_destination_zone_col="dzone",
    row_activity_type_col="destination activity",
    gdf_facility_zone_col="OA21CD",
    gdf_facility_type_col="activities",
    gdf_sample_col="floor_area",
    neighboring_zones=zone_neighbors,
)

dict(list(activity_locations_home.items())[0:5])

{89: ('1735591106', <POINT (-1.396 53.934)>),
 139: ('1743398874', <POINT (-1.396 53.93)>),
 197: ('1730012610', <POINT (-1.391 53.94)>),
 283: ('1786591704', <POINT (-1.384 53.941)>),
 342: ('1686101310', <POINT (-1.405 53.933)>)}

In [76]:
# Map the activity_id and activity_geometry to the activity_chains_home_df DataFrame
activity_chains_home = map_activity_locations(activity_chains_df=activity_chains_home, 
                                              activity_locations_dict=activity_locations_home, 
                                              id_col='hid')

activity_chains_home.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  activity_chains_df['end_location_id'] = activity_chains_df[id_col].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  activity_chains_df['end_location_geometry'] = activity_chains_df[id_col].map(


Unnamed: 0,pid,hid,ozone,dzone,purp,origin activity,destination activity,mode,seq,tst,tet,duration,end_location_id,end_location_geometry
1,199,89,E00057751,E00059031,home,other,home,walk,2.0,1900-01-01 10:45:00,1900-01-01 11:30:00,0:45:00,1735591106,POINT (-1.3959873611794986 53.93355747640568)
3,199,89,E00057787,E00059031,home,shop,home,car,4.0,1900-01-01 12:45:00,1900-01-01 12:55:00,0:10:00,1735591106,POINT (-1.3959873611794986 53.93355747640568)
5,199,89,E00059031,E00059031,home,work,home,car,6.0,1900-01-01 20:30:00,1900-01-01 21:00:00,0:30:00,1735591106,POINT (-1.3959873611794986 53.93355747640568)
7,200,89,E00056934,E00059031,home,escort,home,car,2.0,1900-01-01 15:30:00,1900-01-01 15:50:00,0:20:00,1735591106,POINT (-1.3959873611794986 53.93355747640568)
9,201,89,E00059031,E00059031,home,education,home,car,2.0,1900-01-01 15:30:00,1900-01-01 15:50:00,0:20:00,1735591106,POINT (-1.3959873611794986 53.93355747640568)
11,312,139,E00059045,E00059045,home,work,home,car,2.0,1900-01-01 22:02:00,1900-01-01 22:03:00,0:01:00,1743398874,POINT (-1.3955721586108336 53.92982250353147)
15,313,139,E00059025,E00059045,home,escort,home,car,4.0,1900-01-01 22:02:00,1900-01-01 22:03:00,0:01:00,1743398874,POINT (-1.3955721586108336 53.92982250353147)
17,425,197,E00057887,E00059023,home,other,home,car,2.0,1900-01-01 11:19:00,1900-01-01 11:32:00,0:13:00,1730012610,POINT (-1.391192278937309 53.93988602688271)
19,425,197,E00169780,E00059023,home,shop,home,car,4.0,1900-01-01 18:14:00,1900-01-01 18:30:00,0:16:00,1730012610,POINT (-1.391192278937309 53.93988602688271)
21,611,283,E00169789,E00059012,home,shop,home,car,4.0,1900-01-01 12:15:00,1900-01-01 12:35:00,0:20:00,1786591704,POINT (-1.3844992736553874 53.94113442987814)


## 3. Assign: Work

In [99]:
activity_locations_work = select_facility_main(
    df = activity_chains_work,
    unique_id_col="pid",
    facilities_gdf=osm_data_gdf,
    row_destination_zone_col="dzone",
    row_activity_type_col="destination activity",
    gdf_facility_zone_col="OA21CD",
    gdf_facility_type_col="activities",
    gdf_sample_col="floor_area",
    neighboring_zones=zone_neighbors,
)

dict(list(activity_locations_work.items())[0:5])

{199: ('1234168144', <POINT (-1.495 53.788)>),
 312: ('1739580702', <POINT (-1.394 53.931)>),
 313: ('402156600', <POINT (-1.517 53.778)>),
 1385: ('1278889392', <POINT (-1.461 53.822)>),
 1386: ('1822956418', <POINT (-1.387 53.929)>)}

In [77]:
# Map the activity_id and activity_geometry to the activity_chains_df DataFrame
activity_chains_work = map_activity_locations(activity_chains_df=activity_chains_work, 
                                              activity_locations_dict=activity_locations_work, 
                                              id_col='pid')

activity_chains_work.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  activity_chains_df['end_location_id'] = activity_chains_df[id_col].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  activity_chains_df['end_location_geometry'] = activity_chains_df[id_col].map(


## 4. Assign: Education

We use education_type as a fallback. This has been calculated using age_group_mapping https://github.com/Urban-Analytics-Technology-Platform/acbm/blob/935b3243bf0f8ce9d766d256f33990ca6697f4e4/scripts/2_match_households_and_individuals.py#L1085

In [83]:
# load in activity chains
spc_with_nts = pd.read_parquet(
    acbm.root_path / "data/interim/matching/spc_with_nts_trips.parquet"
)

spc_with_nts.head(5)

Unnamed: 0,id,household,pid_hs,msoa11cd,oa11cd,members,sic1d2007,sic2d2007,pwkstat,salary_yearly,...,tst,tet,TripDisIncSW,TripDisExSW,TripTotalTime,TripTravTime,ozone,dzone,W5,W5xHH
0,199,89,2906098,E02002330,E00059031,"[199, 200, 201]",C,26.0,1,21491.294922,...,390.0,410.0,13.0,13.0,20.0,20.0,2.0,2.0,0.568328,1.0
1,199,89,2906098,E02002330,E00059031,"[199, 200, 201]",C,26.0,1,21491.294922,...,990.0,1020.0,13.0,13.0,30.0,20.0,2.0,2.0,0.568328,1.0
2,199,89,2906098,E02002330,E00059031,"[199, 200, 201]",C,26.0,1,21491.294922,...,520.0,525.0,1.0,1.0,5.0,5.0,2.0,2.0,0.568328,1.0
3,199,89,2906098,E02002330,E00059031,"[199, 200, 201]",C,26.0,1,21491.294922,...,530.0,535.0,1.0,1.0,5.0,5.0,2.0,2.0,0.568328,1.0
4,199,89,2906098,E02002330,E00059031,"[199, 200, 201]",C,26.0,1,21491.294922,...,890.0,900.0,1.0,1.0,10.0,5.0,2.0,2.0,0.568328,1.0


In [84]:
# we get one row per id
spc_with_nts_edu = spc_with_nts[["id", "education_type"]].drop_duplicates(subset = "id")
spc_with_nts_edu.head(5)

Unnamed: 0,id,education_type
0,199,education_university
21,200,education_university
45,201,education_school
64,312,education_university
76,313,education_university


In [88]:
# merge the education type with the activity chains
activity_chains_edu = activity_chains_edu.merge(spc_with_nts_edu, left_on="pid", right_on="id", how="left").drop(columns=["id"])
activity_chains_edu.head(5)

Unnamed: 0,pid,hid,ozone,dzone,purp,origin activity,destination activity,mode,seq,tst,tet,duration,education_type
0,201,89,E00059031,E00059030,education,home,education,car,1.0,1900-01-01 08:40:00,1900-01-01 08:45:00,0:05:00,education_school
1,4550,2107,E00059038,E00059036,education,home,education,car,1.0,1900-01-01 08:10:00,1900-01-01 08:30:00,0:20:00,education_school
2,5486,2499,E00059022,E00059023,education,home,education,walk,1.0,1900-01-01 08:05:00,1900-01-01 08:25:00,0:20:00,education_school
3,6310,2834,E00059028,E00059030,education,home,education,walk,1.0,1900-01-01 08:05:00,1900-01-01 08:25:00,0:20:00,education_school
4,9508,4118,E00059025,E00059043,education,home,education,car,1.0,1900-01-01 08:30:00,1900-01-01 08:45:00,0:15:00,education_school


In [98]:
# apply the function to a row in activity_chains_ex
activity_locations_edu = select_facility_main(
    df = activity_chains_edu,
    unique_id_col="pid",
    facilities_gdf=osm_data_gdf,
    row_destination_zone_col="dzone",
    row_activity_type_col="education_type",
    gdf_facility_zone_col="OA21CD",
    gdf_facility_type_col="activities",
    gdf_sample_col="floor_area",
    neighboring_zones=zone_neighbors,
    fallback_type="education",
)

# show the first five elements in the dictionary
dict(list(activity_locations_edu.items())[0:5])

{201: ('160360912', <POINT (-1.391 53.933)>),
 4550: ('24688425', <POINT (-1.39 53.931)>),
 5486: ('2409409', <POINT (-1.389 53.941)>),
 6310: ('160360912', <POINT (-1.391 53.933)>),
 9508: ('25600303', <POINT (-1.379 53.931)>)}

In [90]:
# Map the activity_id and activity_geometry to the activity_chains_home_df DataFrame
activity_chains_edu = map_activity_locations(activity_chains_df=activity_chains_edu, 
                                             activity_locations_dict=activity_locations_edu, 
                                             id_col='pid')

activity_chains_edu.head(10)

Unnamed: 0,pid,hid,ozone,dzone,purp,origin activity,destination activity,mode,seq,tst,tet,duration,education_type,end_location_id,end_location_geometry
0,201,89,E00059031,E00059030,education,home,education,car,1.0,1900-01-01 08:40:00,1900-01-01 08:45:00,0:05:00,education_school,160360912,POINT (-1.3905395371523108 53.93323059983641)
1,4550,2107,E00059038,E00059036,education,home,education,car,1.0,1900-01-01 08:10:00,1900-01-01 08:30:00,0:20:00,education_school,24688425,POINT (-1.3895129981902052 53.930690868751356)
2,5486,2499,E00059022,E00059023,education,home,education,walk,1.0,1900-01-01 08:05:00,1900-01-01 08:25:00,0:20:00,education_school,2409409,POINT (-1.3893578340484058 53.9408016531483)
3,6310,2834,E00059028,E00059030,education,home,education,walk,1.0,1900-01-01 08:05:00,1900-01-01 08:25:00,0:20:00,education_school,160360912,POINT (-1.3905395371523108 53.93323059983641)
4,9508,4118,E00059025,E00059043,education,home,education,car,1.0,1900-01-01 08:30:00,1900-01-01 08:45:00,0:15:00,education_school,25600303,POINT (-1.3788483462935803 53.93096345456586)
5,10184,4474,E00058975,E00058981,education,home,education,car,1.0,1900-01-01 08:00:00,1900-01-01 08:15:00,0:15:00,education_school,12105985,POINT (-1.3534740855059533 53.90352890730664)
6,16842,7377,E00058343,E00058376,education,home,education,car,1.0,1900-01-01 08:15:00,1900-01-01 08:30:00,0:15:00,education_school,895821,POINT (-1.6935145347674643 53.91201457658703)
7,17336,7605,E00058339,E00058345,education,home,education,car,1.0,1900-01-01 08:35:00,1900-01-01 08:55:00,0:20:00,education_school,121323674,POINT (-1.7081158416063476 53.91216123861758)
8,24533,11175,E00058338,E00058345,education,home,education,car,1.0,1900-01-01 07:55:00,1900-01-01 08:00:00,0:05:00,education_kg,281425490,POINT (-1.7071914906836947 53.91213021682614)
9,25434,11574,E00058353,E00058355,education,home,education,car,1.0,1900-01-01 07:46:00,1900-01-01 08:03:00,0:17:00,education_school,174706718,POINT (-1.70241676914788 53.903283772492614)


## 5. Assign: Secondary locations

### Create unique_id column 

pid and hid are not unique id columns, as there can be many different secondary activities done by the same person. 

We create a unique identifier that can be mapped back to the original data.

In [93]:
# Unique id column: Concatenate pid, seq

activity_chains_other["act_id"] = activity_chains_other["pid"].astype(str) + "_" + activity_chains_other["seq"].astype(str)
activity_chains_other.head(5)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  activity_chains_other["act_id"] = activity_chains_other["pid"].astype(str) + "_" + activity_chains_other["seq"].astype(str)


Unnamed: 0,pid,hid,ozone,dzone,purp,origin activity,destination activity,mode,seq,tst,tet,duration,act_id
0,199,89,E00059031,E00057751,other,home,other,walk,1.0,1900-01-01 10:00:00,1900-01-01 10:45:00,0:45:00,199_1.0
2,199,89,E00059031,E00057787,shop,home,shop,car,3.0,1900-01-01 12:00:00,1900-01-01 12:10:00,0:10:00,199_3.0
6,200,89,E00059031,E00056934,escort,home,escort,car,1.0,1900-01-01 12:45:00,1900-01-01 14:00:00,1:15:00,200_1.0
12,313,139,E00059045,E00170265,escort,home,escort,car,1.0,1900-01-01 06:40:00,1900-01-01 06:41:00,0:01:00,313_1.0
14,313,139,E00059045,E00059025,escort,work,escort,car,3.0,1900-01-01 21:40:00,1900-01-01 22:02:00,0:22:00,313_3.0


In [None]:
# apply the function to a row in activity_chains_ex
activity_locations_other = select_facility_main(
    df = activity_chains_other,
    unique_id_col="act_id",
    facilities_gdf=osm_data_gdf,
    row_destination_zone_col="dzone",
    row_activity_type_col="purp",
    gdf_facility_zone_col="OA21CD",
    gdf_facility_type_col="activities",
    gdf_sample_col="floor_area",
    neighboring_zones=zone_neighbors,
    fallback_to_random=True,
)

dict(list(activity_locations_other.items())[0:5])

In [97]:
# Map the activity_id and activity_geometry to the activity_chains_home_df DataFrame
activity_chains_other = map_activity_locations(activity_chains_df=activity_chains_other, 
                                             activity_locations_dict=activity_locations_other, 
                                             id_col='act_id')

activity_chains_other.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  activity_chains_df['end_location_id'] = activity_chains_df[id_col].map(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  activity_chains_df['end_location_geometry'] = activity_chains_df[id_col].map(


Unnamed: 0,pid,hid,ozone,dzone,purp,origin activity,destination activity,mode,seq,tst,tet,duration,act_id,end_location_id,end_location_geometry
0,199,89,E00059031,E00057751,other,home,other,walk,1.0,1900-01-01 10:00:00,1900-01-01 10:45:00,0:45:00,199_1.0,789852700,POINT (-1.6426150403792243 53.840081767061676)
2,199,89,E00059031,E00057787,shop,home,shop,car,3.0,1900-01-01 12:00:00,1900-01-01 12:10:00,0:10:00,199_3.0,1127350880,POINT (-1.531742214084653 53.77804802242871)
6,200,89,E00059031,E00056934,escort,home,escort,car,1.0,1900-01-01 12:45:00,1900-01-01 14:00:00,1:15:00,200_1.0,1935592730,POINT (-1.370798586961893 53.766521697291694)
12,313,139,E00059045,E00170265,escort,home,escort,car,1.0,1900-01-01 06:40:00,1900-01-01 06:41:00,0:01:00,313_1.0,357904944,POINT (-1.5345635783813476 53.791961924428186)
14,313,139,E00059045,E00059025,escort,work,escort,car,3.0,1900-01-01 21:40:00,1900-01-01 22:02:00,0:22:00,313_3.0,1957404616,POINT (-1.3701628153017384 53.9348171264856)
16,425,197,E00059023,E00057887,other,home,other,car,1.0,1900-01-01 09:00:00,1900-01-01 09:14:00,0:14:00,425_1.0,1022414380,POINT (-1.607217429877327 53.811740511450814)
18,425,197,E00059023,E00169780,shop,home,shop,car,3.0,1900-01-01 17:30:00,1900-01-01 17:42:00,0:12:00,425_3.0,949394246,POINT (-1.5539252385756364 53.80104992471259)
20,611,283,E00059012,E00169789,shop,home,shop,car,3.0,1900-01-01 09:00:00,1900-01-01 09:45:00,0:45:00,611_3.0,408473998,POINT (-1.5395968206622659 53.79560579771324)
22,611,283,E00059012,E00057434,escort,home,escort,car,5.0,1900-01-01 14:20:00,1900-01-01 14:35:00,0:15:00,611_5.0,284542360,POINT (-1.3847991755941609 53.79631513339939)
24,611,283,E00059012,E00058900,other,home,other,car,7.0,1900-01-01 16:15:00,1900-01-01 16:35:00,0:20:00,611_7.0,858973344,POINT (-1.6035681271094513 53.840880621961674)


# Assign Origin locations from destinations