In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from src import caa_survey_utils, config, condition_mapping_utils

from src.old_mappers.ModeConditionMapperV4 import ModeConditionMapper as ModeConditionMapperV4
from src.old_mappers.ModeConditionMapperV4_Corrected import ModeConditionMapper as ModeConditionMapperV4_Corrected
from src.old_mappers.ModeConditionMapperV5 import ModeConditionMapper as ModeConditionMapperV5
from src.old_mappers.ModeConditionMapperV6 import ModeConditionMapper as ModeConditionMapperV6
from src.old_mappers.ModeConditionMapperV6_Old_LASAM_Mode_LU import ModeConditionMapper as ModeConditionMapperV6_old_lasam_mode_lu


%load_ext autoreload
%autoreload 2
pd.set_option('display.float_format', '{:.2f}'.format)

# 1. Import and Preprocess Data

## 1.1 CAA Survey Data

In [2]:
# Heathrow CAA survey workbook (2023-24 extract) read from the project share
caa_2023_24_path = r"\\uk-lon-FAS02\Projects\UNIF\Projects\60H700SA - Heathrow SAS 2024\04 Technical\03 LASAM Development\11_Mode choice models\21 Origin assignment\01 Sent to TP\Full dataset for LASAM Zone Assignment.xlsx"

# Load the sheet and discard the listed columns in a single expression
caa_original = pd.read_excel(caa_2023_24_path, engine='openpyxl').drop(
    columns=[
        'Date',
        'Mode Group',
        'Rename',
        'Tylers proposed change',
        'Exclusion',
        'Report Method Timestamp',
        'Weighting',
    ]
)

### Remove Dummy Records and uplift remaining population

In [3]:
# Keep only the Heathrow (LHR) respondents and renumber the rows from zero
caa_lhr = caa_original.loc[caa_original['AIRPORT_Prefix'] == 'LHR'].reset_index(drop=True)

# Strip dummy records; the helper prints how many were removed and the uplift applied
caa_lhr = caa_survey_utils.process_dummy_records(caa_lhr)

caa_lhr.head()

0 dummy records removed and reminaing population uplifted by 1.0


Unnamed: 0,TAG,AIRPORT_Prefix,APT_TERMINAL,GATE,Year,SYSTEM_QUARTER,SYSTEM_MONTH,IDATE,SYSTEM_START_TIME,SEX,...,SYSTEM_PURPOSE1,PURPOSE,MBUSINESS,PROMOTE,ETHNIC,DISABLE,POP,DUMMY_FLAG,RUN_DATE,NATION1
0,Heathrow-11/01/2024-TZR123-330,LHR,4,,2024,1,Jan,2024-01-11,13:49:35,Male,...,Leisure,Leisure Other,,8,White - Irish,No,1561.89,Missing,27/03/2025 15:07:51,Ireland
1,Heathrow-14/01/2024-IER123-328,LHR,3,,2024,1,Jan,2024-01-14,06:24:19,Female,...,Business,Business,,10,Other,No,809.01,Missing,27/03/2025 15:07:51,New Zealand
2,Heathrow-05/01/2024-EFL123-326,LHR,5,,2024,1,Jan,2024-01-05,10:10:41,Female,...,Leisure,Leisure Other,,7,White - Any other White background,No,1058.39,Missing,27/03/2025 15:07:51,United States of America (USA)
3,Heathrow-05/01/2024-EAU123-326,LHR,3,,2024,1,Jan,2024-01-05,20:34:22,Male,...,Leisure,Visiting Friends and Relatives,,6,Asian - Chinese,No,849.12,Missing,27/03/2025 15:07:51,China
4,Heathrow-10/01/2024-IER123-314,LHR,2,,2024,1,Jan,2024-01-10,17:04:42,Male,...,Leisure,Visiting Friends and Relatives,,8,White - Any other White background,No,688.41,Missing,27/03/2025 15:07:51,Sweden


### Remove records for interline passengers

In [4]:
# Drop interline passenger records from the LHR sample; the helper prints how many rows it removed
caa_lhr = caa_survey_utils.remove_interline_pax(caa_lhr)

removed 0 rows with interline passengers


### Assign LASAM Segment

In [5]:
# Left-join the segment lookup so every survey row is kept even when no lookup row matches
caa_lhr = caa_lhr.merge(
    config.segment_lu,
    how='left',
    on=['SYSTEM_COUNTRY', 'SYSTEM_RouteTo', 'SYSTEM_PURPOSE1', 'SYSTEM_Market'],
)

### Rename TfL Rail to Elizabeth Line and rename the terminal column

In [6]:
# Relabel the old TfL Rail mode as Elizabeth Line in every mode field
columns_to_update = ['MODEA', 'MODEB', 'MODEC', 'SYSTEM_FINALMODE']
caa_lhr[columns_to_update] = caa_lhr[columns_to_update].replace(
    {'TfL Rail (formerly Heathrow Connect)': 'Elizabeth Line'}
)

# The terminal column is referred to as 'Terminal' from here on
caa_lhr = caa_lhr.rename(columns={'APT_TERMINAL': 'Terminal'})

### Assign LASAM mode based on CAA final mode to compare mode shares

In [7]:
# Left-join the CAA final mode -> LASAM mode lookup; rows without a match are kept
caa_lhr = caa_lhr.merge(
    config.caa_final_mode_lasam_mode_lu,
    how='left',
    on='SYSTEM_FINALMODE',
)

### Convert A B C mode categories that are in CAA to Last, 2ndLast and 3rdLast

In [None]:
# Build the 'Last', '2ndLast' and '3rdLast' columns by calling each helper once per row (axis=1).
# NOTE(review): presumably the helpers re-order MODEA/MODEB/MODEC so that 'Last' is the final
# mode used before the airport -- confirm against caa_survey_utils.
caa_lhr['Last'] = caa_lhr.apply(caa_survey_utils.apply_last_mode, axis=1)

caa_lhr['2ndLast'] = caa_lhr.apply(caa_survey_utils.apply_2ndlast_mode, axis=1)

caa_lhr['3rdLast'] = caa_lhr.apply(caa_survey_utils.apply_3rdlast_mode, axis=1)

### Add Origin column to classify the origin into LDN, NonLDN and Airport

In [9]:
# Classify each record's origin without a row-by-row Python loop.
# np.select takes the first matching mask, so the airport district takes priority over
# the county test; anything else (including missing values) falls through to 'NonLDN'.
caa_lhr['Origin'] = np.select(
    [
        caa_lhr['SYSTEM_District'].isin(['Heathrow Airport (SE)']),
        caa_lhr['SYSTEM_County'].isin(['Greater London']),
    ],
    ['AIRPORT', 'LDN'],
    default='NonLDN',
)

### Add columns that indicate whether certain modes have been used at least once

In [None]:
# column to flag that the Elizabeth Line has been used at least once
caa_lhr['Contains_Elizabeth_Line'] = caa_lhr.apply(caa_survey_utils.apply_contains_mode, axis=1, mode='Elizabeth Line')

# column to flag that Heathrow Express has been used at least once
caa_lhr['Contains_Heathrow_Express'] = caa_lhr.apply(caa_survey_utils.apply_contains_mode, axis=1, mode='Heathrow Express')

# column to flag that the Tube has been used at least once
caa_lhr['Contains_Tube'] = caa_lhr.apply(caa_survey_utils.apply_contains_mode, axis=1, mode='Tube/Metro/Subway')

# column to flag that a rental car has been used at least once
# (a list of modes is passed here; presumably the helper flags a match on any of them -- confirm in caa_survey_utils)
caa_lhr['Contains_Rental'] = caa_lhr.apply(caa_survey_utils.apply_contains_mode, axis=1, mode=['Rental car - short term car park', 'Rental car - hire car courtesy bus'])

# 2. Assign LASAM Modes

In [11]:
# Independent copy of the 2024 records, so later edits do not touch caa_lhr
caa_lhr_2024 = caa_lhr.loc[caa_lhr['Year'] == 2024].copy()

In [None]:
def step_1(self):
    """Choose a starting mode from the 'Last', '2ndLast' and '3rdLast' columns.

    'Last' is kept whenever it is anything other than "Other". When it is
    "Other", '2ndLast' is used unless that is "Other"/"No Mode"; in that case
    '3rdLast' is used unless it is also "Other"/"No Mode". If all three are
    uninformative the result is "Other".

    Args:
        self: object exposing the survey DataFrame as ``self.df``.

    Returns:
        numpy.ndarray holding one value per row of ``self.df``.
    """
    frame = self.df
    uninformative = ["Other", "No Mode"]

    last_is_other = frame['Last'] == "Other"
    second_is_blank = frame['2ndLast'].isin(uninformative)
    third_is_blank = frame['3rdLast'].isin(uninformative)

    # (mask, value) pairs -- np.select applies the first mask that matches a row
    rules = [
        (last_is_other & second_is_blank & third_is_blank, "Other"),
        (last_is_other & second_is_blank & ~third_is_blank, frame['3rdLast']),
        (last_is_other & ~second_is_blank, frame['2ndLast']),
        (frame['Last'] != "Other", frame['Last']),
    ]
    masks = [mask for mask, _ in rules]
    values = [value for _, value in rules]

    # NaN is only used for a row that matches none of the masks
    return np.select(masks, values, default=np.nan)
    
def step_2(self):
    """Swap a cycle/walk 'Step_1' result for another recorded mode where one exists.

    Rules, first match wins:
      * 'Step_1' and '2ndLast' are both cycle/walk and '3rdLast' is not
        "No Mode" -> '3rdLast'.
      * 'Step_1' is cycle/walk, '2ndLast' is neither cycle/walk nor
        "No Mode" -> '2ndLast'.
      * 'Step_1' is neither cycle/walk nor "No Mode" -> 'Step_1' unchanged.
      * anything else -> "Other".

    Args:
        self: object exposing the survey DataFrame as ``self.df``.

    Returns:
        numpy.ndarray holding one value per row of ``self.df``.
    """
    frame = self.df
    active_modes = ["Cycle", "Walk (where only mode)"]

    step1_is_active = frame['Step_1'].isin(active_modes)
    second_is_active = frame['2ndLast'].isin(active_modes)

    masks = [
        step1_is_active & second_is_active & (frame['3rdLast'] != 'No Mode'),
        step1_is_active & ~second_is_active & (frame['2ndLast'] != 'No Mode'),
        ~step1_is_active & (frame['Step_1'] != 'No Mode'),
    ]
    replacements = [frame['3rdLast'], frame['2ndLast'], frame['Step_1']]

    return np.select(masks, replacements, default="Other")

def step_3(self):
    """Relabel Tube results as Heathrow Express when Heathrow Express was used.

    A row whose 'Step_2' is "Tube/Metro/Subway" and whose
    'Contains_Heathrow_Express' flag is True becomes "Heathrow Express";
    every other row keeps its 'Step_2' value.

    Args:
        self: object exposing the survey DataFrame as ``self.df``.

    Returns:
        numpy.ndarray holding one value per row of ``self.df``.
    """
    frame = self.df

    tube_result = frame['Step_2'] == "Tube/Metro/Subway"
    used_hex = frame['Contains_Heathrow_Express'] == True

    return np.select(
        [tube_result & used_hex],
        ["Heathrow Express"],
        default=frame['Step_2'],
    )

def step_4(self):
    """Assign Heathrow Express when it was used and the Elizabeth Line was not.

    Rows with 'Contains_Heathrow_Express' True and 'Contains_Elizabeth_Line'
    False become "Heathrow Express"; all other rows keep 'Step_3'.

    Args:
        self: object exposing the survey DataFrame as ``self.df``.

    Returns:
        numpy.ndarray holding one value per row of ``self.df``.
    """
    frame = self.df

    used_hex = frame['Contains_Heathrow_Express'] == True
    no_elizabeth = frame['Contains_Elizabeth_Line'] == False

    hex_only = used_hex & no_elizabeth
    return np.select([hex_only], ["Heathrow Express"], default=frame['Step_3'])
    
def step_5(self):
    """Resolve trips that used both Heathrow Express and the Elizabeth Line.

    When both 'Contains_Heathrow_Express' and 'Contains_Elizabeth_Line' are
    True, the result is "Elizabeth Line" if 'Terminal' equals 5 and
    "Heathrow Express" otherwise. Rows that did not use both keep 'Step_4'.

    Args:
        self: object exposing the survey DataFrame as ``self.df``.

    Returns:
        numpy.ndarray holding one value per row of ``self.df``.
    """
    frame = self.df

    used_both = (
        (frame['Contains_Heathrow_Express'] == True)
        & (frame['Contains_Elizabeth_Line'] == True)
    )

    masks = [
        used_both & (frame['Terminal'] == 5),
        used_both & (frame['Terminal'] != 5),
    ]
    labels = ["Elizabeth Line", "Heathrow Express"]

    return np.select(masks, labels, default=frame['Step_4'])

def step_6(self):
    """Label rental-car trips that involved none of the flagged rail modes.

    Rows with 'Contains_Rental' True and none of 'Contains_Heathrow_Express',
    'Contains_Elizabeth_Line' or 'Contains_Tube' set become "Rentals"; all
    other rows keep 'Step_5'.

    Args:
        self: object exposing the survey DataFrame as ``self.df``.

    Returns:
        numpy.ndarray holding one value per row of ``self.df``.
    """
    frame = self.df

    used_rental = frame['Contains_Rental'] == True
    used_any_rail = (
        frame['Contains_Heathrow_Express']
        | frame['Contains_Elizabeth_Line']
        | frame['Contains_Tube']
    )

    rental_without_rail = used_rental & ~used_any_rail
    return np.select([rental_without_rail], ["Rentals"], default=frame['Step_5'])

def step_7(self):
    """Decide how trips that mention the RailAir bus are labelled.

    The columns 'Last', '2ndLast', '3rdLast' are scanned in that order for the
    RailAir bus label (substring match on the stringified value). At the first
    hit, the columns listed *after* it are searched for any of the rail/tube
    modes: if one is found the row is labelled as RailAir bus, otherwise as
    'Other National/Regional coach service'. Rows that never mention the
    RailAir bus keep their 'Step_6' value.

    Args:
        self: object exposing the survey DataFrame as ``self.df``.

    Returns:
        The result of ``self.df.apply(..., axis=1)`` -- one label per row.
    """
    leg_columns = ['Last', '2ndLast', '3rdLast']
    rail_modes = [
        "Tube/Metro/Subway",
        "Elizabeth Line",
        "TfL Rail (formerly Heathrow Connect)",
        "National railways",
        "Rail Unspecified",
    ]
    railair_bus = 'RailAir Bus (Reading/Woking/Feltham)'
    coach_label = 'Other National/Regional coach service'

    def classify(row):
        legs = [str(row[name]) for name in leg_columns]

        for position, leg in enumerate(legs):
            if railair_bus not in leg:
                continue

            # Only the columns that follow the RailAir hit are inspected
            remaining_legs = legs[position + 1:]
            rail_found = any(
                mode in other_leg
                for other_leg in remaining_legs
                for mode in rail_modes
            )
            return railair_bus if rail_found else coach_label

        # No RailAir bus anywhere on this row
        return row['Step_6']

    return self.df.apply(classify, axis=1)

def step_8(self):
    """Label a hotel bus that follows a charter coach as "Charter coach".

    Rows where 'Last' is 'Hotel bus' and '2ndLast' is 'Charter coach' become
    "Charter coach"; every other row keeps its 'Step_7' value.

    Args:
        self: object exposing the survey DataFrame as ``self.df``.

    Returns:
        numpy.ndarray holding one value per row of ``self.df``.
    """
    frame = self.df

    hotel_bus_last = frame['Last'] == 'Hotel bus'
    charter_before = frame['2ndLast'] == 'Charter coach'

    return np.select(
        [hotel_bus_last & charter_before],
        ["Charter coach"],
        default=frame['Step_7'],
    )
